About this Map
49 | 50 | This is a cool map from 51 | www.py4e.com. 52 |
53 | 54 | 55 | -------------------------------------------------------------------------------- /Course 4 - Using databases with Python/ex16/where.js: -------------------------------------------------------------------------------- 1 | myData = [ 2 | [50.06688579999999,19.9136192, 'aleja Adama Mickiewicza 30, 30-059 Kraków, Poland'], 3 | [52.2394019,21.0150792, 'Krakowskie Przedmieście 5, 00-068 Warszawa, Poland'], 4 | [33.4641541,-111.9231478, '1475 N Scottsdale Rd, Scottsdale, AZ 85257, USA'], 5 | [38.0399391,23.8030901, 'Monumental Plaza, Building C, 1st Floor, Leof. Kifisias 44, Marousi 151 25, Greece'], 6 | [28.3639976,75.58696809999999, 'VidyaVihar Campus, Pilani, Rajasthan 333031, India'], 7 | [6.8919631,3.7186605, 'Ilishan Remo Ogun State Nigeria, ILISHAN REMO, Nigeria'], 8 | [25.2677203,82.99125819999999, 'Ajagara, Banaras Hindu University Campus, Varanasi, Uttar Pradesh 221005, India'], 9 | [12.9503878,77.5022224, 'Mysore Road, Jnana Bharathi, Bengaluru, Karnataka 560056, India'], 10 | [31.549841,-97.1143146, '1301 S University Parks Dr, Waco, TX 76706, USA'], 11 | [39.9619537,116.3662615, '19 Xinjiekou Outer St, BeiTaiPingZhuang, Haidian Qu, Beijing Shi, China, 100875'], 12 | [53.8930389,27.5455567, 'praspiekt Niezaliežnasci 4, Minsk, Belarus'], 13 | [44.8184339,20.4575676, 'Studentski trg 1, Beograd, Serbia'], 14 | [42.5030333,-89.0309048, '700 College St, Beloit, WI 53511, USA'], 15 | [53.8930389,27.5455567, 'praspiekt Niezaliežnasci 4, Minsk, Belarus'], 16 | [10.6779085,78.74454879999999, 'Palkalaiperur, Tiruchirappalli, Tamil Nadu 620024, India'], 17 | [42.3504997,-71.1053991, 'Boston, MA 02215, USA'], 18 | [47.486135,19.057964, 'Budapest, Fővám tér 8., 1093 Hungary'], 19 | [35.3050053,-120.6624942, 'San Luis Obispo, CA 93407, USA'] 20 | ]; 21 | -------------------------------------------------------------------------------- /Course 4 - Using databases with Python/ex16/zoomed map with added location.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 4 - Using databases with Python/ex16/zoomed map with added location.jpg -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012, Michael Bostock 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * The name Michael Bostock may not be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. 
IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT, 21 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 26 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/README.txt: -------------------------------------------------------------------------------- 1 | Simple Python Search Spider, Page Ranker, and Visualizer 2 | 3 | This is a set of programs that emulate some of the functions of a 4 | search engine. They store their data in a SQLITE3 database named 5 | 'spider.sqlite'. This file can be removed at any time to restart the 6 | process. 7 | 8 | You should install the SQLite browser to view and modify 9 | the databases from: 10 | 11 | http://sqlitebrowser.org/ 12 | 13 | This program crawls a web site and pulls a series of pages into the 14 | database, recording the links between pages. 15 | 16 | Note: Windows has difficulty in displaying UTF-8 characters 17 | in the console so for each console window you open, you may need 18 | to type the following command before running this code: 19 | 20 | chcp 65001 21 | 22 | http://stackoverflow.com/questions/388490/unicode-characters-in-windows-command-line-how 23 | 24 | Mac: rm spider.sqlite 25 | Mac: python3 spider.py 26 | 27 | Win: del spider.sqlite 28 | Win: spider.py 29 | 30 | Enter web url or enter: http://www.dr-chuck.com/ 31 | ['http://www.dr-chuck.com'] 32 | How many pages:2 33 | 1 http://www.dr-chuck.com/ 12 34 | 2 http://www.dr-chuck.com/csev-blog/ 57 35 | How many pages: 36 | 37 | In this sample run, we told it to crawl a website and retrieve two 38 | pages. If you restart the program again and tell it to crawl more 39 | pages, it will not re-crawl any pages already in the database. Upon 40 | restart it goes to a random non-crawled page and starts there. So 41 | each successive run of spider.py is additive. 42 | 43 | Mac: python3 spider.py 44 | Win: spider.py 45 | 46 | Enter web url or enter: http://www.dr-chuck.com/ 47 | ['http://www.dr-chuck.com'] 48 | How many pages:3 49 | 3 http://www.dr-chuck.com/csev-blog 57 50 | 4 http://www.dr-chuck.com/dr-chuck/resume/speaking.htm 1 51 | 5 http://www.dr-chuck.com/dr-chuck/resume/index.htm 13 52 | How many pages: 53 | 54 | You can have multiple starting points in the same database - 55 | within the program these are called "webs". The spider 56 | chooses randomly amongst all non-visited links across all 57 | the webs. 58 | 59 | If you want to dump the contents of the spider.sqlite file, you can 60 | run spdump.py as follows: 61 | 62 | Mac: python3 spdump.py 63 | Win: spdump.py 64 | 65 | (5, None, 1.0, 3, u'http://www.dr-chuck.com/csev-blog') 66 | (3, None, 1.0, 4, u'http://www.dr-chuck.com/dr-chuck/resume/speaking.htm') 67 | (1, None, 1.0, 2, u'http://www.dr-chuck.com/csev-blog/') 68 | (1, None, 1.0, 5, u'http://www.dr-chuck.com/dr-chuck/resume/index.htm') 69 | 4 rows. 70 | 71 | This shows the number of incoming links, the old page rank, the new page 72 | rank, the id of the page, and the url of the page. 
The spdump.py program 73 | only shows pages that have at least one incoming link to them. 74 | 75 | Once you have a few pages in the database, you can run Page Rank on the 76 | pages using the sprank.py program. You simply tell it how many Page 77 | Rank iterations to run. A distilled sketch of what one iteration does appears at the end of this README. 78 | 79 | Mac: python3 sprank.py 80 | Win: sprank.py 81 | 82 | How many iterations:2 83 | 1 0.546848992536 84 | 2 0.226714939664 85 | [(1, 0.559), (2, 0.659), (3, 0.985), (4, 2.135), (5, 0.659)] 86 | 87 | You can dump the database again to see that page rank has been updated: 88 | 89 | Mac: python3 spdump.py 90 | Win: spdump.py 91 | 92 | (5, 1.0, 0.985, 3, u'http://www.dr-chuck.com/csev-blog') 93 | (3, 1.0, 2.135, 4, u'http://www.dr-chuck.com/dr-chuck/resume/speaking.htm') 94 | (1, 1.0, 0.659, 2, u'http://www.dr-chuck.com/csev-blog/') 95 | (1, 1.0, 0.659, 5, u'http://www.dr-chuck.com/dr-chuck/resume/index.htm') 96 | 4 rows. 97 | 98 | You can run sprank.py as many times as you like and it will simply refine 99 | the page rank the more times you run it. You can even run sprank.py a few times 100 | and then go spider a few more pages with spider.py and then run sprank.py 101 | to converge the page ranks. 102 | 103 | If you want to restart the Page Rank calculations without re-spidering the 104 | web pages, you can use spreset.py 105 | 106 | Mac: python3 spreset.py 107 | Win: spreset.py 108 | 109 | All pages set to a rank of 1.0 110 | 111 | Mac: python3 sprank.py 112 | Win: sprank.py 113 | 114 | How many iterations:50 115 | 1 0.546848992536 116 | 2 0.226714939664 117 | 3 0.0659516187242 118 | 4 0.0244199333 119 | 5 0.0102096489546 120 | 6 0.00610244329379 121 | ... 122 | 42 0.000109076928206 123 | 43 9.91987599002e-05 124 | 44 9.02151706798e-05 125 | 45 8.20451504471e-05 126 | 46 7.46150183837e-05 127 | 47 6.7857770908e-05 128 | 48 6.17124694224e-05 129 | 49 5.61236959327e-05 130 | 50 5.10410499467e-05 131 | [(512, 0.02963718031139026), (1, 12.790786721866658), (2, 28.939418898678284), (3, 6.808468390725946), (4, 13.469889092397006)] 132 | 133 | For each iteration of the page rank algorithm it prints the average 134 | change per page of the page rank. The network initially is quite 135 | unbalanced and so the individual page ranks are changing wildly. 136 | But in a few short iterations, the page rank converges. You 137 | should run sprank.py long enough that the page ranks converge. 138 | 139 | If you want to visualize the current top pages in terms of page rank, 140 | run spjson.py to write the pages out in JSON format to be viewed in a 141 | web browser. 142 | 143 | Mac: python3 spjson.py 144 | Win: spjson.py 145 | 146 | Creating JSON output on spider.js... 147 | How many nodes? 30 148 | Open force.html in a browser to view the visualization 149 | 150 | You can view this data by opening the file force.html in your web browser. 151 | This shows an automatic layout of the nodes and links. You can click and 152 | drag any node and you can also double click on a node to find the URL 153 | that is represented by the node. 154 | 155 | This visualization is provided using the force layout from: 156 | 157 | http://mbostock.github.com/d3/ 158 | 159 | If you rerun the other utilities and then re-run spjson.py - you merely 160 | have to press refresh in the browser to get the new data from spider.js.
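For reference, the rank update that each sprank.py iteration performs (the full
program appears in the sprank.py listing later in this archive) boils down to a
few lines. The following is a minimal, self-contained sketch of that update; the
links and starting ranks here are made up for illustration, whereas the real
program loads them from spider.sqlite:

    # Sketch of one sprank.py-style iteration with illustrative data.
    links = [(1, 2), (2, 3), (3, 1), (3, 2)]   # (from_id, to_id) pairs
    prev_ranks = {1: 1.0, 2: 1.0, 3: 1.0}      # every page starts at 1.0

    next_ranks = {node: 0.0 for node in prev_ranks}
    total = sum(prev_ranks.values())

    # Each page splits its old rank evenly across its outbound links.
    for node, old_rank in prev_ranks.items():
        give_ids = [to_id for (from_id, to_id) in links if from_id == node]
        if not give_ids:
            continue
        amount = old_rank / len(give_ids)
        for to_id in give_ids:
            next_ranks[to_id] += amount

    # Rank that was not handed out "evaporates" back evenly so the
    # total rank in the system stays constant.
    evap = (total - sum(next_ranks.values())) / len(next_ranks)
    for node in next_ranks:
        next_ranks[node] += evap

    # The average per-page change is the convergence number printed
    # on each line of the sample runs above.
    avediff = sum(abs(prev_ranks[n] - next_ranks[n])
                  for n in prev_ranks) / len(prev_ranks)
    print(avediff)

Run sprank.py until this number gets small; at that point further iterations
barely move the ranks.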
161 | 162 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/__pycache__/spider.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/__pycache__/spider.cpython-36.pyc -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/dr-chuck-site-dump.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/dr-chuck-site-dump.jpg -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/dr-chuck-site-top25.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/dr-chuck-site-top25.jpg -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/force.css: -------------------------------------------------------------------------------- 1 | circle.node { 2 | stroke: #fff; 3 | stroke-width: 1.5px; 4 | } 5 | 6 | line.link { 7 | stroke: #999; 8 | stroke-opacity: .6; 9 | } 10 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/force.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |If you don't see a chart above, check the JavaScript console. You may 16 | need to use a different browser.
17 | 18 | 19 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/force.js: -------------------------------------------------------------------------------- 1 | var width = 600, 2 | height = 600; 3 | 4 | var color = d3.scale.category20(); 5 | 6 | var dist = (width + height) / 4; 7 | 8 | var force = d3.layout.force() 9 | .charge(-120) 10 | .linkDistance(dist) 11 | .size([width, height]); 12 | 13 | function getrank(rval) { 14 | return (rval/2.0) + 3; 15 | } 16 | 17 | function getcolor(rval) { 18 | return color(rval); 19 | } 20 | 21 | var svg = d3.select("#chart").append("svg") 22 | .attr("width", width) 23 | .attr("height", height); 24 | 25 | function loadData(json) { 26 | force 27 | .nodes(json.nodes) 28 | .links(json.links); 29 | 30 | var k = Math.sqrt(json.nodes.length / (width * height)); 31 | 32 | force 33 | .charge(-10 / k) 34 | .gravity(100 * k) 35 | .start(); 36 | 37 | var link = svg.selectAll("line.link") 38 | .data(json.links) 39 | .enter().append("line") 40 | .attr("class", "link") 41 | .style("stroke-width", function(d) { return Math.sqrt(d.value); }); 42 | 43 | var node = svg.selectAll("circle.node") 44 | .data(json.nodes) 45 | .enter().append("circle") 46 | .attr("class", "node") 47 | .attr("r", function(d) { return getrank(d.rank); } ) 48 | .style("fill", function(d) { return getcolor(d.rank); }) 49 | .on("dblclick",function(d) { 50 | if ( confirm('Do you want to open '+d.url) ) 51 | window.open(d.url,'_new',''); 52 | d3.event.stopPropagation(); 53 | }) 54 | .call(force.drag); 55 | 56 | node.append("title") 57 | .text(function(d) { return d.url; }); 58 | 59 | force.on("tick", function() { 60 | link.attr("x1", function(d) { return d.source.x; }) 61 | .attr("y1", function(d) { return d.source.y; }) 62 | .attr("x2", function(d) { return d.target.x; }) 63 | .attr("y2", function(d) { return d.target.y; }); 64 | 65 | node.attr("cx", function(d) { return d.x; }) 66 | .attr("cy", function(d) { return d.y; }); 67 | }); 68 | 69 | } 70 | loadData(spiderJson); 71 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/spdump.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | conn = sqlite3.connect('spider.sqlite') 4 | cur = conn.cursor() 5 | 6 | cur.execute('''SELECT COUNT(from_id) AS inbound, old_rank, new_rank, id, url 7 | FROM Pages JOIN Links ON Pages.id = Links.to_id 8 | WHERE html IS NOT NULL 9 | GROUP BY id ORDER BY inbound DESC''') 10 | 11 | count = 0 12 | for row in cur : 13 | if count < 50 : print(row) 14 | count = count + 1 15 | print(count, 'rows.') 16 | cur.close() 17 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/spider-coincube.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/spider-coincube.sqlite -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/spider-dr-chuck.sqlite: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/spider-dr-chuck.sqlite -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/spider.js: -------------------------------------------------------------------------------- 1 | spiderJson = {"nodes":[ 2 | {"weight":21,"rank":19.0, "id":1, "url":"http://variance.hu"}, 3 | {"weight":27,"rank":15.558784255770082, "id":22, "url":"http://variance.hu/2018/01/03/kilenc-ev"}, 4 | {"weight":22,"rank":19.0, "id":107, "url":"http://variance.hu/tag/ant"}, 5 | {"weight":23,"rank":19.0, "id":150, "url":"http://variance.hu/tag/block"}, 6 | {"weight":22,"rank":19.0, "id":157, "url":"http://variance.hu/tag/bor"}, 7 | {"weight":22,"rank":19.0, "id":213, "url":"http://variance.hu/tag/cyptocurrency"}, 8 | {"weight":20,"rank":15.558784255770082, "id":299, "url":"http://variance.hu/tag/gephaz"}, 9 | {"weight":21,"rank":15.558784255770082, "id":341, "url":"http://variance.hu/tag/jobs"}, 10 | {"weight":21,"rank":15.558784255770082, "id":342, "url":"http://variance.hu/tag/job-seeking"}, 11 | {"weight":22,"rank":15.756062326850063, "id":354, "url":"http://variance.hu/tag/kotveny"}, 12 | {"weight":20,"rank":15.558784255770082, "id":361, "url":"http://variance.hu/tag/linear-regression"}, 13 | {"weight":20,"rank":16.528019996293462, "id":372, "url":"http://variance.hu/tag/manipulation"}, 14 | {"weight":27,"rank":15.609796663166051, "id":469, "url":"http://variance.hu/tag/python"}, 15 | {"weight":20,"rank":15.558784255770082, "id":553, "url":"http://variance.hu/tag/ta"}, 16 | {"weight":20,"rank":15.558784255770082, "id":556, "url":"http://variance.hu/tag/tangle"}, 17 | {"weight":21,"rank":15.558784255770082, "id":602, "url":"http://variance.hu/tag/utxo"}, 18 | {"weight":20,"rank":15.558784255770082, "id":621, "url":"http://variance.hu/tag/whales"}, 19 | {"weight":29,"rank":15.558784255770082, "id":636, "url":"http://variance.hu/tag/zec"}, 20 | {"weight":19,"rank":0.0, "id":661, "url":"http://variance.hu/2017/08/28/ismerkedes-a-bittrex-api-val-python"}, 21 | {"weight":9,"rank":0.00812587020466735, "id":675, "url":"http://variance.hu/2014/01/09/enervalt-piaci-hangulat-mellett-szaguld-az-oko-szektor"}], 22 | "links":[ 23 | {"source":0,"target":0,"value":3}, 24 | {"source":0,"target":1,"value":3}, 25 | {"source":0,"target":2,"value":3}, 26 | {"source":0,"target":3,"value":3}, 27 | {"source":0,"target":4,"value":3}, 28 | {"source":0,"target":5,"value":3}, 29 | {"source":0,"target":6,"value":3}, 30 | {"source":0,"target":7,"value":3}, 31 | {"source":0,"target":8,"value":3}, 32 | {"source":0,"target":9,"value":3}, 33 | {"source":0,"target":10,"value":3}, 34 | {"source":0,"target":11,"value":3}, 35 | {"source":0,"target":12,"value":3}, 36 | {"source":0,"target":13,"value":3}, 37 | {"source":0,"target":14,"value":3}, 38 | {"source":0,"target":15,"value":3}, 39 | {"source":0,"target":16,"value":3}, 40 | {"source":0,"target":17,"value":3}, 41 | {"source":2,"target":0,"value":3}, 42 | {"source":2,"target":2,"value":3}, 43 | {"source":2,"target":3,"value":3}, 44 | {"source":2,"target":4,"value":3}, 45 | {"source":2,"target":5,"value":3}, 46 | {"source":2,"target":6,"value":3}, 47 | {"source":2,"target":7,"value":3}, 48 | {"source":2,"target":8,"value":3}, 49 | {"source":2,"target":9,"value":3}, 50 | {"source":2,"target":10,"value":3}, 51 | 
{"source":2,"target":11,"value":3}, 52 | {"source":2,"target":12,"value":3}, 53 | {"source":2,"target":13,"value":3}, 54 | {"source":2,"target":14,"value":3}, 55 | {"source":2,"target":15,"value":3}, 56 | {"source":2,"target":16,"value":3}, 57 | {"source":2,"target":17,"value":3}, 58 | {"source":2,"target":1,"value":3}, 59 | {"source":13,"target":0,"value":3}, 60 | {"source":13,"target":13,"value":3}, 61 | {"source":13,"target":2,"value":3}, 62 | {"source":13,"target":3,"value":3}, 63 | {"source":13,"target":4,"value":3}, 64 | {"source":13,"target":5,"value":3}, 65 | {"source":13,"target":6,"value":3}, 66 | {"source":13,"target":7,"value":3}, 67 | {"source":13,"target":8,"value":3}, 68 | {"source":13,"target":9,"value":3}, 69 | {"source":13,"target":10,"value":3}, 70 | {"source":13,"target":11,"value":3}, 71 | {"source":13,"target":12,"value":3}, 72 | {"source":13,"target":14,"value":3}, 73 | {"source":13,"target":15,"value":3}, 74 | {"source":13,"target":16,"value":3}, 75 | {"source":13,"target":17,"value":3}, 76 | {"source":13,"target":1,"value":3}, 77 | {"source":5,"target":0,"value":3}, 78 | {"source":5,"target":5,"value":3}, 79 | {"source":5,"target":2,"value":3}, 80 | {"source":5,"target":3,"value":3}, 81 | {"source":5,"target":4,"value":3}, 82 | {"source":5,"target":6,"value":3}, 83 | {"source":5,"target":7,"value":3}, 84 | {"source":5,"target":8,"value":3}, 85 | {"source":5,"target":9,"value":3}, 86 | {"source":5,"target":10,"value":3}, 87 | {"source":5,"target":11,"value":3}, 88 | {"source":5,"target":12,"value":3}, 89 | {"source":5,"target":13,"value":3}, 90 | {"source":5,"target":14,"value":3}, 91 | {"source":5,"target":15,"value":3}, 92 | {"source":5,"target":16,"value":3}, 93 | {"source":5,"target":17,"value":3}, 94 | {"source":5,"target":1,"value":3}, 95 | {"source":15,"target":0,"value":3}, 96 | {"source":15,"target":15,"value":3}, 97 | {"source":15,"target":2,"value":3}, 98 | {"source":15,"target":3,"value":3}, 99 | {"source":15,"target":4,"value":3}, 100 | {"source":15,"target":5,"value":3}, 101 | {"source":15,"target":6,"value":3}, 102 | {"source":15,"target":7,"value":3}, 103 | {"source":15,"target":8,"value":3}, 104 | {"source":15,"target":9,"value":3}, 105 | {"source":15,"target":10,"value":3}, 106 | {"source":15,"target":11,"value":3}, 107 | {"source":15,"target":12,"value":3}, 108 | {"source":15,"target":13,"value":3}, 109 | {"source":15,"target":14,"value":3}, 110 | {"source":15,"target":16,"value":3}, 111 | {"source":15,"target":17,"value":3}, 112 | {"source":15,"target":1,"value":3}, 113 | {"source":4,"target":0,"value":3}, 114 | {"source":4,"target":4,"value":3}, 115 | {"source":4,"target":2,"value":3}, 116 | {"source":4,"target":3,"value":3}, 117 | {"source":4,"target":5,"value":3}, 118 | {"source":4,"target":6,"value":3}, 119 | {"source":4,"target":7,"value":3}, 120 | {"source":4,"target":8,"value":3}, 121 | {"source":4,"target":9,"value":3}, 122 | {"source":4,"target":10,"value":3}, 123 | {"source":4,"target":11,"value":3}, 124 | {"source":4,"target":12,"value":3}, 125 | {"source":4,"target":13,"value":3}, 126 | {"source":4,"target":14,"value":3}, 127 | {"source":4,"target":15,"value":3}, 128 | {"source":4,"target":16,"value":3}, 129 | {"source":4,"target":17,"value":3}, 130 | {"source":4,"target":1,"value":3}, 131 | {"source":14,"target":0,"value":3}, 132 | {"source":14,"target":14,"value":3}, 133 | {"source":14,"target":2,"value":3}, 134 | {"source":14,"target":3,"value":3}, 135 | {"source":14,"target":4,"value":3}, 136 | {"source":14,"target":5,"value":3}, 
137 | {"source":14,"target":6,"value":3}, 138 | {"source":14,"target":7,"value":3}, 139 | {"source":14,"target":8,"value":3}, 140 | {"source":14,"target":9,"value":3}, 141 | {"source":14,"target":10,"value":3}, 142 | {"source":14,"target":11,"value":3}, 143 | {"source":14,"target":12,"value":3}, 144 | {"source":14,"target":13,"value":3}, 145 | {"source":14,"target":15,"value":3}, 146 | {"source":14,"target":16,"value":3}, 147 | {"source":14,"target":17,"value":3}, 148 | {"source":14,"target":1,"value":3}, 149 | {"source":12,"target":0,"value":3}, 150 | {"source":12,"target":12,"value":3}, 151 | {"source":12,"target":18,"value":3}, 152 | {"source":12,"target":2,"value":3}, 153 | {"source":12,"target":3,"value":3}, 154 | {"source":12,"target":4,"value":3}, 155 | {"source":12,"target":5,"value":3}, 156 | {"source":12,"target":6,"value":3}, 157 | {"source":12,"target":7,"value":3}, 158 | {"source":12,"target":8,"value":3}, 159 | {"source":12,"target":9,"value":3}, 160 | {"source":12,"target":10,"value":3}, 161 | {"source":12,"target":11,"value":3}, 162 | {"source":12,"target":13,"value":3}, 163 | {"source":12,"target":14,"value":3}, 164 | {"source":12,"target":15,"value":3}, 165 | {"source":12,"target":16,"value":3}, 166 | {"source":12,"target":17,"value":3}, 167 | {"source":12,"target":1,"value":3}, 168 | {"source":7,"target":0,"value":3}, 169 | {"source":7,"target":7,"value":3}, 170 | {"source":7,"target":8,"value":3}, 171 | {"source":7,"target":2,"value":3}, 172 | {"source":7,"target":3,"value":3}, 173 | {"source":7,"target":4,"value":3}, 174 | {"source":7,"target":5,"value":3}, 175 | {"source":7,"target":6,"value":3}, 176 | {"source":7,"target":9,"value":3}, 177 | {"source":7,"target":10,"value":3}, 178 | {"source":7,"target":11,"value":3}, 179 | {"source":7,"target":12,"value":3}, 180 | {"source":7,"target":13,"value":3}, 181 | {"source":7,"target":14,"value":3}, 182 | {"source":7,"target":15,"value":3}, 183 | {"source":7,"target":16,"value":3}, 184 | {"source":7,"target":17,"value":3}, 185 | {"source":7,"target":1,"value":3}, 186 | {"source":16,"target":0,"value":3}, 187 | {"source":16,"target":16,"value":3}, 188 | {"source":16,"target":2,"value":3}, 189 | {"source":16,"target":3,"value":3}, 190 | {"source":16,"target":4,"value":3}, 191 | {"source":16,"target":5,"value":3}, 192 | {"source":16,"target":6,"value":3}, 193 | {"source":16,"target":7,"value":3}, 194 | {"source":16,"target":8,"value":3}, 195 | {"source":16,"target":9,"value":3}, 196 | {"source":16,"target":10,"value":3}, 197 | {"source":16,"target":11,"value":3}, 198 | {"source":16,"target":12,"value":3}, 199 | {"source":16,"target":13,"value":3}, 200 | {"source":16,"target":14,"value":3}, 201 | {"source":16,"target":15,"value":3}, 202 | {"source":16,"target":17,"value":3}, 203 | {"source":16,"target":1,"value":3}, 204 | {"source":9,"target":0,"value":3}, 205 | {"source":9,"target":9,"value":3}, 206 | {"source":9,"target":19,"value":3}, 207 | {"source":9,"target":2,"value":3}, 208 | {"source":9,"target":3,"value":3}, 209 | {"source":9,"target":4,"value":3}, 210 | {"source":9,"target":5,"value":3}, 211 | {"source":9,"target":6,"value":3}, 212 | {"source":9,"target":7,"value":3}, 213 | {"source":9,"target":8,"value":3}, 214 | {"source":9,"target":10,"value":3}, 215 | {"source":9,"target":11,"value":3}, 216 | {"source":9,"target":12,"value":3}, 217 | {"source":9,"target":13,"value":3}, 218 | {"source":9,"target":14,"value":3}, 219 | {"source":9,"target":15,"value":3}, 220 | {"source":9,"target":16,"value":3}, 221 | 
{"source":9,"target":17,"value":3}, 222 | {"source":9,"target":1,"value":3}, 223 | {"source":19,"target":0,"value":3}, 224 | {"source":19,"target":19,"value":3}, 225 | {"source":19,"target":9,"value":3}, 226 | {"source":19,"target":2,"value":3}, 227 | {"source":19,"target":3,"value":3}, 228 | {"source":19,"target":4,"value":3}, 229 | {"source":19,"target":5,"value":3}, 230 | {"source":18,"target":0,"value":3}, 231 | {"source":18,"target":18,"value":3}, 232 | {"source":18,"target":12,"value":3}, 233 | {"source":18,"target":2,"value":3}, 234 | {"source":18,"target":3,"value":3}, 235 | {"source":18,"target":4,"value":3}, 236 | {"source":18,"target":5,"value":3}, 237 | {"source":18,"target":6,"value":3}, 238 | {"source":18,"target":7,"value":3}, 239 | {"source":18,"target":8,"value":3}, 240 | {"source":18,"target":9,"value":3}, 241 | {"source":18,"target":10,"value":3}, 242 | {"source":18,"target":11,"value":3}, 243 | {"source":18,"target":13,"value":3}, 244 | {"source":18,"target":14,"value":3}, 245 | {"source":18,"target":15,"value":3}, 246 | {"source":18,"target":16,"value":3}, 247 | {"source":18,"target":17,"value":3}, 248 | {"source":18,"target":1,"value":3}, 249 | {"source":3,"target":0,"value":3}, 250 | {"source":3,"target":3,"value":3}, 251 | {"source":3,"target":2,"value":3}, 252 | {"source":3,"target":4,"value":3}, 253 | {"source":3,"target":5,"value":3}, 254 | {"source":3,"target":6,"value":3}, 255 | {"source":3,"target":7,"value":3}, 256 | {"source":3,"target":8,"value":3}, 257 | {"source":3,"target":9,"value":3}, 258 | {"source":3,"target":10,"value":3}, 259 | {"source":3,"target":11,"value":3}, 260 | {"source":3,"target":12,"value":3}, 261 | {"source":3,"target":13,"value":3}, 262 | {"source":3,"target":14,"value":3}, 263 | {"source":3,"target":15,"value":3}, 264 | {"source":3,"target":16,"value":3}, 265 | {"source":3,"target":17,"value":3}, 266 | {"source":3,"target":1,"value":3}, 267 | {"source":1,"target":0,"value":3}, 268 | {"source":1,"target":1,"value":3}, 269 | {"source":1,"target":2,"value":3}, 270 | {"source":1,"target":3,"value":3}, 271 | {"source":1,"target":4,"value":3}, 272 | {"source":1,"target":5,"value":3}, 273 | {"source":1,"target":6,"value":3}, 274 | {"source":1,"target":7,"value":3}, 275 | {"source":1,"target":8,"value":3}, 276 | {"source":1,"target":9,"value":3}, 277 | {"source":1,"target":10,"value":3}, 278 | {"source":1,"target":11,"value":3}, 279 | {"source":1,"target":12,"value":3}, 280 | {"source":1,"target":13,"value":3}, 281 | {"source":1,"target":14,"value":3}, 282 | {"source":1,"target":15,"value":3}, 283 | {"source":1,"target":16,"value":3}, 284 | {"source":1,"target":17,"value":3}, 285 | {"source":10,"target":0,"value":3}, 286 | {"source":10,"target":10,"value":3}, 287 | {"source":10,"target":2,"value":3}, 288 | {"source":10,"target":3,"value":3}, 289 | {"source":10,"target":4,"value":3}, 290 | {"source":10,"target":5,"value":3}, 291 | {"source":10,"target":6,"value":3}, 292 | {"source":10,"target":7,"value":3}, 293 | {"source":10,"target":8,"value":3}, 294 | {"source":10,"target":9,"value":3}, 295 | {"source":10,"target":11,"value":3}, 296 | {"source":10,"target":12,"value":3}, 297 | {"source":10,"target":13,"value":3}, 298 | {"source":10,"target":14,"value":3}, 299 | {"source":10,"target":15,"value":3}, 300 | {"source":10,"target":16,"value":3}, 301 | {"source":10,"target":17,"value":3}, 302 | {"source":10,"target":1,"value":3}, 303 | {"source":17,"target":0,"value":3}, 304 | {"source":17,"target":17,"value":3}, 305 | 
{"source":17,"target":2,"value":3}, 306 | {"source":17,"target":3,"value":3}, 307 | {"source":17,"target":4,"value":3}, 308 | {"source":17,"target":5,"value":3}, 309 | {"source":17,"target":6,"value":3}, 310 | {"source":17,"target":7,"value":3}, 311 | {"source":17,"target":8,"value":3}, 312 | {"source":17,"target":9,"value":3}, 313 | {"source":17,"target":10,"value":3}, 314 | {"source":17,"target":11,"value":3}, 315 | {"source":17,"target":12,"value":3}, 316 | {"source":17,"target":13,"value":3}, 317 | {"source":17,"target":14,"value":3}, 318 | {"source":17,"target":15,"value":3}, 319 | {"source":17,"target":16,"value":3}, 320 | {"source":17,"target":1,"value":3}, 321 | {"source":8,"target":0,"value":3}, 322 | {"source":8,"target":8,"value":3}, 323 | {"source":8,"target":7,"value":3}, 324 | {"source":8,"target":2,"value":3}, 325 | {"source":8,"target":3,"value":3}, 326 | {"source":8,"target":4,"value":3}, 327 | {"source":8,"target":5,"value":3}, 328 | {"source":8,"target":6,"value":3}, 329 | {"source":8,"target":9,"value":3}, 330 | {"source":8,"target":10,"value":3}, 331 | {"source":8,"target":11,"value":3}, 332 | {"source":8,"target":12,"value":3}, 333 | {"source":8,"target":13,"value":3}, 334 | {"source":8,"target":14,"value":3}, 335 | {"source":8,"target":15,"value":3}, 336 | {"source":8,"target":16,"value":3}, 337 | {"source":8,"target":17,"value":3}, 338 | {"source":8,"target":1,"value":3}, 339 | {"source":6,"target":0,"value":3}, 340 | {"source":6,"target":6,"value":3}, 341 | {"source":6,"target":2,"value":3}, 342 | {"source":6,"target":3,"value":3}, 343 | {"source":6,"target":4,"value":3}, 344 | {"source":6,"target":5,"value":3}, 345 | {"source":6,"target":7,"value":3}, 346 | {"source":6,"target":8,"value":3}, 347 | {"source":6,"target":9,"value":3}, 348 | {"source":6,"target":10,"value":3}, 349 | {"source":6,"target":11,"value":3}, 350 | {"source":6,"target":12,"value":3}, 351 | {"source":6,"target":13,"value":3}, 352 | {"source":6,"target":14,"value":3}, 353 | {"source":6,"target":15,"value":3}, 354 | {"source":6,"target":16,"value":3}, 355 | {"source":6,"target":17,"value":3}, 356 | {"source":6,"target":1,"value":3}, 357 | {"source":11,"target":0,"value":3}, 358 | {"source":11,"target":11,"value":3}, 359 | {"source":11,"target":2,"value":3}, 360 | {"source":11,"target":3,"value":3}, 361 | {"source":11,"target":4,"value":3}, 362 | {"source":11,"target":5,"value":3}]}; -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/spider.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import urllib.error 3 | import ssl 4 | from urllib.parse import urljoin 5 | from urllib.parse import urlparse 6 | from urllib.request import urlopen 7 | from bs4 import BeautifulSoup 8 | 9 | # Ignore SSL certificate errors 10 | ctx = ssl.create_default_context() 11 | ctx.check_hostname = False 12 | ctx.verify_mode = ssl.CERT_NONE 13 | 14 | conn = sqlite3.connect('spider.sqlite') 15 | cur = conn.cursor() 16 | 17 | cur.execute('''CREATE TABLE IF NOT EXISTS Pages 18 | (id INTEGER PRIMARY KEY, url TEXT UNIQUE, html TEXT, 19 | error INTEGER, old_rank REAL, new_rank REAL)''') 20 | 21 | cur.execute('''CREATE TABLE IF NOT EXISTS Links 22 | (from_id INTEGER, to_id INTEGER)''') 23 | 24 | cur.execute('''CREATE TABLE IF NOT EXISTS Webs (url TEXT UNIQUE)''') 25 | 26 | # Check to see if we are already in progress... 
27 | cur.execute('SELECT id,url FROM Pages WHERE html is NULL and error is NULL ORDER BY RANDOM() LIMIT 1') 28 | row = cur.fetchone() 29 | if row is not None: 30 | print("Restarting existing crawl. Remove spider.sqlite to start a fresh crawl.") 31 | else : 32 | starturl = input('Enter web url or enter: ') 33 | if ( len(starturl) < 1 ) : starturl = 'http://www.dr-chuck.com/' 34 | if ( starturl.endswith('/') ) : starturl = starturl[:-1] 35 | web = starturl 36 | if ( starturl.endswith('.htm') or starturl.endswith('.html') ) : 37 | pos = starturl.rfind('/') 38 | web = starturl[:pos] 39 | 40 | if ( len(web) > 1 ) : 41 | cur.execute('INSERT OR IGNORE INTO Webs (url) VALUES ( ? )', ( web, ) ) 42 | cur.execute('INSERT OR IGNORE INTO Pages (url, html, new_rank) VALUES ( ?, NULL, 1.0 )', ( starturl, ) ) 43 | conn.commit() 44 | 45 | # Get the current webs 46 | cur.execute('''SELECT url FROM Webs''') 47 | webs = list() 48 | for row in cur: 49 | webs.append(str(row[0])) 50 | 51 | print(webs) 52 | 53 | many = 0 54 | while True: 55 | if ( many < 1 ) : 56 | sval = input('How many pages:') 57 | if ( len(sval) < 1 ) : break 58 | many = int(sval) 59 | many = many - 1 60 | 61 | cur.execute('SELECT id,url FROM Pages WHERE html is NULL and error is NULL ORDER BY RANDOM() LIMIT 1') 62 | try: 63 | row = cur.fetchone() 64 | # print row 65 | fromid = row[0] 66 | url = row[1] 67 | except: 68 | print('No unretrieved HTML pages found') 69 | many = 0 70 | break 71 | 72 | print(fromid, url, end=' ') 73 | 74 | # If we are retrieving this page, there should be no links from it 75 | cur.execute('DELETE from Links WHERE from_id=?', (fromid, ) ) 76 | try: 77 | document = urlopen(url, context=ctx) 78 | 79 | html = document.read() 80 | if document.getcode() != 200 : 81 | print("Error on page: ",document.getcode()) 82 | cur.execute('UPDATE Pages SET error=? WHERE url=?', (document.getcode(), url) ) 83 | 84 | if 'text/html' != document.info().get_content_type() : 85 | print("Ignore non text/html page") 86 | cur.execute('DELETE FROM Pages WHERE url=?', ( url, ) ) 87 | cur.execute('UPDATE Pages SET error=0 WHERE url=?', (url, ) ) 88 | conn.commit() 89 | continue 90 | 91 | print('('+str(len(html))+')', end=' ') 92 | 93 | soup = BeautifulSoup(html, "html.parser") 94 | except KeyboardInterrupt: 95 | print('') 96 | print('Program interrupted by user...') 97 | break 98 | except: 99 | print("Unable to retrieve or parse page") 100 | cur.execute('UPDATE Pages SET error=-1 WHERE url=?', (url, ) ) 101 | conn.commit() 102 | continue 103 | 104 | cur.execute('INSERT OR IGNORE INTO Pages (url, html, new_rank) VALUES ( ?, NULL, 1.0 )', ( url, ) ) 105 | cur.execute('UPDATE Pages SET html=? 
WHERE url=?', (memoryview(html), url ) ) 106 | conn.commit() 107 | 108 | # Retrieve all of the anchor tags 109 | tags = soup('a') 110 | count = 0 111 | for tag in tags: 112 | href = tag.get('href', None) 113 | if ( href is None ) : continue 114 | # Resolve relative references like href="/contact" 115 | up = urlparse(href) 116 | if ( len(up.scheme) < 1 ) : 117 | href = urljoin(url, href) 118 | ipos = href.find('#') 119 | if ( ipos > 1 ) : href = href[:ipos] 120 | if ( href.endswith('.png') or href.endswith('.jpg') or href.endswith('.gif') ) : continue 121 | if ( href.endswith('/') ) : href = href[:-1] 122 | # print href 123 | if ( len(href) < 1 ) : continue 124 | 125 | # Check if the URL is in any of the webs 126 | found = False 127 | for web in webs: 128 | if ( href.startswith(web) ) : 129 | found = True 130 | break 131 | if not found : continue 132 | 133 | cur.execute('INSERT OR IGNORE INTO Pages (url, html, new_rank) VALUES ( ?, NULL, 1.0 )', ( href, ) ) 134 | count = count + 1 135 | conn.commit() 136 | 137 | cur.execute('SELECT id FROM Pages WHERE url=? LIMIT 1', ( href, )) 138 | try: 139 | row = cur.fetchone() 140 | toid = row[0] 141 | except: 142 | print('Could not retrieve id') 143 | continue 144 | # print fromid, toid 145 | cur.execute('INSERT OR IGNORE INTO Links (from_id, to_id) VALUES ( ?, ? )', ( fromid, toid ) ) 146 | 147 | 148 | print(count) 149 | 150 | cur.close() 151 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/spider.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/spider.sqlite -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/spjson.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | conn = sqlite3.connect('spider.sqlite') 4 | cur = conn.cursor() 5 | 6 | print("Creating JSON output on spider.js...") 7 | howmany = int(input("How many nodes? 
")) 8 | 9 | cur.execute('''SELECT COUNT(from_id) AS inbound, old_rank, new_rank, id, url 10 | FROM Pages JOIN Links ON Pages.id = Links.to_id 11 | WHERE html IS NOT NULL AND ERROR IS NULL 12 | GROUP BY id ORDER BY id,inbound''') 13 | 14 | fhand = open('spider.js','w') 15 | nodes = list() 16 | maxrank = None 17 | minrank = None 18 | for row in cur : 19 | nodes.append(row) 20 | rank = row[2] 21 | if maxrank is None or maxrank < rank: maxrank = rank 22 | if minrank is None or minrank > rank : minrank = rank 23 | if len(nodes) > howmany : break 24 | 25 | if maxrank == minrank or maxrank is None or minrank is None: 26 | print("Error - please run sprank.py to compute page rank") 27 | quit() 28 | 29 | fhand.write('spiderJson = {"nodes":[\n') 30 | count = 0 31 | map = dict() 32 | ranks = dict() 33 | for row in nodes : 34 | if count > 0 : fhand.write(',\n') 35 | # print row 36 | rank = row[2] 37 | rank = 19 * ( (rank - minrank) / (maxrank - minrank) ) 38 | fhand.write('{'+'"weight":'+str(row[0])+',"rank":'+str(rank)+',') 39 | fhand.write(' "id":'+str(row[3])+', "url":"'+row[4]+'"}') 40 | map[row[3]] = count 41 | ranks[row[3]] = rank 42 | count = count + 1 43 | fhand.write('],\n') 44 | 45 | cur.execute('''SELECT DISTINCT from_id, to_id FROM Links''') 46 | fhand.write('"links":[\n') 47 | 48 | count = 0 49 | for row in cur : 50 | # print row 51 | if row[0] not in map or row[1] not in map : continue 52 | if count > 0 : fhand.write(',\n') 53 | rank = ranks[row[0]] 54 | srank = 19 * ( (rank - minrank) / (maxrank - minrank) ) 55 | fhand.write('{"source":'+str(map[row[0]])+',"target":'+str(map[row[1]])+',"value":3}') 56 | count = count + 1 57 | fhand.write(']};') 58 | fhand.close() 59 | cur.close() 60 | 61 | print("Open force.html in a browser to view the visualization") 62 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/sprank.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | conn = sqlite3.connect('spider.sqlite') 4 | cur = conn.cursor() 5 | 6 | # Find the ids that send out page rank - we only are interested 7 | # in pages in the SCC that have in and out links 8 | cur.execute('''SELECT DISTINCT from_id FROM Links''') 9 | from_ids = list() 10 | for row in cur: 11 | from_ids.append(row[0]) 12 | 13 | # Find the ids that receive page rank 14 | to_ids = list() 15 | links = list() 16 | cur.execute('''SELECT DISTINCT from_id, to_id FROM Links''') 17 | for row in cur: 18 | from_id = row[0] 19 | to_id = row[1] 20 | if from_id == to_id : continue 21 | if from_id not in from_ids : continue 22 | if to_id not in from_ids : continue 23 | links.append(row) 24 | if to_id not in to_ids : to_ids.append(to_id) 25 | 26 | # Get latest page ranks for strongly connected component 27 | prev_ranks = dict() 28 | for node in from_ids: 29 | cur.execute('''SELECT new_rank FROM Pages WHERE id = ?''', (node, )) 30 | row = cur.fetchone() 31 | prev_ranks[node] = row[0] 32 | 33 | sval = input('How many iterations:') 34 | many = 1 35 | if ( len(sval) > 0 ) : many = int(sval) 36 | 37 | # Sanity check 38 | if len(prev_ranks) < 1 : 39 | print("Nothing to page rank. 
Check data.") 40 | quit() 41 | 42 | # Lets do Page Rank in memory so it is really fast 43 | for i in range(many): 44 | # print prev_ranks.items()[:5] 45 | next_ranks = dict(); 46 | total = 0.0 47 | for (node, old_rank) in list(prev_ranks.items()): 48 | total = total + old_rank 49 | next_ranks[node] = 0.0 50 | # print total 51 | 52 | # Find the number of outbound links and sent the page rank down each 53 | for (node, old_rank) in list(prev_ranks.items()): 54 | # print node, old_rank 55 | give_ids = list() 56 | for (from_id, to_id) in links: 57 | if from_id != node : continue 58 | # print ' ',from_id,to_id 59 | 60 | if to_id not in to_ids: continue 61 | give_ids.append(to_id) 62 | if ( len(give_ids) < 1 ) : continue 63 | amount = old_rank / len(give_ids) 64 | # print node, old_rank,amount, give_ids 65 | 66 | for id in give_ids: 67 | next_ranks[id] = next_ranks[id] + amount 68 | 69 | newtot = 0 70 | for (node, next_rank) in list(next_ranks.items()): 71 | newtot = newtot + next_rank 72 | evap = (total - newtot) / len(next_ranks) 73 | 74 | # print newtot, evap 75 | for node in next_ranks: 76 | next_ranks[node] = next_ranks[node] + evap 77 | 78 | newtot = 0 79 | for (node, next_rank) in list(next_ranks.items()): 80 | newtot = newtot + next_rank 81 | 82 | # Compute the per-page average change from old rank to new rank 83 | # As indication of convergence of the algorithm 84 | totdiff = 0 85 | for (node, old_rank) in list(prev_ranks.items()): 86 | new_rank = next_ranks[node] 87 | diff = abs(old_rank-new_rank) 88 | totdiff = totdiff + diff 89 | 90 | avediff = totdiff / len(prev_ranks) 91 | print(i+1, avediff) 92 | 93 | # rotate 94 | prev_ranks = next_ranks 95 | 96 | # Put the final ranks back into the database 97 | print(list(next_ranks.items())[:5]) 98 | cur.execute('''UPDATE Pages SET old_rank=new_rank''') 99 | for (id, new_rank) in list(next_ranks.items()) : 100 | cur.execute('''UPDATE Pages SET new_rank=? 
WHERE id=?''', (new_rank, id)) 101 | conn.commit() 102 | cur.close() 103 | 104 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/spreset.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | conn = sqlite3.connect('spider.sqlite') 4 | cur = conn.cursor() 5 | 6 | cur.execute('''UPDATE Pages SET new_rank=1.0, old_rank=0.0''') 7 | conn.commit() 8 | 9 | cur.close() 10 | 11 | print("All pages set to a rank of 1.0") 12 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/variance-site-dump.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/variance-site-dump.jpg -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/variance-top25.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex17/variance-top25.jpg -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/README.txt: -------------------------------------------------------------------------------- 1 | Analyzing an EMAIL Archive from gmane and visualizing the data 2 | using the D3 JavaScript library 3 | 4 | This is a set of tools that allow you to pull down an archive 5 | of a gmane repository using the instructions at: 6 | 7 | http://gmane.org/export.php 8 | 9 | In order not to overwhelm the gmane.org server, I have put up 10 | my own copy of the messages at: 11 | 12 | http://mbox.dr-chuck.net/ 13 | 14 | This server will be faster and take a lot of load off the 15 | gmane.org server. 16 | 17 | You should install the SQLite browser to view and modify the databases from: 18 | 19 | http://sqlitebrowser.org/ 20 | 21 | The first step is to spider the gmane repository. The base URL 22 | is hard-coded in gmane.py and points to the Sakai 23 | developer list. You can spider another repository by changing that 24 | base url. Make sure to delete the content.sqlite file if you 25 | switch the base url. The gmane.py file operates as a spider in 26 | that it runs slowly and retrieves one mail message per second so 27 | as to avoid getting throttled by gmane.org. It stores all of 28 | its data in a database and can be interrupted and re-started 29 | as often as needed (a rough sketch of this loop appears below). It may take many hours to pull all the data 30 | down. So you may need to restart several times. 31 | 32 | To give you a head-start, I have put up 600MB of pre-spidered Sakai 33 | email here: 34 | 35 | https://online.dr-chuck.com/files/sakai/email/content.sqlite 36 | 37 | If you download this, you can "catch up with the latest" by 38 | running gmane.py.
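The retrieval loop itself has a simple shape: fetch one message per second,
commit each message as it arrives so the run can be interrupted and restarted,
and stop when a page does not look like an mbox message. The sketch below is an
illustration of that pattern only - the Messages table layout is an assumption,
not the schema gmane.py actually creates:

    import sqlite3
    import time
    import urllib.request

    conn = sqlite3.connect('content.sqlite')
    cur = conn.cursor()
    # Hypothetical minimal schema, purely for illustration.
    cur.execute('CREATE TABLE IF NOT EXISTS Messages (id INTEGER PRIMARY KEY, email TEXT)')

    baseurl = 'http://mbox.dr-chuck.net/sakai.devel/'
    cur.execute('SELECT MAX(id) FROM Messages')
    row = cur.fetchone()
    start = (row[0] or 0) + 1                 # resume after the last spidered message

    for msg_id in range(start, start + 10):   # retrieve a small batch
        url = baseurl + str(msg_id) + '/' + str(msg_id + 1)
        text = urllib.request.urlopen(url).read().decode(errors='replace')
        if not text.startswith('From '):      # mbox messages begin with "From "
            print('Does not start with From')
            break
        cur.execute('INSERT OR IGNORE INTO Messages (id, email) VALUES (?, ?)',
                    (msg_id, text))
        conn.commit()                         # commit per message: safe to interrupt
        time.sleep(1)                         # one message per second, as noted above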
39 | 40 | Navigate to the folder where you extracted the gmane.zip 41 | 42 | Note: Windows has difficulty in displaying UTF-8 characters 43 | in the console so for each console window you open, you may need 44 | to type the following command before running this code: 45 | 46 | chcp 65001 47 | 48 | http://stackoverflow.com/questions/388490/unicode-characters-in-windows-command-line-how 49 | 50 | Here is a run of gmane.py getting the last five messages of the 51 | sakai developer list: 52 | 53 | Mac: python3 gmane.py 54 | Win: gmane.py 55 | 56 | How many messages:10 57 | http://mbox.dr-chuck.net/sakai.devel/1/2 2662 58 | ggolden@umich.edu 2005-12-08T23:34:30-06:00 call for participation: developers documentation 59 | http://mbox.dr-chuck.net/sakai.devel/2/3 2434 60 | csev@umich.edu 2005-12-09T00:58:01-05:00 report from the austin conference: sakai developers break into song 61 | http://mbox.dr-chuck.net/sakai.devel/3/4 3055 62 | kevin.carpenter@rsmart.com 2005-12-09T09:01:49-07:00 cas and sakai 1.5 63 | http://mbox.dr-chuck.net/sakai.devel/4/5 11721 64 | michael.feldstein@suny.edu 2005-12-09T09:43:12-05:00 re: lms/vle rants/comments 65 | http://mbox.dr-chuck.net/sakai.devel/5/6 9443 66 | john@caret.cam.ac.uk 2005-12-09T13:32:29+00:00 re: lms/vle rants/comments 67 | Does not start with From 68 | 69 | The program scans content.sqlite from 1 up to the first message number not 70 | already spidered and starts spidering at that message. It continues spidering 71 | until it has spidered the desired number of messages or it reaches a page 72 | that does not appear to be a properly formatted message. 73 | 74 | Sometimes gmane.org is missing a message. Perhaps administrators can delete messages 75 | or perhaps they get lost - I don't know. If your spider stops, and it seems it has hit 76 | a missing message, go into the SQLite Manager and add a row with the missing id - leave 77 | all the other fields blank - and then restart gmane.py. This will unstick the 78 | spidering process and allow it to continue. These empty messages will be ignored in the next 79 | phase of the process. 80 | 81 | One nice thing is that once you have spidered all of the messages and have them in 82 | content.sqlite, you can run gmane.py again to get new messages as they get sent to the 83 | list. gmane.py will quickly scan to the end of the already-spidered pages and check 84 | if there are new messages and then quickly retrieve those messages and add them 85 | to content.sqlite. 86 | 87 | The content.sqlite data is pretty raw, with an inefficient data model, and not compressed. 88 | This is intentional as it allows you to look at content.sqlite to debug the process. 89 | It would be a bad idea to run any queries against this database as they would be 90 | slow. 91 | 92 | The second process is running the program gmodel.py. gmodel.py reads the rough/raw 93 | data from content.sqlite and produces a cleaned-up and well-modeled version of the 94 | data in the file index.sqlite. The file index.sqlite will be much smaller (often 10X 95 | smaller) than content.sqlite because it also compresses the header and body text. 96 | 97 | Each time gmodel.py runs - it completely wipes out and re-builds index.sqlite, allowing 98 | you to adjust its parameters and edit the mapping tables in content.sqlite to tweak the 99 | data cleaning process.
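As a rough illustration of why the compressed index.sqlite can come out around
10X smaller, the sketch below compresses a message body before storing it.
Note that zlib and the one-column Texts table are assumptions made for this
example only - the README says the header and body text are compressed, but
not how:

    import sqlite3
    import zlib

    conn = sqlite3.connect('index.sqlite')
    cur = conn.cursor()
    # Hypothetical table purely for illustration, not the gmodel.py schema.
    cur.execute('CREATE TABLE IF NOT EXISTS Texts (id INTEGER PRIMARY KEY, body BLOB)')

    body = 'Subject: re: lms/vle rants/comments\n' + 'the quick brown fox ' * 100
    compressed = zlib.compress(body.encode())
    print(len(body.encode()), '->', len(compressed))  # repetitive mail text shrinks a lot

    cur.execute('INSERT OR REPLACE INTO Texts (id, body) VALUES (?, ?)',
                (1, compressed))
    conn.commit()

    # Reading it back is one decompress away:
    cur.execute('SELECT body FROM Texts WHERE id = ?', (1,))
    print(zlib.decompress(cur.fetchone()[0]).decode()[:35])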
100 | 101 | Running gmodel.py works as follows: 102 | 103 | Mac: python3 gmodel.py 104 | Win: gmodel.py 105 | 106 | Loaded allsenders 1588 and mapping 28 dns mapping 1 107 | 1 2005-12-08T23:34:30-06:00 ggolden22@mac.com 108 | 251 2005-12-22T10:03:20-08:00 tpamsler@ucdavis.edu 109 | 501 2006-01-12T11:17:34-05:00 lance@indiana.edu 110 | 751 2006-01-24T11:13:28-08:00 vrajgopalan@ucmerced.edu 111 | ... 112 | 113 | The gmodel.py program does a number of data cleaning steps. 114 | 115 | Domain names are truncated to two levels for .com, .org, .edu, and .net; 116 | other domain names are truncated to three levels. So si.umich.edu becomes 117 | umich.edu and caret.cam.ac.uk becomes cam.ac.uk (a short sketch of this rule appears below). Also mail addresses are 118 | forced to lower case and some of the @gmane.org addresses like the following 119 | 120 | arwhyte-63aXycvo3TyHXe+LvDLADg@public.gmane.org 121 | 122 | are converted to the real address whenever there is a matching real email 123 | address elsewhere in the message corpus. 124 | 125 | If you look in the content.sqlite database there are two tables that allow 126 | you to map both domain names and individual email addresses that change over 127 | the lifetime of the email list. For example, Steve Githens used the following 128 | email addresses over the life of the Sakai developer list: 129 | 130 | s-githens@northwestern.edu 131 | sgithens@cam.ac.uk 132 | swgithen@mtu.edu 133 | 134 | We can add two entries to the Mapping table 135 | 136 | s-githens@northwestern.edu -> swgithen@mtu.edu 137 | sgithens@cam.ac.uk -> swgithen@mtu.edu 138 | 139 | And so all the mail messages will be collected under one sender even if 140 | they used several email addresses over the lifetime of the mailing list. 141 | 142 | You can also make similar entries in the DNSMapping table if there are multiple 143 | DNS names you want mapped to a single DNS. In the Sakai data I add the following 144 | mapping: 145 | 146 | iupui.edu -> indiana.edu 147 | 148 | So all the folks from the various Indiana University campuses are tracked together. 149 | 150 | You can re-run the gmodel.py over and over as you look at the data, and add mappings 151 | to make the data cleaner and cleaner. When you are done, you will have a nicely 152 | indexed version of the email in index.sqlite. This is the file to use to do data 153 | analysis. With this file, data analysis will be really quick. 154 | 155 | The first, simplest data analysis is to do a "who does the most" and "which 156 | organization does the most"? This is done using gbasic.py: 157 | 158 | Mac: python3 gbasic.py 159 | Win: gbasic.py 160 | 161 | How many to dump? 5 162 | Loaded messages= 51330 subjects= 25033 senders= 1584 163 | 164 | Top 5 Email list participants 165 | steve.swinsburg@gmail.com 2657 166 | azeckoski@unicon.net 1742 167 | ieb@tfd.co.uk 1591 168 | csev@umich.edu 1304 169 | david.horwitz@uct.ac.za 1184 170 | 171 | Top 5 Email list organizations 172 | gmail.com 7339 173 | umich.edu 6243 174 | uct.ac.za 2451 175 | indiana.edu 2258 176 | unicon.net 2055 177 | 178 | You can look at the data in index.sqlite and if you find a problem, you 179 | can update the Mapping table and DNSMapping table in content.sqlite and 180 | re-run gmodel.py.
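To make the domain-truncation rule above concrete, here is a tiny sketch of it
in Python. This illustrates the rule as stated in this README, not the code
gmodel.py actually uses:

    # Keep two levels for .com/.org/.edu/.net, three for everything else.
    def truncate_domain(domain):
        pieces = domain.lower().split('.')
        keep = 2 if pieces[-1] in ('com', 'org', 'edu', 'net') else 3
        return '.'.join(pieces[-keep:])

    print(truncate_domain('si.umich.edu'))      # umich.edu
    print(truncate_domain('caret.cam.ac.uk'))   # cam.ac.uk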
181 | 182 | There is a simple visualization of the word frequency in the subject lines 183 | in the file gword.py: 184 | 185 | Mac: python3 gword.py 186 | Win: gword.py 187 | 188 | Range of counts: 33229 129 189 | Output written to gword.js 190 | 191 | This produces the file gword.js which you can visualize using the file 192 | gword.htm. 193 | 194 | A second visualization is in gline.py. It visualizes email participation by 195 | organizations over time. 196 | 197 | Mac: python3 gline.py 198 | Win: gline.py 199 | 200 | Loaded messages= 51330 subjects= 25033 senders= 1584 201 | Top 10 Organizations 202 | ['gmail.com', 'umich.edu', 'uct.ac.za', 'indiana.edu', 'unicon.net', 'tfd.co.uk', 'berkeley.edu', 'longsight.com', 'stanford.edu', 'ox.ac.uk'] 203 | Output written to gline.js 204 | 205 | Its output is written to gline.js which is visualized using gline.htm. 206 | 207 | Some URLs for visualization ideas: 208 | 209 | https://developers.google.com/chart/ 210 | 211 | https://developers.google.com/chart/interactive/docs/gallery/motionchart 212 | 213 | https://code.google.com/apis/ajax/playground/?type=visualization#motion_chart_time_formats 214 | 215 | https://developers.google.com/chart/interactive/docs/gallery/annotatedtimeline 216 | 217 | http://bost.ocks.org/mike/uberdata/ 218 | 219 | http://mbostock.github.io/d3/talk/20111018/calendar.html 220 | 221 | http://nltk.org/install.html 222 | 223 | As always - comments welcome. 224 | 225 | -- Dr. Chuck 226 | Sun Sep 29 00:11:01 EDT 2013 227 | 228 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/content.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/content.sqlite -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/content.sqlite-journal.temp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/content.sqlite-journal.temp -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/content.sqlite.first.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/content.sqlite.first.jpg -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/d3.layout.cloud.js: -------------------------------------------------------------------------------- 1 | // Word cloud layout by Jason Davies, http://www.jasondavies.com/word-cloud/ 2 | // Algorithm due to Jonathan Feinberg, http://static.mrfeinberg.com/bv_ch03.pdf 3 | (function(exports) { 4 | function cloud() { 5 | var size = [256, 256], 6 | text = cloudText, 7 | font = cloudFont, 8 | fontSize = cloudFontSize, 9 | fontStyle =
cloudFontNormal, 10 | fontWeight = cloudFontNormal, 11 | rotate = cloudRotate, 12 | padding = cloudPadding, 13 | spiral = archimedeanSpiral, 14 | words = [], 15 | timeInterval = Infinity, 16 | event = d3.dispatch("word", "end"), 17 | timer = null, 18 | cloud = {}; 19 | 20 | cloud.start = function() { 21 | var board = zeroArray((size[0] >> 5) * size[1]), 22 | bounds = null, 23 | n = words.length, 24 | i = -1, 25 | tags = [], 26 | data = words.map(function(d, i) { 27 | d.text = text.call(this, d, i); 28 | d.font = font.call(this, d, i); 29 | d.style = fontStyle.call(this, d, i); 30 | d.weight = fontWeight.call(this, d, i); 31 | d.rotate = rotate.call(this, d, i); 32 | d.size = ~~fontSize.call(this, d, i); 33 | d.padding = cloudPadding.call(this, d, i); 34 | return d; 35 | }).sort(function(a, b) { return b.size - a.size; }); 36 | 37 | if (timer) clearInterval(timer); 38 | timer = setInterval(step, 0); 39 | step(); 40 | 41 | return cloud; 42 | 43 | function step() { 44 | var start = +new Date, 45 | d; 46 | while (+new Date - start < timeInterval && ++i < n && timer) { 47 | d = data[i]; 48 | d.x = (size[0] * (Math.random() + .5)) >> 1; 49 | d.y = (size[1] * (Math.random() + .5)) >> 1; 50 | cloudSprite(d, data, i); 51 | if (place(board, d, bounds)) { 52 | tags.push(d); 53 | event.word(d); 54 | if (bounds) cloudBounds(bounds, d); 55 | else bounds = [{x: d.x + d.x0, y: d.y + d.y0}, {x: d.x + d.x1, y: d.y + d.y1}]; 56 | // Temporary hack 57 | d.x -= size[0] >> 1; 58 | d.y -= size[1] >> 1; 59 | } 60 | } 61 | if (i >= n) { 62 | cloud.stop(); 63 | event.end(tags, bounds); 64 | } 65 | } 66 | } 67 | 68 | cloud.stop = function() { 69 | if (timer) { 70 | clearInterval(timer); 71 | timer = null; 72 | } 73 | return cloud; 74 | }; 75 | 76 | cloud.timeInterval = function(x) { 77 | if (!arguments.length) return timeInterval; 78 | timeInterval = x == null ? Infinity : x; 79 | return cloud; 80 | }; 81 | 82 | function place(board, tag, bounds) { 83 | var perimeter = [{x: 0, y: 0}, {x: size[0], y: size[1]}], 84 | startX = tag.x, 85 | startY = tag.y, 86 | maxDelta = Math.sqrt(size[0] * size[0] + size[1] * size[1]), 87 | s = spiral(size), 88 | dt = Math.random() < .5 ? 1 : -1, 89 | t = -dt, 90 | dxdy, 91 | dx, 92 | dy; 93 | 94 | while (dxdy = s(t += dt)) { 95 | dx = ~~dxdy[0]; 96 | dy = ~~dxdy[1]; 97 | 98 | if (Math.min(dx, dy) > maxDelta) break; 99 | 100 | tag.x = startX + dx; 101 | tag.y = startY + dy; 102 | 103 | if (tag.x + tag.x0 < 0 || tag.y + tag.y0 < 0 || 104 | tag.x + tag.x1 > size[0] || tag.y + tag.y1 > size[1]) continue; 105 | // TODO only check for collisions within current bounds. 106 | if (!bounds || !cloudCollide(tag, board, size[0])) { 107 | if (!bounds || collideRects(tag, bounds)) { 108 | var sprite = tag.sprite, 109 | w = tag.width >> 5, 110 | sw = size[0] >> 5, 111 | lx = tag.x - (w << 4), 112 | sx = lx & 0x7f, 113 | msx = 32 - sx, 114 | h = tag.y1 - tag.y0, 115 | x = (tag.y + tag.y0) * sw + (lx >> 5), 116 | last; 117 | for (var j = 0; j < h; j++) { 118 | last = 0; 119 | for (var i = 0; i <= w; i++) { 120 | board[x + i] |= (last << msx) | (i < w ? 
(last = sprite[j * w + i]) >>> sx : 0); 121 | } 122 | x += sw; 123 | } 124 | delete tag.sprite; 125 | return true; 126 | } 127 | } 128 | } 129 | return false; 130 | } 131 | 132 | cloud.words = function(x) { 133 | if (!arguments.length) return words; 134 | words = x; 135 | return cloud; 136 | }; 137 | 138 | cloud.size = function(x) { 139 | if (!arguments.length) return size; 140 | size = [+x[0], +x[1]]; 141 | return cloud; 142 | }; 143 | 144 | cloud.font = function(x) { 145 | if (!arguments.length) return font; 146 | font = d3.functor(x); 147 | return cloud; 148 | }; 149 | 150 | cloud.fontStyle = function(x) { 151 | if (!arguments.length) return fontStyle; 152 | fontStyle = d3.functor(x); 153 | return cloud; 154 | }; 155 | 156 | cloud.fontWeight = function(x) { 157 | if (!arguments.length) return fontWeight; 158 | fontWeight = d3.functor(x); 159 | return cloud; 160 | }; 161 | 162 | cloud.rotate = function(x) { 163 | if (!arguments.length) return rotate; 164 | rotate = d3.functor(x); 165 | return cloud; 166 | }; 167 | 168 | cloud.text = function(x) { 169 | if (!arguments.length) return text; 170 | text = d3.functor(x); 171 | return cloud; 172 | }; 173 | 174 | cloud.spiral = function(x) { 175 | if (!arguments.length) return spiral; 176 | spiral = spirals[x + ""] || x; 177 | return cloud; 178 | }; 179 | 180 | cloud.fontSize = function(x) { 181 | if (!arguments.length) return fontSize; 182 | fontSize = d3.functor(x); 183 | return cloud; 184 | }; 185 | 186 | cloud.padding = function(x) { 187 | if (!arguments.length) return padding; 188 | padding = d3.functor(x); 189 | return cloud; 190 | }; 191 | 192 | return d3.rebind(cloud, event, "on"); 193 | } 194 | 195 | function cloudText(d) { 196 | return d.text; 197 | } 198 | 199 | function cloudFont() { 200 | return "serif"; 201 | } 202 | 203 | function cloudFontNormal() { 204 | return "normal"; 205 | } 206 | 207 | function cloudFontSize(d) { 208 | return Math.sqrt(d.value); 209 | } 210 | 211 | function cloudRotate() { 212 | return (~~(Math.random() * 6) - 3) * 30; 213 | } 214 | 215 | function cloudPadding() { 216 | return 1; 217 | } 218 | 219 | // Fetches a monochrome sprite bitmap for the specified text. 220 | // Load in batches for speed. 
221 | function cloudSprite(d, data, di) { 222 | if (d.sprite) return; 223 | c.clearRect(0, 0, (cw << 5) / ratio, ch / ratio); 224 | var x = 0, 225 | y = 0, 226 | maxh = 0, 227 | n = data.length; 228 | di--; 229 | while (++di < n) { 230 | d = data[di]; 231 | c.save(); 232 | c.font = d.style + " " + d.weight + " " + ~~((d.size + 1) / ratio) + "px " + d.font; 233 | var w = c.measureText(d.text + "m").width * ratio, 234 | h = d.size << 1; 235 | if (d.rotate) { 236 | var sr = Math.sin(d.rotate * cloudRadians), 237 | cr = Math.cos(d.rotate * cloudRadians), 238 | wcr = w * cr, 239 | wsr = w * sr, 240 | hcr = h * cr, 241 | hsr = h * sr; 242 | w = (Math.max(Math.abs(wcr + hsr), Math.abs(wcr - hsr)) + 0x1f) >> 5 << 5; 243 | h = ~~Math.max(Math.abs(wsr + hcr), Math.abs(wsr - hcr)); 244 | } else { 245 | w = (w + 0x1f) >> 5 << 5; 246 | } 247 | if (h > maxh) maxh = h; 248 | if (x + w >= (cw << 5)) { 249 | x = 0; 250 | y += maxh; 251 | maxh = 0; 252 | } 253 | if (y + h >= ch) break; 254 | c.translate((x + (w >> 1)) / ratio, (y + (h >> 1)) / ratio); 255 | if (d.rotate) c.rotate(d.rotate * cloudRadians); 256 | c.fillText(d.text, 0, 0); 257 | c.restore(); 258 | d.width = w; 259 | d.height = h; 260 | d.xoff = x; 261 | d.yoff = y; 262 | d.x1 = w >> 1; 263 | d.y1 = h >> 1; 264 | d.x0 = -d.x1; 265 | d.y0 = -d.y1; 266 | x += w; 267 | } 268 | var pixels = c.getImageData(0, 0, (cw << 5) / ratio, ch / ratio).data, 269 | sprite = []; 270 | while (--di >= 0) { 271 | d = data[di]; 272 | var w = d.width, 273 | w32 = w >> 5, 274 | h = d.y1 - d.y0, 275 | p = d.padding; 276 | // Zero the buffer 277 | for (var i = 0; i < h * w32; i++) sprite[i] = 0; 278 | x = d.xoff; 279 | if (x == null) return; 280 | y = d.yoff; 281 | var seen = 0, 282 | seenRow = -1; 283 | for (var j = 0; j < h; j++) { 284 | for (var i = 0; i < w; i++) { 285 | var k = w32 * j + (i >> 5), 286 | m = pixels[((y + j) * (cw << 5) + (x + i)) << 2] ? 1 << (31 - (i % 32)) : 0; 287 | if (p) { 288 | if (j) sprite[k - w32] |= m; 289 | if (j < w - 1) sprite[k + w32] |= m; 290 | m |= (m << 1) | (m >> 1); 291 | } 292 | sprite[k] |= m; 293 | seen |= m; 294 | } 295 | if (seen) seenRow = j; 296 | else { 297 | d.y0++; 298 | h--; 299 | j--; 300 | y++; 301 | } 302 | } 303 | d.y1 = d.y0 + seenRow; 304 | d.sprite = sprite.slice(0, (d.y1 - d.y0) * w32); 305 | } 306 | } 307 | 308 | // Use mask-based collision detection. 309 | function cloudCollide(tag, board, sw) { 310 | sw >>= 5; 311 | var sprite = tag.sprite, 312 | w = tag.width >> 5, 313 | lx = tag.x - (w << 4), 314 | sx = lx & 0x7f, 315 | msx = 32 - sx, 316 | h = tag.y1 - tag.y0, 317 | x = (tag.y + tag.y0) * sw + (lx >> 5), 318 | last; 319 | for (var j = 0; j < h; j++) { 320 | last = 0; 321 | for (var i = 0; i <= w; i++) { 322 | if (((last << msx) | (i < w ? 
(last = sprite[j * w + i]) >>> sx : 0)) 323 | & board[x + i]) return true; 324 | } 325 | x += sw; 326 | } 327 | return false; 328 | } 329 | 330 | function cloudBounds(bounds, d) { 331 | var b0 = bounds[0], 332 | b1 = bounds[1]; 333 | if (d.x + d.x0 < b0.x) b0.x = d.x + d.x0; 334 | if (d.y + d.y0 < b0.y) b0.y = d.y + d.y0; 335 | if (d.x + d.x1 > b1.x) b1.x = d.x + d.x1; 336 | if (d.y + d.y1 > b1.y) b1.y = d.y + d.y1; 337 | } 338 | 339 | function collideRects(a, b) { 340 | return a.x + a.x1 > b[0].x && a.x + a.x0 < b[1].x && a.y + a.y1 > b[0].y && a.y + a.y0 < b[1].y; 341 | } 342 | 343 | function archimedeanSpiral(size) { 344 | var e = size[0] / size[1]; 345 | return function(t) { 346 | return [e * (t *= .1) * Math.cos(t), t * Math.sin(t)]; 347 | }; 348 | } 349 | 350 | function rectangularSpiral(size) { 351 | var dy = 4, 352 | dx = dy * size[0] / size[1], 353 | x = 0, 354 | y = 0; 355 | return function(t) { 356 | var sign = t < 0 ? -1 : 1; 357 | // See triangular numbers: T_n = n * (n + 1) / 2. 358 | switch ((Math.sqrt(1 + 4 * sign * t) - sign) & 3) { 359 | case 0: x += dx; break; 360 | case 1: y += dy; break; 361 | case 2: x -= dx; break; 362 | default: y -= dy; break; 363 | } 364 | return [x, y]; 365 | }; 366 | } 367 | 368 | // TODO reuse arrays? 369 | function zeroArray(n) { 370 | var a = [], 371 | i = -1; 372 | while (++i < n) a[i] = 0; 373 | return a; 374 | } 375 | 376 | var cloudRadians = Math.PI / 180, 377 | cw = 1 << 11 >> 5, 378 | ch = 1 << 11, 379 | canvas, 380 | ratio = 1; 381 | 382 | if (typeof document !== "undefined") { 383 | canvas = document.createElement("canvas"); 384 | canvas.width = 1; 385 | canvas.height = 1; 386 | ratio = Math.sqrt(canvas.getContext("2d").getImageData(0, 0, 1, 1).data.length >> 2); 387 | canvas.width = (cw << 5) / ratio; 388 | canvas.height = ch / ratio; 389 | } else { 390 | // node-canvas support 391 | var Canvas = require("canvas"); 392 | canvas = new Canvas(cw << 5, ch); 393 | } 394 | 395 | var c = canvas.getContext("2d"), 396 | spirals = { 397 | archimedean: archimedeanSpiral, 398 | rectangular: rectangularSpiral 399 | }; 400 | c.fillStyle = "red"; 401 | c.textAlign = "center"; 402 | 403 | exports.cloud = cloud; 404 | })(typeof exports === "undefined" ? d3.layout || (d3.layout = {}) : exports); 405 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gbasic.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import time 3 | import zlib 4 | 5 | howmany = int(input("How many to dump? 
")) 6 | 7 | conn = sqlite3.connect('index.sqlite') 8 | cur = conn.cursor() 9 | 10 | cur.execute('SELECT id, sender FROM Senders') 11 | senders = dict() 12 | for message_row in cur : 13 | senders[message_row[0]] = message_row[1] 14 | 15 | cur.execute('SELECT id, subject FROM Subjects') 16 | subjects = dict() 17 | for message_row in cur : 18 | subjects[message_row[0]] = message_row[1] 19 | 20 | # cur.execute('SELECT id, guid,sender_id,subject_id,headers,body FROM Messages') 21 | cur.execute('SELECT id, guid,sender_id,subject_id,sent_at FROM Messages') 22 | messages = dict() 23 | for message_row in cur : 24 | messages[message_row[0]] = (message_row[1],message_row[2],message_row[3],message_row[4]) 25 | 26 | print("Loaded messages=",len(messages),"subjects=",len(subjects),"senders=",len(senders)) 27 | 28 | sendcounts = dict() 29 | sendorgs = dict() 30 | for (message_id, message) in list(messages.items()): 31 | sender = message[1] 32 | sendcounts[sender] = sendcounts.get(sender,0) + 1 33 | pieces = senders[sender].split("@") 34 | if len(pieces) != 2 : continue 35 | dns = pieces[1] 36 | sendorgs[dns] = sendorgs.get(dns,0) + 1 37 | 38 | print('') 39 | print('Top',howmany,'Email list participants') 40 | 41 | x = sorted(sendcounts, key=sendcounts.get, reverse=True) 42 | for k in x[:howmany]: 43 | print(senders[k], sendcounts[k]) 44 | if sendcounts[k] < 10 : break 45 | 46 | print('') 47 | print('Top',howmany,'Email list organizations') 48 | 49 | x = sorted(sendorgs, key=sendorgs.get, reverse=True) 50 | for k in x[:howmany]: 51 | print(k, sendorgs[k]) 52 | if sendorgs[k] < 10 : break 53 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gbasic.py.running.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gbasic.py.running.jpg -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gbasic.py.running2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gbasic.py.running2.jpg -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gline.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gline.jpg -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gline.js: 
-------------------------------------------------------------------------------- 1 | gline = [ ['Year','umich.edu','indiana.edu','ucdavis.edu','ufp.pt','uct.ac.za','berkeley.edu','columbia.edu','etudes.org','gmail.com','mac.com'], 2 | ['2005-12',57,12,11,10,14,12,13,5,10,12], 3 | ['2006-01',93,29,28,29,25,25,22,26,16,12] 4 | ]; 5 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gline.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import time 3 | import zlib 4 | 5 | conn = sqlite3.connect('index.sqlite') 6 | cur = conn.cursor() 7 | 8 | cur.execute('SELECT id, sender FROM Senders') 9 | senders = dict() 10 | for message_row in cur : 11 | senders[message_row[0]] = message_row[1] 12 | 13 | cur.execute('SELECT id, guid,sender_id,subject_id,sent_at FROM Messages') 14 | messages = dict() 15 | for message_row in cur : 16 | messages[message_row[0]] = (message_row[1],message_row[2],message_row[3],message_row[4]) 17 | 18 | print("Loaded messages=",len(messages),"senders=",len(senders)) 19 | 20 | sendorgs = dict() 21 | for (message_id, message) in list(messages.items()): 22 | sender = message[1] 23 | pieces = senders[sender].split("@") 24 | if len(pieces) != 2 : continue 25 | dns = pieces[1] 26 | sendorgs[dns] = sendorgs.get(dns,0) + 1 27 | 28 | # pick the top schools 29 | orgs = sorted(sendorgs, key=sendorgs.get, reverse=True) 30 | orgs = orgs[:10] 31 | print("Top 10 Organizations") 32 | print(orgs) 33 | 34 | counts = dict() 35 | months = list() 36 | # cur.execute('SELECT id, guid,sender_id,subject_id,sent_at FROM Messages') 37 | for (message_id, message) in list(messages.items()): 38 | sender = message[1] 39 | pieces = senders[sender].split("@") 40 | if len(pieces) != 2 : continue 41 | dns = pieces[1] 42 | if dns not in orgs : continue 43 | month = message[3][:7] # first seven characters of sent_at are YYYY-MM 44 | if month not in months : months.append(month) 45 | key = (month, dns) 46 | counts[key] = counts.get(key,0) + 1 47 | 48 | months.sort() 49 | # print counts 50 | # print months 51 | 52 | fhand = open('gline.js','w') 53 | fhand.write("gline = [ ['Year'") 54 | for org in orgs: 55 | fhand.write(",'"+org+"'") 56 | fhand.write("]") 57 | 58 | for month in months: 59 | fhand.write(",\n['"+month+"'") 60 | for org in orgs: 61 | key = (month, org) 62 | val = counts.get(key,0) 63 | fhand.write(","+str(val)) 64 | fhand.write("]") 65 | 66 | fhand.write("\n];\n") 67 | fhand.close() 68 | 69 | print("Output written to gline.js") 70 | print("Open gline.htm to visualize the data") 71 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gmane.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import time 3 | import ssl 4 | import urllib.request, urllib.parse, urllib.error 5 | from urllib.parse import urljoin 6 | from urllib.parse import urlparse 7 | import re 8 | from datetime import datetime, timedelta 9 | 10 | # Not all systems have this so conditionally define parser 11 | try: 12 | import dateutil.parser as parser 13 | except: 14 | pass 15 | 16 | def parsemaildate(md) : 17 | # See if we have dateutil 18 | try: 19 | pdate = parser.parse(md) 20 | test_at = pdate.isoformat() 21 | return test_at 22 | except: 23 | pass 24 | 25 | # Non-dateutil version - we try our best 26 | 27 | pieces = md.split() 28 | notz = " ".join(pieces[:4]).strip() 29 | 30 | # Try a bunch of format variations - strptime() is *lame* 31 | dnotz = None 32 | for form in [ '%d %b %Y %H:%M:%S', '%d %b %Y %H:%M:%S', 33 | '%d %b %Y %H:%M', '%d %b %Y %H:%M', '%d %b %y %H:%M:%S', 34 | '%d %b %y %H:%M:%S', '%d %b %y %H:%M', '%d %b %y %H:%M' ] : 35 | try: 36 | dnotz = datetime.strptime(notz, form) 37 | break 38 | except: 39 | continue 40 | 41 | if dnotz is None : 42 | # print('Bad Date:',md) 43 | return None 44 | 45 | iso = dnotz.isoformat() 46 | 47 | tz = "+0000" 48 | try: 49 | tz = pieces[4] 50 | ival = int(tz) # Only want numeric timezone values 51 | if tz == '-0000' : tz = '+0000' 52 | tzh = tz[:3] 53 | tzm = tz[3:] 54 | tz = tzh+":"+tzm 55 | except: 56 | pass 57 | 58 | return iso+tz 59 | 60 | # Ignore SSL certificate errors 61 | ctx = ssl.create_default_context() 62 | ctx.check_hostname = False 63 | ctx.verify_mode = ssl.CERT_NONE 64 | 65 | conn = sqlite3.connect('content.sqlite') 66 | cur = conn.cursor() 67 | 68 | baseurl = "http://mbox.dr-chuck.net/sakai.devel/" 69 | 70 | cur.execute('''CREATE TABLE IF NOT EXISTS Messages 71 | (id INTEGER UNIQUE, email TEXT, sent_at TEXT, 72 | subject TEXT, headers TEXT, body TEXT)''') 73 | 74 | # Pick up where we left off 75 | start = None 76 | cur.execute('SELECT max(id) FROM Messages' ) 77 | try: 78 | row = cur.fetchone() 79 | if row is None : 80 | start = 0 81 | else: 82 | start = row[0] 83 | except: 84 | start = 0 85 | 86 | if start is None : start = 0 87 | 88 | many = 0 89 | count = 0 90 | fail = 0 91 | while True: 92 | if ( many < 1 ) : 93 | conn.commit() 94 | sval = input('How many messages:') 95 | if ( len(sval) < 1 ) : break 96 | many = int(sval) 97 | 98 | start = start + 1 99 | cur.execute('SELECT id FROM Messages WHERE id=?', (start,) ) 100 | try: 101 | row = cur.fetchone() 102 | if row is not None : continue 103 | except: 104 | row = None 105 | 106 | many = many - 1 107 | url = baseurl + str(start) + '/' + str(start + 1) 108 | 109 | text = "None" 110 | try: 111 | # Open with a timeout of 30 seconds 112 | document = urllib.request.urlopen(url, None, 30, context=ctx) 113 | text = document.read().decode() 114 | if document.getcode() != 200 : 115 | print("Error code=",document.getcode(), url) 116 | break 117 | except KeyboardInterrupt: 118 | print('') 119 | print('Program interrupted by user...') 120 | break 121 | except Exception as e: 122 | print("Unable to retrieve or parse page",url) 123 | print("Error",e) 124 | fail = fail + 1 125 | if fail > 5 : break 126 | continue 127 | 128 | print(url,len(text)) 129 | count = count + 1 130 | 131 | if not text.startswith("From "): 132 | print(text) 133 | print("Did not find From ") 134 | fail = fail + 1 135 | if fail > 5 : break 136 | continue 137 | 138 | pos = text.find("\n\n") 139 | if pos > 0 : 140 | hdr = text[:pos] 141 | body = text[pos+2:] 142 | else: 143 | print(text) 144 | print("Could not find break between headers and body") 145 | fail = fail + 1 146 | if fail > 5 : break 147 | continue 148 | 149 | email = None 150 | x = re.findall('\nFrom: .* <(\S+@\S+)>\n', hdr) 151 | if len(x) == 1 : 152 | email = x[0] 153 | email = email.strip().lower() 154 | email = email.replace("<","") 155 | else: 156 | x = re.findall('\nFrom: (\S+@\S+)\n', hdr) 157 | if len(x) == 1 : 158 | email = x[0] 159 | email = email.strip().lower() 160 | email = email.replace("<","") 161 | 162 | date = None 163 | y = re.findall('\nDate: .*, (.*)\n', hdr) 164 | if len(y) == 1 : 165 | tdate = y[0] 166 | tdate = tdate[:26] 167 | try: 168 | sent_at = parsemaildate(tdate) 169 | except: 170 | print(text) 171 | print("Parse fail",tdate) 172 | fail = fail + 1 173 | if fail > 5 : break 174 | continue 175 | 176 | subject = None 177 | z = re.findall('\nSubject: (.*)\n', hdr) 178 | if len(z) == 1 : subject = z[0].strip().lower() 179 | 180 | # Reset the fail counter 181 | fail = 0 182 | print(" ",email,sent_at,subject) 183 | cur.execute('''INSERT OR IGNORE INTO Messages (id, email, sent_at, subject, headers, body) 184 | VALUES ( ?, ?, ?, ?, ?, ? )''', ( start, email, sent_at, subject, hdr, body)) 185 | if count % 50 == 0 : conn.commit() 186 | if count % 100 == 0 : time.sleep(1) 187 | 188 | conn.commit() 189 | cur.close() -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gmodel.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import time 3 | import re 4 | import zlib 5 | from datetime import datetime, timedelta 6 | 7 | # Not all systems have this 8 | try: 9 | import dateutil.parser as parser 10 | except: 11 | pass 12 | 13 | dnsmapping = dict() 14 | mapping = dict() 15 | 16 | def fixsender(sender,allsenders=None) : 17 | global dnsmapping 18 | global mapping 19 | if sender is None : return None 20 | sender = sender.strip().lower() 21 | sender = sender.replace('<','').replace('>','') 22 | 23 | # Check if we have a hacked gmane.org from address 24 | if allsenders is not None and sender.endswith('gmane.org') : 25 | pieces = sender.split('-') 26 | realsender = None 27 | for s in allsenders: 28 | if s.startswith(pieces[0]) : 29 | realsender = sender 30 | sender = s 31 | # print(realsender, sender) 32 | break 33 | if realsender is None : 34 | for s in mapping: 35 | if s.startswith(pieces[0]) : 36 | realsender = sender 37 | sender = mapping[s] 38 | # print(realsender, sender) 39 | break 40 | if realsender is None : sender = pieces[0] 41 | 42 | mpieces = sender.split("@") 43 | if len(mpieces) != 2 : return sender 44 | dns = mpieces[1] 45 | x = dns 46 | pieces = dns.split(".") 47 | if dns.endswith(".edu") or dns.endswith(".com") or dns.endswith(".org") or dns.endswith(".net") : 48 | dns = ".".join(pieces[-2:]) 49 | else: 50 | dns = ".".join(pieces[-3:]) 51 | # if dns != x : print(x,dns) 52 | # if dns != dnsmapping.get(dns,dns) : print(dns,dnsmapping.get(dns,dns)) 53 | dns = dnsmapping.get(dns,dns) 54 | return mpieces[0] + '@' + dns 55 | 56 | def parsemaildate(md) : 57 | # See if we have dateutil 58 | try: 59 | pdate = parser.parse(md) 60 | test_at = pdate.isoformat() 61 | return test_at 62 | except: 63 | pass 64 | 65 | # Non-dateutil version - we try our best 66 | 67 | pieces = md.split() 68 | notz = " ".join(pieces[:4]).strip() 69 | 70 | # Try a bunch of format variations - strptime() is *lame* 71 | dnotz = None 72 | for form in [ '%d %b %Y %H:%M:%S', '%d %b %Y %H:%M:%S', 73 | '%d %b %Y %H:%M', '%d %b %Y %H:%M', '%d %b %y %H:%M:%S', 74 | '%d %b %y %H:%M:%S', '%d %b %y %H:%M', '%d %b %y %H:%M' ] : 75 | try: 76 | dnotz = datetime.strptime(notz, form) 77 | break 78 | except: 79 | continue 80 | 81 | if dnotz is None : 82 | # print('Bad Date:',md) 83 | return None 84 | 85 | iso = dnotz.isoformat() 86 | 87 | tz = "+0000" 88 | try: 89 | tz = pieces[4] 90 | ival = int(tz) # Only want numeric timezone values 91 | if tz == '-0000' : tz = '+0000' 92 | tzh = tz[:3] 93 | tzm = tz[3:] 94 | tz = tzh+":"+tzm 95 | except: 96 | pass 97 | 98 | return iso+tz 99 | 100 | # Parse out the 
info... 101 | def parseheader(hdr, allsenders=None): 102 | if hdr is None or len(hdr) < 1 : return None 103 | sender = None 104 | x = re.findall('\nFrom: .* <(\S+@\S+)>\n', hdr) 105 | if len(x) >= 1 : 106 | sender = x[0] 107 | else: 108 | x = re.findall('\nFrom: (\S+@\S+)\n', hdr) 109 | if len(x) >= 1 : 110 | sender = x[0] 111 | 112 | # normalize the domain name of Email addresses 113 | sender = fixsender(sender, allsenders) 114 | 115 | date = None 116 | y = re.findall('\nDate: .*, (.*)\n', hdr) 117 | sent_at = None 118 | if len(y) >= 1 : 119 | tdate = y[0] 120 | tdate = tdate[:26] 121 | try: 122 | sent_at = parsemaildate(tdate) 123 | except Exception as e: 124 | # print('Date ignored ',tdate, e) 125 | return None 126 | 127 | subject = None 128 | z = re.findall('\nSubject: (.*)\n', hdr) 129 | if len(z) >= 1 : subject = z[0].strip().lower() 130 | 131 | guid = None 132 | z = re.findall('\nMessage-ID: (.*)\n', hdr) 133 | if len(z) >= 1 : guid = z[0].strip().lower() 134 | 135 | if sender is None or sent_at is None or subject is None or guid is None : 136 | return None 137 | return (guid, sender, subject, sent_at) 138 | 139 | conn = sqlite3.connect('index.sqlite') 140 | cur = conn.cursor() 141 | 142 | cur.execute('''DROP TABLE IF EXISTS Messages ''') 143 | cur.execute('''DROP TABLE IF EXISTS Senders ''') 144 | cur.execute('''DROP TABLE IF EXISTS Subjects ''') 145 | cur.execute('''DROP TABLE IF EXISTS Replies ''') 146 | 147 | cur.execute('''CREATE TABLE IF NOT EXISTS Messages 148 | (id INTEGER PRIMARY KEY, guid TEXT UNIQUE, sent_at INTEGER, 149 | sender_id INTEGER, subject_id INTEGER, 150 | headers BLOB, body BLOB)''') 151 | cur.execute('''CREATE TABLE IF NOT EXISTS Senders 152 | (id INTEGER PRIMARY KEY, sender TEXT UNIQUE)''') 153 | cur.execute('''CREATE TABLE IF NOT EXISTS Subjects 154 | (id INTEGER PRIMARY KEY, subject TEXT UNIQUE)''') 155 | cur.execute('''CREATE TABLE IF NOT EXISTS Replies 156 | (from_id INTEGER, to_id INTEGER)''') 157 | 158 | conn_1 = sqlite3.connect('mapping.sqlite') 159 | cur_1 = conn_1.cursor() 160 | 161 | cur_1.execute('''SELECT old,new FROM DNSMapping''') 162 | for message_row in cur_1 : 163 | dnsmapping[message_row[0].strip().lower()] = message_row[1].strip().lower() 164 | 165 | mapping = dict() 166 | cur_1.execute('''SELECT old,new FROM Mapping''') 167 | for message_row in cur_1 : 168 | old = fixsender(message_row[0]) 169 | new = fixsender(message_row[1]) 170 | mapping[old] = fixsender(new) 171 | 172 | # Done with mapping.sqlite 173 | conn_1.close() 174 | 175 | # Open the main content (Read only) 176 | conn_1 = sqlite3.connect('file:content.sqlite?mode=ro', uri=True) 177 | cur_1 = conn_1.cursor() 178 | 179 | allsenders = list() 180 | cur_1.execute('''SELECT email FROM Messages''') 181 | for message_row in cur_1 : 182 | sender = fixsender(message_row[0]) 183 | if sender is None : continue 184 | if 'gmane.org' in sender : continue 185 | if sender in allsenders: continue 186 | allsenders.append(sender) 187 | 188 | print("Loaded allsenders",len(allsenders),"and mapping",len(mapping),"dns mapping",len(dnsmapping)) 189 | 190 | cur_1.execute('''SELECT headers, body, sent_at 191 | FROM Messages ORDER BY sent_at''') 192 | 193 | senders = dict() 194 | subjects = dict() 195 | guids = dict() 196 | 197 | count = 0 198 | 199 | for message_row in cur_1 : 200 | hdr = message_row[0] 201 | parsed = parseheader(hdr, allsenders) 202 | if parsed is None: continue 203 | (guid, sender, subject, sent_at) = parsed 204 | 205 | # Apply the sender mapping 206 | sender = 
mapping.get(sender,sender) 207 | 208 | count = count + 1 209 | if count % 250 == 1 : print(count,sent_at, sender) 210 | # print(guid, sender, subject, sent_at) 211 | 212 | if 'gmane.org' in sender: 213 | print("Error in sender ===", sender) 214 | 215 | sender_id = senders.get(sender,None) 216 | subject_id = subjects.get(subject,None) 217 | guid_id = guids.get(guid,None) 218 | 219 | if sender_id is None : 220 | cur.execute('INSERT OR IGNORE INTO Senders (sender) VALUES ( ? )', ( sender, ) ) 221 | conn.commit() 222 | cur.execute('SELECT id FROM Senders WHERE sender=? LIMIT 1', ( sender, )) 223 | try: 224 | row = cur.fetchone() 225 | sender_id = row[0] 226 | senders[sender] = sender_id 227 | except: 228 | print('Could not retrieve sender id',sender) 229 | break 230 | if subject_id is None : 231 | cur.execute('INSERT OR IGNORE INTO Subjects (subject) VALUES ( ? )', ( subject, ) ) 232 | conn.commit() 233 | cur.execute('SELECT id FROM Subjects WHERE subject=? LIMIT 1', ( subject, )) 234 | try: 235 | row = cur.fetchone() 236 | subject_id = row[0] 237 | subjects[subject] = subject_id 238 | except: 239 | print('Could not retrieve subject id',subject) 240 | break 241 | # print(sender_id, subject_id) 242 | cur.execute('INSERT OR IGNORE INTO Messages (guid,sender_id,subject_id,sent_at,headers,body) VALUES ( ?,?,?,datetime(?),?,? )', 243 | ( guid, sender_id, subject_id, sent_at, 244 | zlib.compress(message_row[0].encode()), zlib.compress(message_row[1].encode())) ) 245 | conn.commit() 246 | cur.execute('SELECT id FROM Messages WHERE guid=? LIMIT 1', ( guid, )) 247 | try: 248 | row = cur.fetchone() 249 | message_id = row[0] 250 | guids[guid] = message_id 251 | except: 252 | print('Could not retrieve guid id',guid) 253 | break 254 | 255 | cur.close() 256 | cur_1.close() 257 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gmodel.py.running.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gmodel.py.running.jpg -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gword.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 37 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gword.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gword.jpg -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gword.js: -------------------------------------------------------------------------------- 1 | gword = [{text: 'sakai', size: 100}, 2 | {text: 'with', size: 38}, 3 | {text: 'tool', size: 36}, 4 | {text: 'error', size: 35}, 5 | {text: 'webdav', size: 34}, 6 | {text: 'resources', size: 32}, 7 | {text: 'mysql', size: 30}, 8 | {text: 'problems', size: 29}, 9 | {text: 
'changes', size: 28}, 10 | {text: 'problem', size: 26}, 11 | {text: 'working', size: 25}, 12 | {text: 'message', size: 25}, 13 | {text: 'into', size: 24}, 14 | {text: 'content', size: 24}, 15 | {text: 'site', size: 24}, 16 | {text: 'workspace', size: 24}, 17 | {text: 'melete', size: 24}, 18 | {text: 'course', size: 23}, 19 | {text: 'broken', size: 23}, 20 | {text: 'from', size: 23}, 21 | {text: 'password', size: 23}, 22 | {text: 'forgotten', size: 23}, 23 | {text: 'feature', size: 23}, 24 | {text: 'profile', size: 23}, 25 | {text: 'rutgers', size: 23}, 26 | {text: 'accessservlet', size: 23}, 27 | {text: 'aliases', size: 23}, 28 | {text: 'unexpectedly', size: 23}, 29 | {text: 'taken', size: 23}, 30 | {text: 'portalxlogin', size: 23}, 31 | {text: 'samigo', size: 23}, 32 | {text: 'oracle', size: 23}, 33 | {text: 'eclipse', size: 23}, 34 | {text: 'view', size: 23}, 35 | {text: 'tools', size: 23}, 36 | {text: 'update', size: 23}, 37 | {text: 'version', size: 23}, 38 | {text: 'maven', size: 22}, 39 | {text: 'email', size: 22}, 40 | {text: 'center', size: 22}, 41 | {text: 'jforum', size: 22}, 42 | {text: 'files', size: 22}, 43 | {text: 'syllabus', size: 22}, 44 | {text: 'desktop', size: 21}, 45 | {text: 'connection', size: 21}, 46 | {text: 'file', size: 21}, 47 | {text: 'worksite', size: 21}, 48 | {text: 'portal', size: 21}, 49 | {text: 'visual', size: 21}, 50 | {text: 'basic', size: 21}, 51 | {text: 'different', size: 21}, 52 | {text: 'missing', size: 21}, 53 | {text: 'upload', size: 21}, 54 | {text: 'importing', size: 21}, 55 | {text: 'option', size: 21}, 56 | {text: 'information', size: 21}, 57 | {text: 'creating', size: 21}, 58 | {text: 'staleobjectstateexception', size: 21}, 59 | {text: 'updating', size: 21}, 60 | {text: 'sakaiiframemyworkspace', size: 21}, 61 | {text: 'memory', size: 20}, 62 | {text: 'collab', size: 20}, 63 | {text: 'code', size: 20}, 64 | {text: 'section', size: 20}, 65 | {text: 'question', size: 20}, 66 | {text: 'status', size: 20}, 67 | {text: 'production', size: 20}, 68 | {text: 'extending', size: 20}, 69 | {text: 'javaxsqlbasedatasource', size: 20}, 70 | {text: 'apis', size: 20}, 71 | {text: 'wiki', size: 20}, 72 | {text: 'using', size: 20}, 73 | {text: 'tests', size: 20}, 74 | {text: 'branch', size: 20}, 75 | {text: 'permissions', size: 20}, 76 | {text: 'support', size: 20}, 77 | {text: 'size', size: 20}, 78 | {text: 'page', size: 20}, 79 | {text: 'users', size: 20}, 80 | {text: 'sakaiperson', size: 20}, 81 | {text: 'database', size: 20}, 82 | {text: 'casfilter', size: 20}, 83 | {text: 'html', size: 20}, 84 | {text: 'editors', size: 20}, 85 | {text: 'reordering', size: 20}, 86 | {text: 'suppressing', size: 20}, 87 | {text: 'annoying', size: 20}, 88 | {text: 'macos', size: 20}, 89 | {text: 'limit', size: 20}, 90 | {text: 'exceeded', size: 20}, 91 | {text: 'without', size: 20}, 92 | {text: 'uploading', size: 20}, 93 | {text: 'documentation', size: 20}, 94 | {text: 'provider', size: 20}, 95 | {text: 'cannot', size: 20}, 96 | {text: 'development', size: 20}, 97 | {text: 'sakaiscript', size: 20}, 98 | {text: 'again', size: 20}, 99 | {text: 'assigning', size: 20}, 100 | {text: 'quota', size: 20} 101 | ]; 102 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gword.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import time 3 | import zlib 4 | import string 5 | 6 | conn = 
sqlite3.connect('index.sqlite') 7 | cur = conn.cursor() 8 | 9 | cur.execute('SELECT id, subject FROM Subjects') 10 | subjects = dict() 11 | for message_row in cur : 12 | subjects[message_row[0]] = message_row[1] 13 | 14 | # cur.execute('SELECT id, guid,sender_id,subject_id,headers,body FROM Messages') 15 | cur.execute('SELECT subject_id FROM Messages') 16 | counts = dict() 17 | for message_row in cur : 18 | text = subjects[message_row[0]] 19 | text = text.translate(str.maketrans('','',string.punctuation)) 20 | text = text.translate(str.maketrans('','','1234567890')) 21 | text = text.strip() 22 | text = text.lower() 23 | words = text.split() 24 | for word in words: 25 | if len(word) < 4 : continue 26 | counts[word] = counts.get(word,0) + 1 27 | 28 | x = sorted(counts, key=counts.get, reverse=True) 29 | highest = None 30 | lowest = None 31 | for k in x[:100]: 32 | if highest is None or highest < counts[k] : 33 | highest = counts[k] 34 | if lowest is None or lowest > counts[k] : 35 | lowest = counts[k] 36 | print('Range of counts:',highest,lowest) 37 | 38 | # Spread the font sizes across 20-100 based on the count 39 | bigsize = 80 40 | smallsize = 20 41 | 42 | fhand = open('gword.js','w') 43 | fhand.write("gword = [") 44 | first = True 45 | for k in x[:100]: 46 | if not first : fhand.write( ",\n") 47 | first = False 48 | size = counts[k] 49 | size = (size - lowest) / float(highest - lowest) 50 | size = int((size * bigsize) + smallsize) 51 | fhand.write("{text: '"+k+"', size: "+str(size)+"}") 52 | fhand.write( "\n];\n") 53 | fhand.close() 54 | 55 | print("Output written to gword.js") 56 | print("Open gword.htm in a browser to see the visualization") 57 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/gyear.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import time 3 | import urllib.request, urllib.parse, urllib.error 4 | import zlib 5 | 6 | conn = sqlite3.connect('index.sqlite') 7 | cur = conn.cursor() 8 | 9 | cur.execute('SELECT id, sender FROM Senders') 10 | senders = dict() 11 | for message_row in cur : 12 | senders[message_row[0]] = message_row[1] 13 | 14 | cur.execute('SELECT id, guid,sender_id,subject_id,sent_at FROM Messages') 15 | messages = dict() 16 | for message_row in cur : 17 | messages[message_row[0]] = (message_row[1],message_row[2],message_row[3],message_row[4]) 18 | 19 | print("Loaded messages=",len(messages),"senders=",len(senders)) 20 | 21 | sendorgs = dict() 22 | for (message_id, message) in list(messages.items()): 23 | sender = message[1] 24 | pieces = senders[sender].split("@") 25 | if len(pieces) != 2 : continue 26 | dns = pieces[1] 27 | sendorgs[dns] = sendorgs.get(dns,0) + 1 28 | 29 | # pick the top schools 30 | orgs = sorted(sendorgs, key=sendorgs.get, reverse=True) 31 | orgs = orgs[:10] 32 | print("Top 10 Organizations") 33 | print(orgs) 34 | # orgs = ['total'] + orgs 35 | 36 | counts = dict() 37 | months = list() 38 | # cur.execute('SELECT id, guid,sender_id,subject_id,sent_at FROM Messages') 39 | for (message_id, message) in list(messages.items()): 40 | sender = message[1] 41 | pieces = senders[sender].split("@") 42 | if len(pieces) != 2 : continue 43 | dns = pieces[1] 44 | if dns not in orgs : continue 45 | month = message[3][:4] # first four characters of sent_at are the year 46 | if month not in months : months.append(month) 47 | key = (month, dns) 48 | counts[key] = counts.get(key,0) + 1 49 | tkey = (month, 'total') 50 | 
counts[tkey] = counts.get(tkey,0) + 1 51 | 52 | months.sort() 53 | # print counts 54 | # print months 55 | 56 | fhand = open('gline.js','w') 57 | fhand.write("gline = [ ['Year'") 58 | for org in orgs: 59 | fhand.write(",'"+org+"'") 60 | fhand.write("]") 61 | 62 | for month in months[1:-1]: 63 | fhand.write(",\n['"+month+"'") 64 | for org in orgs: 65 | key = (month, org) 66 | val = counts.get(key,0) 67 | fhand.write(","+str(val)) 68 | fhand.write("]"); 69 | 70 | fhand.write("\n];\n") 71 | fhand.close() 72 | 73 | print("Output written to gline.js") 74 | print("Open gline.htm to visualize the data") 75 | 76 | -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/index.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/index.sqlite -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/index.sqlite.second.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/index.sqlite.second.jpg -------------------------------------------------------------------------------- /Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/mapping.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Course 5 - Capstone - Retrieving, processing and visualising data with Python/ex18/gmane/mapping.sqlite -------------------------------------------------------------------------------- /Python for Everybody.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thmstm/py4e/1ac22d81d43c30f09afff4ad0b53296f3bcf45f1/Python for Everybody.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # py4e 2 | Coursera - Python for Everybody codes 3 | https://www.coursera.org/specializations/python 4 | --------------------------------------------------------------------------------