├── 01-simple.py
├── 02-basics.py
├── 03-api.py
├── 04-web-scraping.py
├── 05-pandas.py
├── README.md
├── drinks.csv
├── u.data
└── u.item


/01-simple.py:
--------------------------------------------------------------------------------
1 | a = 5           # first operand
2 | b = 3           # second operand
3 | print a + b     # Python 2 print statement; outputs 8
4 | 


--------------------------------------------------------------------------------
/02-basics.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Multi-line comments go between 3 quotation marks (technically a string literal).
 3 | You can use single or double quotes.
 4 | '''
 5 | 
 6 | # One-line comments are preceded by the pound symbol
 7 | 
 8 | 
 9 | # BASIC DATA TYPES
10 | 
11 | x = 5               # creates an int object and binds the name x to it
12 | print type(x)       # check the type: int (not declared explicitly)
13 | type(x)             # echoed automatically in an interactive shell (no output in a script)
14 | type(5)             # assigning it to a variable is not required
15 | 
16 | type(5.0)           # float
17 | type('five')        # str
18 | type(True)          # bool
19 | 
20 | 
21 | # LISTS
22 | 
23 | nums = [5, 5.0, 'five']     # one list can hold multiple data types (int, float, str)
24 | nums                        # print the list
25 | type(nums)                  # check the type: list
26 | len(nums)                   # check the length: 3
27 | nums[0]                     # print first element (indexing starts at 0)
28 | nums[0] = 6                 # replace a list element: nums is now [6, 5.0, 'five']
29 | 
30 | nums.append(7)              # list 'method' that modifies the list in place: [6, 5.0, 'five', 7]
31 | help(nums.append)           # help on this method
32 | help(nums)                  # help on a list object
33 | nums.remove('five')         # another list method: removes the first 'five', leaving [6, 5.0, 7]
34 | 
35 | sorted(nums)                # 'function' that returns a new sorted list: [5.0, 6, 7]
36 | nums                        # it was not affected
37 | nums = sorted(nums)         # overwrite the original list with the sorted copy
38 | sorted(nums, reverse=True)  # optional keyword argument: sort in descending order
39 | 
40 | 
41 | # FUNCTIONS
42 | 
43 | def give_me_five():         # function definition (no parameters) ends with colon
44 |     return 5                # indentation required for function body; hands 5 back to the caller
45 | 
46 | give_me_five()              # an interactive shell echoes the return value (5)
47 | num = give_me_five()        # assigns return value to a variable (num is 5), doesn't print it
48 | 
49 | def calc(x, y, op):         # three required arguments (without any defaults)
50 |     if op == 'add':         # conditional statement: compare op to a string
51 |         return x + y        # sum of the two operands
52 |     elif op == 'subtract':  # checked only when the first test is false
53 |         return x - y        # difference of the two operands
54 |     else:                   # any other value of op
55 |         print 'Valid operations: add, subtract'  # no return on this path, so the call evaluates to None
56 | 
57 | calc(5, 3, 'add')           # 8
58 | calc(5, 3, 'subtract')      # 2
59 | calc(5, 3, 'multiply')      # prints the list of valid operations
60 | calc(5, 3)                  # raises TypeError: op has no default value
61 | 


--------------------------------------------------------------------------------
/03-api.py:
--------------------------------------------------------------------------------
 1 | import requests     # import module (make its functions available)
 2 | 
 3 | # request the data from the API (replace YOUR_API_KEY in the URL with your own key)
 4 | r = requests.get('http://developer.echonest.com/api/v4/artist/top_hottt?api_key=YOUR_API_KEY&format=json')
 5 | top = r.json()      # decode JSON and store in a dictionary
 6 | 
 7 | # pretty print for easier readability
 8 | import pprint
 9 | pprint.pprint(top)
10 | 
11 | # pull out the artist data (nested under 'response' -> 'artists')
12 | artists = top['response']['artists']    # list of 15 dictionaries
13 | 
14 | # reformat data into a table structure: one header row plus one row of values per artist
15 | artists_header = artists[0].keys()                      # list of 2 strings (the keys of the first artist)
16 | artists_data = [artist.values() for artist in artists]  # list of 15 lists
17 | 


--------------------------------------------------------------------------------
/04-web-scraping.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from bs4 import BeautifulSoup
 3 | 
 4 | # read in a page and convert requests text into 'soup' object
 5 | r = requests.get('http://www.chicagoreader.com/chicago/best-of-chicago-2011-food-drink/BestOf?oid=4106228')
 6 | soup = BeautifulSoup(r.text)    # NOTE(review): no parser is named here; consider passing one explicitly - confirm which parser the workshop setup uses
 7 | 
 8 | # find the section of relevant links (the 'dl' tag with class 'boccat') and then parse into iterable rows (its 'dd' tags)
 9 | links_section = soup.find(name='dl', attrs={'class':'boccat'})
10 | link_rows = links_section.find_all(name='dd')
11 | 
12 | # create a list of category links by prefixing each row's href with 'http://chicagoreader.com'
13 | category_links = ['http://chicagoreader.com' + row.a['href'] for row in link_rows]
14 | 
15 | # function that takes a category link, downloads that page, and returns a dictionary with keys 'category', 'winners', and 'runners_up'
16 | def get_category_winners(category_link):
17 |     r = requests.get(category_link)     # download the category page
18 |     soup = BeautifulSoup(r.text)        # convert the page text into a 'soup' object
19 |     return {"category":     soup.find(name='h1', attrs={'class':'headline'}).string,                        # string of the first 'h1' tag with class 'headline'
20 |             "winners":      [h2.string for h2 in soup.find_all(name='h2', attrs={'class':'boc1'})],  # strings of all 'h2' tags with class 'boc1'
21 |             "runners_up":   [h2.string for h2 in soup.find_all(name='h2', attrs={'class':'boc2'})]   # strings of all 'h2' tags with class 'boc2'
22 |             }
23 | 
24 | # create list of dictionaries for the first three links
25 | from time import sleep
26 | winners = []
27 | for category_link in category_links[0:3]:   # the slice keeps only the first three links
28 |     winners.append(get_category_winners(category_link))
29 |     print '.'                               # progress marker: one dot per page fetched
30 |     sleep(1)                                # wait one second before the next request
31 | 
32 | # 'pretty print' the winners data
33 | from pprint import pprint
34 | pprint(winners)
35 | 


--------------------------------------------------------------------------------
/05-pandas.py:
--------------------------------------------------------------------------------
  1 | # imports
  2 | import pandas as pd
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | 
  6 | 
  7 | '''
  8 | Pandas Basics: Reading Files, Summarizing, Handling Missing Values, Filtering, Sorting
  9 | '''
 10 | 
 11 | # read in the CSV file: first from a local copy, then from a URL (the second read overwrites the first)
 12 | drinks = pd.read_csv('drinks.csv')
 13 | drinks = pd.read_csv('https://raw.githubusercontent.com/justmarkham/python-data-analysis-workshop/master/drinks.csv')
 14 | type(drinks)            # DataFrame
 15 | 
 16 | # examine the data
 17 | drinks                  # print the first 30 and last 30 rows
 18 | drinks.head()           # print the first 5 rows
 19 | drinks.tail()           # print the last 5 rows
 20 | drinks.describe()       # describe any numeric columns
 21 | drinks.info()           # concise summary
 22 | 
 23 | # find missing values in a DataFrame
 24 | drinks.isnull()         # DataFrame of booleans
 25 | drinks.isnull().sum()   # count missing values per column (booleans are summed as integers)
 26 | 
 27 | # handling missing values
 28 | drinks.dropna()             # drop a row if ANY values are missing (result is not assigned, so drinks is unchanged)
 29 | drinks.fillna(value='NA')   # fill in missing values with the string 'NA' (result is not assigned either)
 30 | 
 31 | # fix the original import: with na_filter=False the continent code 'NA' is read as a string instead of a missing value
 32 | drinks = pd.read_csv('https://raw.githubusercontent.com/justmarkham/python-data-analysis-workshop/master/drinks.csv', na_filter=False)
 33 | drinks.isnull().sum()   # check the missing-value counts again after the re-import
 34 | 
 35 | # selecting a column ('Series')
 36 | drinks['continent']         # bracket notation
 37 | drinks.continent            # equivalent (dot notation)
 38 | type(drinks.continent)      # Series
 39 | 
 40 | # summarizing a non-numeric column
 41 | drinks.continent.describe()
 42 | drinks.continent.value_counts()     # number of rows for each distinct value
 43 | 
 44 | # selecting multiple columns
 45 | drinks[['country', 'beer_servings']]    # pass a list of column names inside the brackets
 46 | my_cols = ['country', 'beer_servings']
 47 | drinks[my_cols]                         # same selection, using the list stored in a variable
 48 | 
 49 | # add a new column as a function of existing columns
 50 | drinks['total_servings'] = drinks.beer_servings + drinks.spirit_servings + drinks.wine_servings
 51 | drinks.head()               # confirm the new column was added
 52 | 
 53 | # logical filtering and sorting
 54 | drinks[drinks.continent=='NA']
 55 | drinks[['country', 'total_servings']][drinks.continent=='NA']
 56 | drinks[['country', 'total_servings']][drinks.continent=='NA'].sort_index(by='total_servings')
 57 | drinks[drinks.wine_servings > 300]
 58 | drinks[drinks.wine_servings > drinks.beer_servings]
 59 | drinks.sort_index(by='beer_servings').tail()
 60 | drinks.beer_servings[drinks.continent=='NA'].mean()
 61 | drinks.beer_servings[drinks.continent=='EU'].mean()
 62 | 
 63 | 
 64 | '''
 65 | Split-Apply-Combine
 66 | '''
 67 | 
 68 | # for each continent, calculate mean beer servings
 69 | drinks.groupby('continent').beer_servings.mean()
 70 | 
 71 | # for each continent, count number of occurrences (two ways)
 72 | drinks.groupby('continent').continent.count()   # via groupby
 73 | drinks.continent.value_counts()                 # via value_counts on the column itself
 74 | 
 75 | # for each continent, calculate the min, max, and range for total servings
 76 | drinks.groupby('continent').total_servings.min()
 77 | drinks.groupby('continent').total_servings.max()
 78 | drinks.groupby('continent').total_servings.apply(lambda x: x.max() - x.min())   # range = max - min within each group
 79 | 
 80 | 
 81 | '''
 82 | Plotting
 83 | '''
 84 | 
 85 | # bar plot of number of countries in each continent
 86 | drinks.continent.value_counts().plot(kind='bar', title='Countries per Continent')
 87 | plt.xlabel('Continent')     # label the x-axis
 88 | plt.ylabel('Count')         # label the y-axis
 89 | plt.show()                  # display the figure
 90 | 
 91 | # bar plot of average number of beer servings by continent
 92 | drinks.groupby('continent').beer_servings.mean().plot(kind='bar')
 93 | 
 94 | # histogram of beer servings (20 bins)
 95 | drinks.beer_servings.hist(bins=20)
 96 | 
 97 | # grouped histogram of beer servings (grouped by continent, sharex=True)
 98 | drinks.beer_servings.hist(by=drinks.continent, sharex=True)
 99 | 
100 | # density plot of beer servings (x-axis limited to 0-500)
101 | drinks.beer_servings.plot(kind='density', xlim=(0,500))
102 | 
103 | # boxplot of beer servings by continent
104 | drinks.boxplot(column='beer_servings', by='continent')
105 | 
106 | # scatterplot of beer servings versus wine servings (alpha=0.3 sets the point transparency)
107 | drinks.plot(x='beer_servings', y='wine_servings', kind='scatter', alpha=0.3)
108 | 
109 | # same scatterplot, except all European countries are colored red
110 | colors = np.where(drinks.continent=='EU', 'r', 'b')     # 'r' where continent is 'EU', otherwise 'b'
111 | drinks.plot(x='beer_servings', y='wine_servings', kind='scatter', c=colors)
112 | 
113 | 
114 | '''
115 | Joining Data
116 | '''
117 | 
118 | # read 'u.data' into 'ratings' (tab-separated, no header row, so column names are supplied)
119 | r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
120 | ratings = pd.read_table('u.data', header=None, names=r_cols, sep='\t')
121 | 
122 | # read 'u.item' into 'movies' (pipe-separated, no header row, first two columns only)
123 | m_cols = ['movie_id', 'title']
124 | movies = pd.read_table('u.item', header=None, names=m_cols, sep='|', usecols=[0,1])
125 | 
126 | # merge 'movies' and 'ratings' (inner join on 'movie_id')
127 | movies.head()
128 | ratings.head()
129 | movie_ratings = pd.merge(movies, ratings)   # 'movie_id' is the only column name the two tables share
130 | movie_ratings.head()
131 | 
132 | 
133 | '''
134 | Further Exploration
135 | '''
136 | 
137 | # for each movie, count number of ratings
138 | movie_ratings.title.value_counts()
139 | 
140 | # for each movie, calculate mean rating
141 | movie_ratings.groupby('title').rating.mean().order(ascending=False)
142 | 
143 | # for each movie, count number of ratings and calculate mean rating
144 | movie_ratings.groupby('title').rating.count()
145 | movie_ratings.groupby('title').rating.mean()
146 | movie_stats = movie_ratings.groupby('title').agg({'rating': [np.size, np.mean]})
147 | movie_stats.head()  # hierarchical index
148 | 
149 | # limit results to movies with more than 100 ratings
150 | movie_stats[movie_stats.rating.size > 100].sort_index(by=('rating', 'mean'))
151 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Intro to Python for Data Analysis
 2 | 
 3 | [Workshop](https://generalassemb.ly/education/intro-to-python-for-data-analysis/washington-dc/8161) at General Assembly (Washington, DC) on October 20, 2014.
 4 | 
 5 | Instructor: [Kevin Markham](http://www.dataschool.io/about/)
 6 | 
 7 | ### Agenda
 8 | 
 9 | 1. Why Python? (10 min.)
10 |     * Characteristics of Python
11 |     * Python vs. R
12 |     * Why [Anaconda](http://continuum.io/downloads)?
13 | 2. Just Enough Python Basics (45 min.)
14 |     * Python interpreter (aka "Python shell"), [IPython shell](http://ipython.org/ipython-doc/stable/index.html)
15 |     * Running a simple script ([code](01-simple.py))
16 |     * [Spyder IDE](https://code.google.com/p/spyderlib/)
17 |     * Exploring data types, lists, functions ([code](02-basics.py))
18 | 3. Getting Data (20 min.)
19 |     * [Public datasets](https://github.com/justmarkham/DAT3/blob/master/public_data.md) in structured formats
20 |     * Accessing APIs ([code](03-api.py), [documentation](http://developer.echonest.com/))
21 |     * Scraping websites ([code](04-web-scraping.py), [pages](http://www.chicagoreader.com/chicago/best-of-chicago-2011-food-drink/BestOf?oid=4106228))
22 | 4. Looking at Data (5 min.)
23 |     * Data from FiveThirtyEight ([GitHub repository](https://github.com/fivethirtyeight/data))
24 |     * Alcohol consumption ([article](http://fivethirtyeight.com/datalab/dear-mona-followup-where-do-people-drink-the-most-beer-wine-and-spirits/), [modified data](drinks.csv))
25 | 5. [Pandas](http://pandas.pydata.org/pandas-docs/stable/index.html) for Data Exploration (70 min.)
26 |     * Exploring alcohol data: examining, summarizing, filtering, sorting, handling missing values, [split-apply-combine](http://i.imgur.com/yjNkiwL.png) ([code](05-pandas.py))
27 |     * If time permits, also explore movie ratings data ([description](http://files.grouplens.org/datasets/movielens/ml-100k-README.txt)): [joins](http://www.gregreda.com/2013/10/26/working-with-pandas-dataframes/#joining), [plotting](http://nbviewer.ipython.org/github/fonnesbeck/Bios366/blob/master/notebooks/Section2_7-Plotting-with-Pandas.ipynb)
28 | 6. Brief Tour of Other Modules for Data Science (5 min.)
29 |     * [numpy](http://www.numpy.org/)
30 |     * [scikit-learn](http://scikit-learn.org/stable/)
31 |     * [statsmodels](http://statsmodels.sourceforge.net/)
32 | 7. Recommended Resources for Self-Learning (10 min.)
33 |     * Basic Python: [Codecademy](http://www.codecademy.com/en/tracks/python), [Google's Python Class](https://developers.google.com/edu/python/), [Python Tutor](http://pythontutor.com/) (to visualize code execution)
34 |     * Pandas: [tutorial](http://www.gregreda.com/2013/10/26/intro-to-pandas-data-structures/), [book: "Python for Data Analysis"](http://shop.oreilly.com/product/0636920023784.do) (includes numpy and basic Python)
35 |     * Web scraping: [tutorial](http://www.gregreda.com/2013/03/03/web-scraping-101-with-python/)
36 |     * Command line: [tutorial](http://seankross.com/notes/cli/cli.html)
37 |     * Git and GitHub: [video series](https://www.youtube.com/playlist?list=PL5-da3qGB5IBLMp7LtN8Nc3Efd4hJq0kD)
38 |     * Machine learning: [book and videos: "An Introduction to Statistical Learning"](http://www.dataschool.io/15-hours-of-expert-machine-learning-videos/), [scikit-learn tutorials](http://scikit-learn.org/stable/tutorial/index.html), [Data Science as a Sport](https://www.youtube.com/watch?v=8w4UY66GKcM) (video), [Kaggle Titanic competition](http://www.kaggle.com/c/titanic-gettingStarted)
39 |     * Data science in general: [ebook: "Analyzing the Analyzers"](http://cdn.oreillystatic.com/oreilly/radarreport/0636920029014/Analyzing_the_Analyzers.pdf)
40 |     * Data-focused newsletters: [Center for Data Innovation](http://www.datainnovation.org/), [O'Reilly Data Newsletter](http://www.oreilly.com/data/index.html), [Data Community DC](http://datacommunitydc.org/blog/newsletter/)
41 |     * Full-fledged courses: [Data Science Specialization](https://www.coursera.org/specialization/jhudatascience/1) (9 short courses by JHU in R), [Machine Learning](https://www.coursera.org/course/ml) (1 course by Andrew Ng in Matlab/Octave), [Learning from Data](http://work.caltech.edu/telecourse.html) (1 course, programming language not specified)
42 | 8. General Assembly's Data Science Course (5 min.)
43 |     * [Official page](https://generalassemb.ly/education/data-science/washington-dc/)
44 |     * [Current repo](https://github.com/justmarkham/DAT3)
45 |     * [Project examples](https://github.com/justmarkham/DAT-project-examples)
46 | 9. Ask Me Anything
47 | 


--------------------------------------------------------------------------------
/drinks.csv:
--------------------------------------------------------------------------------
  1 | country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
  2 | Afghanistan,0,0,0,0.0,AS
  3 | Albania,89,132,54,4.9,EU
  4 | Algeria,25,0,14,0.7,AF
  5 | Andorra,245,138,312,12.4,EU
  6 | Angola,217,57,45,5.9,AF
  7 | Antigua & Barbuda,102,128,45,4.9,NA
  8 | Argentina,193,25,221,8.3,SA
  9 | Armenia,21,179,11,3.8,EU
 10 | Australia,261,72,212,10.4,OC
 11 | Austria,279,75,191,9.7,EU
 12 | Azerbaijan,21,46,5,1.3,EU
 13 | Bahamas,122,176,51,6.3,NA
 14 | Bahrain,42,63,7,2.0,AS
 15 | Bangladesh,0,0,0,0.0,AS
 16 | Barbados,143,173,36,6.3,NA
 17 | Belarus,142,373,42,14.4,EU
 18 | Belgium,295,84,212,10.5,EU
 19 | Belize,263,114,8,6.8,NA
 20 | Benin,34,4,13,1.1,AF
 21 | Bhutan,23,0,0,0.4,AS
 22 | Bolivia,167,41,8,3.8,SA
 23 | Bosnia-Herzegovina,76,173,8,4.6,EU
 24 | Botswana,173,35,35,5.4,AF
 25 | Brazil,245,145,16,7.2,SA
 26 | Brunei,31,2,1,0.6,AS
 27 | Bulgaria,231,252,94,10.3,EU
 28 | Burkina Faso,25,7,7,4.3,AF
 29 | Burundi,88,0,0,6.3,AF
 30 | Cote d'Ivoire,37,1,7,4.0,AF
 31 | Cabo Verde,144,56,16,4.0,AF
 32 | Cambodia,57,65,1,2.2,AS
 33 | Cameroon,147,1,4,5.8,AF
 34 | Canada,240,122,100,8.2,NA
 35 | Central African Republic,17,2,1,1.8,AF
 36 | Chad,15,1,1,0.4,AF
 37 | Chile,130,124,172,7.6,SA
 38 | China,79,192,8,5.0,AS
 39 | Colombia,159,76,3,4.2,SA
 40 | Comoros,1,3,1,0.1,AF
 41 | Congo,76,1,9,1.7,AF
 42 | Cook Islands,0,254,74,5.9,OC
 43 | Costa Rica,149,87,11,4.4,NA
 44 | Croatia,230,87,254,10.2,EU
 45 | Cuba,93,137,5,4.2,NA
 46 | Cyprus,192,154,113,8.2,EU
 47 | Czech Republic,361,170,134,11.8,EU
 48 | North Korea,0,0,0,0.0,AS
 49 | DR Congo,32,3,1,2.3,AF
 50 | Denmark,224,81,278,10.4,EU
 51 | Djibouti,15,44,3,1.1,AF
 52 | Dominica,52,286,26,6.6,NA
 53 | Dominican Republic,193,147,9,6.2,NA
 54 | Ecuador,162,74,3,4.2,SA
 55 | Egypt,6,4,1,0.2,AF
 56 | El Salvador,52,69,2,2.2,NA
 57 | Equatorial Guinea,92,0,233,5.8,AF
 58 | Eritrea,18,0,0,0.5,AF
 59 | Estonia,224,194,59,9.5,EU
 60 | Ethiopia,20,3,0,0.7,AF
 61 | Fiji,77,35,1,2.0,OC
 62 | Finland,263,133,97,10.0,EU
 63 | France,127,151,370,11.8,EU
 64 | Gabon,347,98,59,8.9,AF
 65 | Gambia,8,0,1,2.4,AF
 66 | Georgia,52,100,149,5.4,EU
 67 | Germany,346,117,175,11.3,EU
 68 | Ghana,31,3,10,1.8,AF
 69 | Greece,133,112,218,8.3,EU
 70 | Grenada,199,438,28,11.9,NA
 71 | Guatemala,53,69,2,2.2,NA
 72 | Guinea,9,0,2,0.2,AF
 73 | Guinea-Bissau,28,31,21,2.5,AF
 74 | Guyana,93,302,1,7.1,SA
 75 | Haiti,1,326,1,5.9,NA
 76 | Honduras,69,98,2,3.0,NA
 77 | Hungary,234,215,185,11.3,EU
 78 | Iceland,233,61,78,6.6,EU
 79 | India,9,114,0,2.2,AS
 80 | Indonesia,5,1,0,0.1,AS
 81 | Iran,0,0,0,0.0,AS
 82 | Iraq,9,3,0,0.2,AS
 83 | Ireland,313,118,165,11.4,EU
 84 | Israel,63,69,9,2.5,AS
 85 | Italy,85,42,237,6.5,EU
 86 | Jamaica,82,97,9,3.4,NA
 87 | Japan,77,202,16,7.0,AS
 88 | Jordan,6,21,1,0.5,AS
 89 | Kazakhstan,124,246,12,6.8,AS
 90 | Kenya,58,22,2,1.8,AF
 91 | Kiribati,21,34,1,1.0,OC
 92 | Kuwait,0,0,0,0.0,AS
 93 | Kyrgyzstan,31,97,6,2.4,AS
 94 | Laos,62,0,123,6.2,AS
 95 | Latvia,281,216,62,10.5,EU
 96 | Lebanon,20,55,31,1.9,AS
 97 | Lesotho,82,29,0,2.8,AF
 98 | Liberia,19,152,2,3.1,AF
 99 | Libya,0,0,0,0.0,AF
100 | Lithuania,343,244,56,12.9,EU
101 | Luxembourg,236,133,271,11.4,EU
102 | Madagascar,26,15,4,0.8,AF
103 | Malawi,8,11,1,1.5,AF
104 | Malaysia,13,4,0,0.3,AS
105 | Maldives,0,0,0,0.0,AS
106 | Mali,5,1,1,0.6,AF
107 | Malta,149,100,120,6.6,EU
108 | Marshall Islands,0,0,0,0.0,OC
109 | Mauritania,0,0,0,0.0,AF
110 | Mauritius,98,31,18,2.6,AF
111 | Mexico,238,68,5,5.5,NA
112 | Micronesia,62,50,18,2.3,OC
113 | Monaco,0,0,0,0.0,EU
114 | Mongolia,77,189,8,4.9,AS
115 | Montenegro,31,114,128,4.9,EU
116 | Morocco,12,6,10,0.5,AF
117 | Mozambique,47,18,5,1.3,AF
118 | Myanmar,5,1,0,0.1,AS
119 | Namibia,376,3,1,6.8,AF
120 | Nauru,49,0,8,1.0,OC
121 | Nepal,5,6,0,0.2,AS
122 | Netherlands,251,88,190,9.4,EU
123 | New Zealand,203,79,175,9.3,OC
124 | Nicaragua,78,118,1,3.5,NA
125 | Niger,3,2,1,0.1,AF
126 | Nigeria,42,5,2,9.1,AF
127 | Niue,188,200,7,7.0,OC
128 | Norway,169,71,129,6.7,EU
129 | Oman,22,16,1,0.7,AS
130 | Pakistan,0,0,0,0.0,AS
131 | Palau,306,63,23,6.9,OC
132 | Panama,285,104,18,7.2,NA
133 | Papua New Guinea,44,39,1,1.5,OC
134 | Paraguay,213,117,74,7.3,SA
135 | Peru,163,160,21,6.1,SA
136 | Philippines,71,186,1,4.6,AS
137 | Poland,343,215,56,10.9,EU
138 | Portugal,194,67,339,11.0,EU
139 | Qatar,1,42,7,0.9,AS
140 | South Korea,140,16,9,9.8,AS
141 | Moldova,109,226,18,6.3,EU
142 | Romania,297,122,167,10.4,EU
143 | Russian Federation,247,326,73,11.5,AS
144 | Rwanda,43,2,0,6.8,AF
145 | St. Kitts & Nevis,194,205,32,7.7,NA
146 | St. Lucia,171,315,71,10.1,NA
147 | St. Vincent & the Grenadines,120,221,11,6.3,NA
148 | Samoa,105,18,24,2.6,OC
149 | San Marino,0,0,0,0.0,EU
150 | Sao Tome & Principe,56,38,140,4.2,AF
151 | Saudi Arabia,0,5,0,0.1,AS
152 | Senegal,9,1,7,0.3,AF
153 | Serbia,283,131,127,9.6,EU
154 | Seychelles,157,25,51,4.1,AF
155 | Sierra Leone,25,3,2,6.7,AF
156 | Singapore,60,12,11,1.5,AS
157 | Slovakia,196,293,116,11.4,EU
158 | Slovenia,270,51,276,10.6,EU
159 | Solomon Islands,56,11,1,1.2,OC
160 | Somalia,0,0,0,0.0,AF
161 | South Africa,225,76,81,8.2,AF
162 | Spain,284,157,112,10.0,EU
163 | Sri Lanka,16,104,0,2.2,AS
164 | Sudan,8,13,0,1.7,AF
165 | Suriname,128,178,7,5.6,SA
166 | Swaziland,90,2,2,4.7,AF
167 | Sweden,152,60,186,7.2,EU
168 | Switzerland,185,100,280,10.2,EU
169 | Syria,5,35,16,1.0,AS
170 | Tajikistan,2,15,0,0.3,AS
171 | Thailand,99,258,1,6.4,AS
172 | Macedonia,106,27,86,3.9,EU
173 | Timor-Leste,1,1,4,0.1,AS
174 | Togo,36,2,19,1.3,AF
175 | Tonga,36,21,5,1.1,OC
176 | Trinidad & Tobago,197,156,7,6.4,NA
177 | Tunisia,51,3,20,1.3,AF
178 | Turkey,51,22,7,1.4,AS
179 | Turkmenistan,19,71,32,2.2,AS
180 | Tuvalu,6,41,9,1.0,OC
181 | Uganda,45,9,0,8.3,AF
182 | Ukraine,206,237,45,8.9,EU
183 | United Arab Emirates,16,135,5,2.8,AS
184 | United Kingdom,219,126,195,10.4,EU
185 | Tanzania,36,6,1,5.7,AF
186 | USA,249,158,84,8.7,NA
187 | Uruguay,115,35,220,6.6,SA
188 | Uzbekistan,25,101,8,2.4,AS
189 | Vanuatu,21,18,11,0.9,OC
190 | Venezuela,333,100,3,7.7,SA
191 | Vietnam,111,2,1,2.0,AS
192 | Yemen,6,0,0,0.1,AS
193 | Zambia,32,19,4,2.5,AF
194 | Zimbabwe,64,18,4,4.7,AF
195 | 


--------------------------------------------------------------------------------
/u.item:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/justmarkham/python-data-analysis-workshop/84f08baad86bd1adf47e81ea089ad8d42de04396/u.item


--------------------------------------------------------------------------------