├── firstform
├── tests
│ ├── __init__.py
│ ├── tools.py
│ └── app_tests.py
├── firstform
│ ├── __init__.py
│ └── __init__.pyc
├── bin
│ ├── app.pyc
│ ├── tools.py
│ ├── __init__.py
│ └── app.py
├── templates
│ ├── layout.html
│ ├── index.html
│ └── hello_form.html
└── setup.py
├── gothonweb
├── bin
│ ├── __init__.py
│ ├── app.py
│ └── map.py
├── tests
│ ├── __init__.py
│ ├── tools.py
│ ├── app_tests.py
│ └── map_tests.py
├── templates
│ ├── you_died.html
│ ├── layout.html
│ └── show_room.html
├── sessions
│ ├── 40ad7454d4b4cbacedaa449f7e2c8fb04165ecf4
│ ├── 5524b4c1828de273b8ae4c70bbbe0e631e031e4a
│ └── 6adbe20488a3ffd0040abc4ac06991d1d79c97d0
└── setup.py
├── .gitignore
├── tutorials
├── exercism_py3
│ ├── leap
│ │ ├── .cache
│ │ │ └── v
│ │ │ │ └── cache
│ │ │ │ └── lastfailed
│ │ ├── year5.py
│ │ ├── leap.py
│ │ ├── year4.py
│ │ ├── year.py
│ │ ├── leap_test.py
│ │ └── README.md
│ ├── hello-world
│ │ ├── .cache
│ │ │ └── v
│ │ │ │ └── cache
│ │ │ │ └── lastfailed
│ │ ├── hello_world2.py
│ │ ├── hello_world.py
│ │ ├── hello_world_test.py
│ │ ├── hello_world_test2.py
│ │ └── README.md
│ ├── Ex5_hamming
│ │ ├── hamming2.py
│ │ └── hamming.py
│ ├── dna
│ │ ├── dna2.py
│ │ └── dna.py
│ ├── word_count
│ │ ├── wordcount3.py
│ │ ├── wordcount2.py
│ │ ├── README.md
│ │ └── word_count_test.py
│ └── pangram
│ │ ├── pangram.py
│ │ ├── pangram2.py
│ │ └── pangram_detailed.py
├── ThinkBayes
│ ├── thinkbayesLoco.png
│ ├── thinkbayesLoco2.png
│ ├── thinkbayeseuro.png
│ ├── thinkbayeseuro2.png
│ ├── thinkbayesprice.png
│ ├── thinkbayesprice2.png
│ ├── thinkbayesprice3.png
│ ├── .ipynb_checkpoints
│ │ ├── 046-ImplimentingSuite-checkpoint.ipynb
│ │ ├── 056 - Chap6DecisionAnalysis-checkpoint.ipynb
│ │ ├── 046-Suite_m&m-checkpoint.ipynb
│ │ ├── 046-MontyHall_framework-checkpoint.ipynb
│ │ ├── 047-Dice-checkpoint.ipynb
│ │ ├── 049-Credible_intervals_cdfs-checkpoint.ipynb
│ │ └── 043-Distributions-checkpoint.ipynb
│ ├── 056 - Chap6DecisionAnalysis.ipynb
│ ├── 046-ImplimentingSuite.ipynb
│ ├── 046-Suite_m&m.ipynb
│ ├── 046-MontyHall_framework.ipynb
│ ├── 047-Dice.ipynb
│ ├── 049-Credible_intervals_cdfs.ipynb
│ └── 043-Distributions.ipynb
├── algorithms
│ ├── notebooks
│ │ ├── .ipynb_checkpoints
│ │ │ ├── 068-Lesson2-checkpoint.ipynb
│ │ │ └── Lesson1-checkpoint.ipynb
│ │ ├── 068-Lesson2.ipynb
│ │ └── Lesson1.ipynb
│ └── scripts
│ │ ├── L1_Eulerian_Q10.py
│ │ └── L1_EulerianPath.py
├── KaggleNLP
│ └── word_vectors.py
├── K-means
│ └── kmeans.py
├── Samsung
│ └── notebooks
│ │ ├── 029-Samsung_cleanup.ipynb
│ │ └── 031-Samsung_cleanup.ipynb
└── 026-Linear_Regression_Analysis.ipynb
├── windspeed
├── plots
│ ├── WSahel.png
│ ├── 038-62124Sebha.png
│ └── 038-62124Sebha_2.png
├── scripts
│ ├── 012-ws_tseries.py
│ ├── 030-group_tseries.py
│ ├── 037-group_tseries.py
│ ├── 013-ws_tseries.py
│ ├── 038-group_tseries.py
│ ├── 039-group_tseries.py
│ └── 040-group_tseries.py
└── notebooks
│ └── 010_1-windspeed.ipynb
├── SQL
└── galaXQL_17.sql
├── 001-git-basics.md
├── monkeylearn
└── 015-selectdata.py
├── DSFromScratch
├── Chap13
│ └── machine_learning.py
└── Chap6
│ ├── 064-Chap6.ipynb
│ └── .ipynb_checkpoints
│ └── 064-Chap6-checkpoint.ipynb
├── Titanic
└── bin
│ ├── clean_test.py
│ └── clean_test_53.py
└── TOdo.md
/firstform/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gothonweb/bin/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gothonweb/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/firstform/firstform/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 |
3 | API_key.txt
--------------------------------------------------------------------------------
/tutorials/exercism_py3/leap/.cache/v/cache/lastfailed:
--------------------------------------------------------------------------------
1 | {}
--------------------------------------------------------------------------------
/tutorials/exercism_py3/hello-world/.cache/v/cache/lastfailed:
--------------------------------------------------------------------------------
1 | {}
--------------------------------------------------------------------------------
/firstform/bin/app.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SophMC/notechain/HEAD/firstform/bin/app.pyc
--------------------------------------------------------------------------------
/windspeed/plots/WSahel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SophMC/notechain/HEAD/windspeed/plots/WSahel.png
--------------------------------------------------------------------------------
/firstform/firstform/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SophMC/notechain/HEAD/firstform/firstform/__init__.pyc
--------------------------------------------------------------------------------
/windspeed/plots/038-62124Sebha.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SophMC/notechain/HEAD/windspeed/plots/038-62124Sebha.png
--------------------------------------------------------------------------------
/tutorials/exercism_py3/Ex5_hamming/hamming2.py:
--------------------------------------------------------------------------------
def distance(dna1, dna2):
    """Return the number of positions at which the two strands differ.

    The strands are walked pairwise with ``zip``, so when their lengths
    differ the surplus tail of the longer strand is not compared.
    """
    mismatches = 0
    for left, right in zip(dna1, dna2):
        if left != right:
            mismatches += 1
    return mismatches
--------------------------------------------------------------------------------
/windspeed/plots/038-62124Sebha_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SophMC/notechain/HEAD/windspeed/plots/038-62124Sebha_2.png
--------------------------------------------------------------------------------
/tutorials/ThinkBayes/thinkbayesLoco.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SophMC/notechain/HEAD/tutorials/ThinkBayes/thinkbayesLoco.png
--------------------------------------------------------------------------------
/tutorials/ThinkBayes/thinkbayesLoco2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SophMC/notechain/HEAD/tutorials/ThinkBayes/thinkbayesLoco2.png
--------------------------------------------------------------------------------
/tutorials/ThinkBayes/thinkbayeseuro.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SophMC/notechain/HEAD/tutorials/ThinkBayes/thinkbayeseuro.png
--------------------------------------------------------------------------------
/tutorials/ThinkBayes/thinkbayeseuro2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SophMC/notechain/HEAD/tutorials/ThinkBayes/thinkbayeseuro2.png
--------------------------------------------------------------------------------
/tutorials/ThinkBayes/thinkbayesprice.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SophMC/notechain/HEAD/tutorials/ThinkBayes/thinkbayesprice.png
--------------------------------------------------------------------------------
/tutorials/ThinkBayes/thinkbayesprice2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SophMC/notechain/HEAD/tutorials/ThinkBayes/thinkbayesprice2.png
--------------------------------------------------------------------------------
/tutorials/ThinkBayes/thinkbayesprice3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SophMC/notechain/HEAD/tutorials/ThinkBayes/thinkbayesprice3.png
--------------------------------------------------------------------------------
/gothonweb/templates/you_died.html:
--------------------------------------------------------------------------------
1 |
You Died!
2 |
3 | Looks like you bit the dust.
4 | Play Again
5 |
--------------------------------------------------------------------------------
/tutorials/exercism_py3/dna/dna2.py:
--------------------------------------------------------------------------------
# Translation table: each DNA nucleotide mapped to its RNA complement.
DNA_TO_RNA = str.maketrans("GCTA", "CGAU")


def to_rna(dna):
    """Return the RNA complement of the DNA strand *dna*.

    Characters that are not in the translation table pass through unchanged.
    """
    rna = dna.translate(DNA_TO_RNA)
    return rna
--------------------------------------------------------------------------------
/tutorials/ThinkBayes/.ipynb_checkpoints/046-ImplimentingSuite-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 0
6 | }
7 |
--------------------------------------------------------------------------------
/tutorials/algorithms/notebooks/.ipynb_checkpoints/068-Lesson2-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 0
6 | }
7 |
--------------------------------------------------------------------------------
/tutorials/ThinkBayes/.ipynb_checkpoints/056 - Chap6DecisionAnalysis-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 0
6 | }
7 |
--------------------------------------------------------------------------------
/tutorials/exercism_py3/Ex5_hamming/hamming.py:
--------------------------------------------------------------------------------
def distance(x, y):
    """Return the Hamming distance between sequences *x* and *y*.

    Each element of *x* is compared with the element of *y* at the same
    index.  The length handling of the original is kept: if *y* is shorter
    than *x* an ``IndexError`` is raised, and if *y* is longer its extra
    tail is ignored.
    """
    # The loop variable gets its own name; the original reused ``x`` and so
    # shadowed the parameter it was iterating over.
    return sum(1 for i, item in enumerate(x) if item != y[i])
6 |
7 |
--------------------------------------------------------------------------------
/tutorials/exercism_py3/hello-world/hello_world2.py:
--------------------------------------------------------------------------------
1 | #
2 | # Skeleton file for the Python "Hello World" exercise.
3 | #
4 |
def hello(name=''):
    """Return a greeting for *name*, or for 'World' when *name* is falsy."""
    who = name if name else 'World'
    return 'Hello, %s!' % who
--------------------------------------------------------------------------------
/tutorials/exercism_py3/dna/dna.py:
--------------------------------------------------------------------------------
1 |
def to_rna(dna):
    """Return the RNA complement of the DNA strand *dna*.

    Raises ``KeyError`` for any character other than G, C, T or A.
    """
    complement = {'G': 'C', 'C': 'G', 'T': 'A', 'A': 'U'}

    rna = []
    for nucleotide in dna:
        rna.append(complement[nucleotide])
    return ''.join(rna)
8 |
9 |
--------------------------------------------------------------------------------
/gothonweb/sessions/40ad7454d4b4cbacedaa449f7e2c8fb04165ecf4:
--------------------------------------------------------------------------------
1 | KGRwMQpTJ2lwJwpwMgpWMTI3LjAuMC4xCnAzCnNTJ3Jvb20nCnA0Ck5zUydzZXNzaW9uX2lkJwpw
2 | NQpTJzQwYWQ3NDU0ZDRiNGNiYWNlZGFhNDQ5ZjdlMmM4ZmIwNDE2NWVjZjQnCnA2CnMu
3 |
--------------------------------------------------------------------------------
/tutorials/exercism_py3/word_count/wordcount3.py:
--------------------------------------------------------------------------------
1 | from collections import Counter
2 | import re
3 |
4 |
def word_count(phrase):
    """Return a ``Counter`` of the words in *phrase*.

    The phrase is lower-cased and underscores are treated as separators;
    a word is any run of ``\\w`` characters.
    """
    normalised = phrase.lower().replace('_', ' ')
    tally = Counter()
    for word in re.findall(r"[\w]+", normalised):
        tally[word] += 1
    return tally
--------------------------------------------------------------------------------
/tutorials/exercism_py3/pangram/pangram.py:
--------------------------------------------------------------------------------
1 |
2 | # -*- coding: UTF-8 -*-
3 |
4 | import re
5 |
def is_pangram(s):
    """Return True when *s* contains every letter a-z at least once.

    Everything that is not an ASCII letter is discarded before counting,
    and case is ignored.
    """
    ascii_letters = re.sub('[^a-zA-Z]', '', s)
    distinct = set(ascii_letters.lower())
    return len(distinct) == 26
11 |
12 |
--------------------------------------------------------------------------------
/firstform/templates/layout.html:
--------------------------------------------------------------------------------
1 | $def with (content)
2 |
3 |
4 |
5 | My first form
6 |
7 |
8 |
9 |
10 |
11 | $:content
12 |
13 |
14 |
--------------------------------------------------------------------------------
/gothonweb/templates/layout.html:
--------------------------------------------------------------------------------
1 | $def with (content)
2 |
3 |
4 |
5 | Interactive Game
6 |
7 |
8 |
9 |
10 |
11 | $:content
12 |
13 |
14 |
--------------------------------------------------------------------------------
/tutorials/exercism_py3/leap/year5.py:
--------------------------------------------------------------------------------
1 |
def is_leap_year(year):
    """Return True if *year* is a leap year in the Gregorian calendar.

    A year is a leap year when it is divisible by 4, except for century
    years, which must also be divisible by 400.
    """
    return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)


if __name__ == '__main__':

    year = int(input('Type in a year to test if it is a leap year\n> '))
    # Show the answer; the original called the function and dropped the
    # result, so running the script produced no output.
    print(is_leap_year(year))
9 |
10 |
--------------------------------------------------------------------------------
/SQL/galaXQL_17.sql:
--------------------------------------------------------------------------------
-- Highlight the single star with the most orbitals (planets plus moons).
--
-- Joining planets to moons yields one row per moon, so a planet with several
-- moons appears several times for its star.  Planets are therefore counted
-- with DISTINCT; moons appear once per row and need no DISTINCT.  The LEFT
-- OUTER JOINs keep stars without planets and planets without moons (their
-- NULL ids are not counted).
INSERT INTO hilight
SELECT stars.starid AS starid
FROM stars
LEFT OUTER JOIN planets ON stars.starid = planets.starid
LEFT OUTER JOIN moons ON planets.planetid = moons.planetid
GROUP BY stars.starid
ORDER BY (COUNT(DISTINCT planets.planetid) + COUNT(moons.moonid)) DESC
LIMIT 1
--------------------------------------------------------------------------------
/tutorials/exercism_py3/leap/leap.py:
--------------------------------------------------------------------------------
1 |
def is_leap_year(year):
    """Print whether *year* is a leap year and return the answer as a bool.

    A year is a leap year when it is divisible by 4, except for century
    years, which must also be divisible by 400.
    """
    # The original test treated every year divisible by 4 as a leap year,
    # so century years such as 1900 were reported wrongly.
    leap = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
    if leap:
        print("%d is a leap year!" % year)
    else:
        print("%d is not a leap year" % year)
    return leap


# Only prompt when run as a script, so importing this module does not block
# on input().
if __name__ == '__main__':
    year = int(input('Type in a year to test if it is a leap year\n> '))
    is_leap_year(year)
--------------------------------------------------------------------------------
/firstform/templates/index.html:
--------------------------------------------------------------------------------
1 | $def with (greeting)
2 |
3 | $if greeting:
4 | I would just like to say \
5 | $greeting.
6 | $else:
7 | Hello, world!
8 |
10 | Input Form takes you back to the
11 | submission form.
12 |
13 |
--------------------------------------------------------------------------------
/tutorials/exercism_py3/leap/year4.py:
--------------------------------------------------------------------------------
1 |
def is_leap_year(year):
    """Print whether *year* is a leap year and return the answer as a bool."""
    divisible_by_400 = year % 400 == 0
    ordinary_leap = year % 4 == 0 and year % 100 != 0

    # Guard clause: handle the non-leap case first.
    if not (ordinary_leap or divisible_by_400):
        print("%d is not a leap year" % year)
        return False

    print("%d is a leap year! " % year)
    return True


if __name__ == '__main__':
    year = int(input('Type in a year to test if it is a leap year\n> '))
    is_leap_year(year)
16 |
17 |
--------------------------------------------------------------------------------
/firstform/templates/hello_form.html:
--------------------------------------------------------------------------------
1 | Fill Out This Form, Please
2 |
3 |
4 |
14 |
--------------------------------------------------------------------------------
/tutorials/exercism_py3/hello-world/hello_world.py:
--------------------------------------------------------------------------------
1 | #
2 | # Skeleton file for the Python "Hello World" exercise.
3 | #
def hello(name=''):
    """Print and return a greeting.

    An empty *name* yields "Hello, World!"; anything else is inserted into
    'Hello, %s!'.
    """
    greeting = "Hello, World!" if name == '' else 'Hello, %s!' % name
    print(greeting)
    return greeting


if __name__ == '__main__':
    name = input('What is your name?\n> ')
    hello(name)
--------------------------------------------------------------------------------
/firstform/setup.py:
--------------------------------------------------------------------------------
1 | try:
2 | from setuptools import setup
3 | except ImportError:
4 | from distutils.core import setup
5 |
# Distribution metadata handed to setup().  'name' and 'packages' now refer
# to this project; the original carried another project's name ('gothonweb')
# and the template placeholder 'NAME', which is not a package directory here.
config = {
    'description': 'My Project',
    'author': 'Sophie Cowie',
    'url': 'URL to get it at.',
    'download_url': 'Where to download it.',
    'author_email': 'sophie_cowie@hotmail.com',
    'version': '0.1',
    'install_requires': ['nose'],
    'packages': ['firstform'],
    'scripts': [],
    'name': 'firstform',
}

setup(**config)
--------------------------------------------------------------------------------
/gothonweb/setup.py:
--------------------------------------------------------------------------------
1 | try:
2 | from setuptools import setup
3 | except ImportError:
4 | from distutils.core import setup
5 |
# Distribution metadata handed to setup().  The template placeholders
# 'projectname' and 'NAME' are replaced with values for this project.
# NOTE(review): 'bin' is chosen because it is the directory holding
# __init__.py, app.py and map.py -- confirm it is the package to ship.
config = {
    'description': 'My Project',
    'author': 'Sophie Cowie',
    'url': 'URL to get it at.',
    'download_url': 'Where to download it.',
    'author_email': 'sophie_cowie@hotmail.com',
    'version': '0.1',
    'install_requires': ['nose'],
    'packages': ['bin'],
    'scripts': [],
    'name': 'gothonweb',
}

setup(**config)
--------------------------------------------------------------------------------
/tutorials/exercism_py3/leap/year.py:
--------------------------------------------------------------------------------
1 |
def is_leap_year(year):
    """Print whether *year* is a leap year and return the answer as a bool.

    A year is a leap year when it is divisible by 4 and not by 100, or when
    it is divisible by 400.
    """
    divisible_by_4 = year % 4 == 0
    not_a_century = year % 100 != 0
    divisible_by_400 = year % 400 == 0

    # Use the booleans directly instead of comparing each one with True.
    if (divisible_by_4 and not_a_century) or divisible_by_400:
        print("%d is a leap year! " % year)
        return True

    print("%d is not a leap year" % year)
    return False


if __name__ == '__main__':
    year = int(input('Type in a year to test if it is a leap year\n> '))
    is_leap_year(year)
20 |
21 |
--------------------------------------------------------------------------------
/tutorials/exercism_py3/pangram/pangram2.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 |
# The 26 letters a pangram must contain.  There is deliberately no trailing
# space: with one, any sentence written without spaces was rejected.
ALPHABET = 'abcdefghijklmnopqrstuvwxyz'


def is_pangram(s):
    """Return True when *s* contains every letter in ALPHABET.

    Case is ignored; characters outside ALPHABET do not affect the result.
    """
    return set(s.lower()) >= set(ALPHABET)


if __name__ == '__main__':

    #is_pangram('the quick brown fox jumps over the lazy dog')
    # When I declare the encoding at the beginning, it doesnt throw up an error
    # with string here.
    string = 'Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich.'
    #new = string.encode('utf-8')
    is_pangram(string)
--------------------------------------------------------------------------------
/001-git-basics.md:
--------------------------------------------------------------------------------
1 | Some basic git commands I used today to set this up:
2 |
3 |
4 | `git init` initialises a local repository
5 |
6 |
7 | `git add` stages the work to Index
8 |
9 | `git commit -m "comment"` saves the work to the repository
10 |
11 |
12 | Now the locally saved work can be added to the remote repository.
13 |
14 | First you want to connect to the remote server:
15 | `git remote add origin git@github.com:SophMC/notechain`
16 |
17 |
18 | `git push -u origin master` pushes the commits to the remote; `-u` is only
19 | needed the first time. After that, run this from inside the local repo you want to push:
20 | `git push`
21 |
--------------------------------------------------------------------------------
/tutorials/exercism_py3/leap/leap_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from year5 import is_leap_year
4 |
5 |
class YearTest(unittest.TestCase):
    # Each case checks that is_leap_year returns the bool singleton itself
    # (assertIs), not merely a truthy or falsy value.

    def test_leap_year(self):
        result = is_leap_year(1996)
        self.assertIs(result, True)

    def test_non_leap_year(self):
        result = is_leap_year(1997)
        self.assertIs(result, False)

    def test_non_leap_even_year(self):
        result = is_leap_year(1998)
        self.assertIs(result, False)

    def test_century(self):
        # Divisible by 100 but not by 400.
        result = is_leap_year(1900)
        self.assertIs(result, False)

    def test_exceptional_century(self):
        # Divisible by 400.
        result = is_leap_year(2400)
        self.assertIs(result, True)


if __name__ == '__main__':
    unittest.main()
24 |
--------------------------------------------------------------------------------
/firstform/bin/tools.py:
--------------------------------------------------------------------------------
1 | from nose.tools import *
2 | import re
3 |
def assert_response(resp, contains=None, matches=None, headers=None,
                    status="200"):
    """Assert that a response object looks as expected.

    resp     -- object exposing ``status``, ``data`` and ``headers``.
    contains -- if given, must occur in ``resp.data``.
    matches  -- if given, a regular expression that must match at the
                start of ``resp.data``.
    headers  -- if given, must equal ``resp.headers``.
    status   -- text that must occur in ``resp.status`` (default "200").

    Raises AssertionError when any requested check fails.
    """
    assert status in resp.status, \
        "Expected response %r not in %r" % (status, resp.status)

    # A successful response is expected to carry a body.
    if status == "200":
        assert resp.data, "Response data is empty."

    if contains:
        assert contains in resp.data, \
            "Response does not contain %r" % contains

    if matches:
        reg = re.compile(matches)
        # Compiled patterns provide match(); the original called the
        # non-existent matches() and raised AttributeError.
        assert reg.match(resp.data), \
            "Response does not match %r" % matches

    if headers:
        assert_equal(resp.headers, headers)
--------------------------------------------------------------------------------
/firstform/bin/__init__.py:
--------------------------------------------------------------------------------
1 | from nose.tools import *
2 | import re
3 |
def assert_response(resp, contains=None, matches=None, headers=None,
                    status="200"):
    """Assert that a response object looks as expected.

    resp     -- object exposing ``status``, ``data`` and ``headers``.
    contains -- if given, must occur in ``resp.data``.
    matches  -- if given, a regular expression that must match at the
                start of ``resp.data``.
    headers  -- if given, must equal ``resp.headers``.
    status   -- text that must occur in ``resp.status`` (default "200").

    Raises AssertionError when any requested check fails.
    """
    assert status in resp.status, \
        "Expected response %r not in %r" % (status, resp.status)

    # A successful response is expected to carry a body.
    if status == "200":
        assert resp.data, "Response data is empty."

    if contains:
        assert contains in resp.data, \
            "Response does not contain %r" % contains

    if matches:
        reg = re.compile(matches)
        # Compiled patterns provide match(); the original called the
        # non-existent matches() and raised AttributeError.
        assert reg.match(resp.data), \
            "Response does not match %r" % matches

    if headers:
        assert_equal(resp.headers, headers)
--------------------------------------------------------------------------------
/firstform/tests/tools.py:
--------------------------------------------------------------------------------
1 | from nose.tools import *
2 | import re
3 |
4 |
5 |
def assert_response(resp, contains=None, matches=None, headers=None,
                    status="200"):
    """Assert that a response object looks as expected.

    resp     -- object exposing ``status``, ``data`` and ``headers``.
    contains -- if given, must occur in ``resp.data``.
    matches  -- if given, a regular expression that must match at the
                start of ``resp.data``.
    headers  -- if given, must equal ``resp.headers``.
    status   -- text that must occur in ``resp.status`` (default "200").

    Raises AssertionError when any requested check fails.
    """
    assert status in resp.status, \
        "Expected response %r not in %r" % (status, resp.status)

    # A successful response is expected to carry a body.
    if status == "200":
        assert resp.data, "Response data is empty."

    if contains:
        assert contains in resp.data, \
            "Response does not contain %r" % contains

    if matches:
        reg = re.compile(matches)
        # Compiled patterns provide match(); the original called the
        # non-existent matches() and raised AttributeError.
        assert reg.match(resp.data), \
            "Response does not match %r" % matches

    if headers:
        assert_equal(resp.headers, headers)
--------------------------------------------------------------------------------
/gothonweb/tests/tools.py:
--------------------------------------------------------------------------------
1 | from nose.tools import *
2 | import re
3 |
def assert_response(resp, contains=None, matches=None, headers=None,
                    status="200"):
    """Assert that a response object looks as expected.

    resp     -- object exposing ``status``, ``data`` and ``headers``.
    contains -- if given, must occur in ``resp.data``.
    matches  -- if given, a regular expression that must match at the
                start of ``resp.data``.
    headers  -- if given, must equal ``resp.headers``.
    status   -- text that must occur in ``resp.status`` (default "200").

    Raises AssertionError when any requested check fails.
    """
    assert status in resp.status, \
        "Expected response %r not in %r" % (status, resp.status)

    # A successful response is expected to carry a body.
    if status == "200":
        assert resp.data, "Response data is empty."

    if contains:
        # Confirm the expected text is in resp.data; if it is not, fail
        # with "Response does not contain ...".
        assert contains in resp.data, \
            "Response does not contain %r" % contains

    if matches:
        reg = re.compile(matches)
        # Compiled patterns provide match(); the original called the
        # non-existent matches() and raised AttributeError.
        assert reg.match(resp.data), \
            "Response does not match %r" % matches

    if headers:
        assert_equal(resp.headers, headers)
--------------------------------------------------------------------------------
/tutorials/exercism_py3/word_count/wordcount2.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import re
4 |
def word_count(sentence):
    """Return a dict mapping each word in *sentence* to its occurrence count.

    Commas and underscores act as word separators, all other punctuation
    is dropped, and the text is lower-cased before counting.  Keys appear
    in order of first occurrence.
    """
    from collections import Counter

    # Commas and underscores separate words, so turn them into spaces.
    sentence = re.sub('[,_]', ' ', sentence)

    # Drop everything that is neither whitespace nor a word character.
    sentence = re.sub(r'[^\s\w_]+', '', sentence.lower())

    # Count the tokens in a single pass; the original re-scanned the whole
    # text with a regex once per word.
    return dict(Counter(sentence.split()))
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/tutorials/exercism_py3/word_count/README.md:
--------------------------------------------------------------------------------
1 | # Word Count
2 |
3 | Write a program that given a phrase can count the occurrences of each word in that phrase.
4 |
5 | For example for the input `"olly olly in come free"`
6 |
7 | ```plain
8 | olly: 2
9 | in: 1
10 | come: 1
11 | free: 1
12 | ```
13 |
14 |
15 | ### Submitting Exercises
16 |
17 | Note that, when trying to submit an exercise, make sure the solution is in the `exercism/python/` directory.
18 |
19 | For example, if you're submitting `bob.py` for the Bob exercise, the submit command would be something like `exercism submit /python/bob/bob.py`.
20 |
21 |
22 | For more detailed information about running tests, code style and linting,
23 | please see the [help page](http://exercism.io/languages/python).
24 |
25 | ## Source
26 |
27 | This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour.
28 |
--------------------------------------------------------------------------------
/firstform/tests/app_tests.py:
--------------------------------------------------------------------------------
1 | from nose.tools import *
2 | #How to import an application and run it directly for the automated test!
3 | #Important!
4 | from bin.app import app
5 | #From dir tests, import assert_response function from tools.py
6 | from tests.tools import assert_response
7 |
def test_index():
    # "/" is expected to answer with a 404.
    root_resp = app.request("/")
    assert_response(root_resp, status="404")

    # A plain GET of /hello is checked against the default status.
    form_resp = app.request("/hello")
    assert_response(form_resp)

    # POST without form fields: the reply should mention "Nobody".
    default_resp = app.request("/hello", method="POST")
    assert_response(default_resp, contains="Nobody")

    # POST with explicit values: the submitted name should be echoed back.
    fields = {'name': 'Zed', 'greet': 'Hola'}
    named_resp = app.request("/hello", method="POST", data=fields)
    assert_response(named_resp, contains="Zed")
--------------------------------------------------------------------------------
/gothonweb/tests/app_tests.py:
--------------------------------------------------------------------------------
1 | from nose.tools import *
2 | #How to import an application and run it directly for the automated test!
3 | #Important!
4 | from bin.app import app
5 | #From dir tests, import assert_response function from tools.py
6 | from tests.tools import assert_response
7 |
def test_index():
    # check that we get a 404 on the / URL
    resp = app.request("/")
    assert_response(resp,status="404")

    # test a GET request to /game; assert_response checks the default
    # status "200" and that the body is not empty
    resp = app.request("/game")
    assert_response(resp)

    # TODO: the POST checks below are disabled.  They were copied from the
    # /hello form tests and still need adapting to /game.
    #resp = app.request("/game", method="POST")
    #assert_response(resp, action=None)

    #data = {'name':'Zed','greet':'Hola'}
    #resp = app.request("/hello", method="POST",data=data)
    #assert_response(resp,contains="Zed")
--------------------------------------------------------------------------------
/tutorials/algorithms/notebooks/068-Lesson2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "m $\\in \\Theta$ (n)"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {
14 | "collapsed": true
15 | },
16 | "outputs": [],
17 | "source": []
18 | }
19 | ],
20 | "metadata": {
21 | "anaconda-cloud": {},
22 | "kernelspec": {
23 | "display_name": "Python [Root]",
24 | "language": "python",
25 | "name": "Python [Root]"
26 | },
27 | "language_info": {
28 | "codemirror_mode": {
29 | "name": "ipython",
30 | "version": 3
31 | },
32 | "file_extension": ".py",
33 | "mimetype": "text/x-python",
34 | "name": "python",
35 | "nbconvert_exporter": "python",
36 | "pygments_lexer": "ipython3",
37 | "version": "3.5.2"
38 | }
39 | },
40 | "nbformat": 4,
41 | "nbformat_minor": 0
42 | }
43 |
--------------------------------------------------------------------------------
/tutorials/exercism_py3/hello-world/hello_world_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import unicode_literals
4 | import unittest
5 |
6 | import hello_world2
7 |
8 |
class HelloWorldTests(unittest.TestCase):
    # Every case compares the expected greeting with what hello_world2.hello
    # returns for the given name.

    def test_hello_without_name(self):
        actual = hello_world2.hello()
        self.assertEqual('Hello, World!', actual)

    def test_hello_with_sample_name(self):
        actual = hello_world2.hello('Alice')
        self.assertEqual('Hello, Alice!', actual)

    def test_hello_with_other_sample_name(self):
        actual = hello_world2.hello('Bob')
        self.assertEqual('Hello, Bob!', actual)

    def test_hello_with_umlaut_name(self):
        # Non-ASCII names must come through unchanged.
        actual = hello_world2.hello('Jürgen')
        self.assertEqual('Hello, Jürgen!', actual)


if __name__ == '__main__':
    unittest.main()
37 |
--------------------------------------------------------------------------------
/tutorials/exercism_py3/hello-world/hello_world_test2.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import unicode_literals
4 | import unittest
5 |
6 | import hello_world2
7 |
8 |
class HelloWorldTests(unittest.TestCase):
    # Every case compares the expected greeting with what hello_world2.hello
    # returns for the given name.

    def test_hello_without_name(self):
        greeting = hello_world2.hello()
        self.assertEqual('Hello, World!', greeting)

    def test_hello_with_sample_name(self):
        greeting = hello_world2.hello('Alice')
        self.assertEqual('Hello, Alice!', greeting)

    def test_hello_with_other_sample_name(self):
        greeting = hello_world2.hello('Bob')
        self.assertEqual('Hello, Bob!', greeting)

    def test_hello_with_umlaut_name(self):
        # Non-ASCII names must come through unchanged.
        greeting = hello_world2.hello('Jürgen')
        self.assertEqual('Hello, Jürgen!', greeting)


if __name__ == '__main__':
    unittest.main()
37 |
--------------------------------------------------------------------------------
/firstform/bin/app.py:
--------------------------------------------------------------------------------
1 | import web
2 |
# URL routing table: the pattern '/hello' is paired with the name of the
# handler class, 'index'.  A request for /hello is handled by that class.
urls = (
'/hello', 'index'
)

'''Whenever /hello is accessed while this app is running, it will begin
a chain of processes starting from here. /hello is the key for index'''

# The application resolves handler names such as 'index' through globals().
app = web.application(urls, globals())

# Templates are read from templates/ and rendered with "layout" as the base.
render = web.template.render('templates/', base="layout")
16 |
class index:
    # Handler for /hello: GET shows the form, POST renders the greeting.

    def GET(self):
        # Render the hello_form.html template.
        page = render.hello_form()
        return page

    def POST(self):
        # "Nobody" and "Hello" are the defaults used when the form does not
        # supply a name or a greeting.
        form = web.input(name="Nobody", greet="Hello")
        greeting = "%s, %s" % (form.greet, form.name)
        page = render.index(greeting=greeting)
        return page
28 |
# Start the application only when this file is run as a script, so that
# importing ``app`` (as the tests do) does not start it.
if __name__ == "__main__":
    app.run()
--------------------------------------------------------------------------------
/gothonweb/templates/show_room.html:
--------------------------------------------------------------------------------
1 | $def with (room)
2 |
3 | $room.name
4 |
5 |
6 | $room.description
7 |
8 |
9 | $if room.name == "death":
10 | Play Again?
11 |
12 |
13 | $if room.name == "Central Corridor":
14 |
15 |
19 |
20 |
21 |
22 | $if room.name == "Laser Weapon Armory":
23 |
24 |
29 |
30 |
31 | $if room.name == "The Bridge":
32 |
33 |
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/tutorials/exercism_py3/pangram/pangram_detailed.py:
--------------------------------------------------------------------------------
1 |
2 | # -*- coding: UTF-8 -*-
3 |
4 | import re
5 |
def is_pangram(s):
    """Report whether ``s`` contains every letter a-z, ignoring case.

    Prints the distinct letters found followed by a verdict line, then
    returns True for a pangram and False otherwise.  Only the ASCII
    letters are counted; every other character (digits, punctuation,
    accented letters) is removed first.
    """
    # A leading ^ inside a character class negates it, so this deletes
    # everything that is not an ASCII letter.
    letters = re.sub('[^a-zA-Z]', '', s)

    # Lower-case before de-duplicating so 'A' and 'a' count once.
    found_all = len(set(letters.lower())) == 26

    # The printed list is built from the letters in their original case.
    print(list(set(letters)))
    if found_all:
        print('This is a pangram')
    else:
        print('This is not a pangram')
    return found_all
28 |
29 |
if __name__ == '__main__':

    #is_pangram('the quick brown fox jumps over the lazy dog')
    # Adjacent string literals are joined with nothing in between, so the
    # space separating "Sylter" and "Deich." has to be written explicitly.
    is_pangram('Victor jagt zwölf Boxkämpfer quer über den großen Sylter '
               'Deich.')
35 |
36 | #set(list(s.lower())) >= set(ALPHABET)
--------------------------------------------------------------------------------
/monkeylearn/015-selectdata.py:
--------------------------------------------------------------------------------
1 | import json
2 | import pandas as pd
3 | import requests
4 |
# Read the MonkeyLearn API token from a local file so it stays out of the
# source.  (A later reassignment from an undefined name used to clobber
# this value and raise NameError; it has been removed.)
with open('API_key.txt') as f:
    API_KEY = f.read().strip()

# Load the scraped job adverts.
# NOTE(review): error_bad_lines=False presumably skips malformed rows;
# newer pandas releases spell this on_bad_lines='skip' -- confirm against
# the installed version before changing it.
raw_df = pd.read_csv('indeed_edin.csv', encoding='utf-8',
                     error_bad_lines=False)
#turnstilelink_link_1/_text

# Keep only the columns that are used and give them short names.
df = raw_df[['location_value', 'turnstilelink_link_1/_text',
             'summary_description']]
df.columns = ['location', 'title', 'description']

# One text per advert: the title followed by the description.
content_df = list(df.title + ' ' + df.description)

categories = []
step = 150  # number of texts sent per request
# range() works on both Python 2 and 3 (xrange is Python 2 only).
for start in range(0, len(content_df), step):
    end = start + step

    response = requests.post(
        "https://api.monkeylearn.com/v2/classifiers/cl_4PFzSWVR/classify/",
        data=json.dumps({'text_list': content_df[start:end]}),
        headers={'Authorization': 'Token {}'.format(API_KEY),
                 'Content-Type': 'application/json'}).json()

    # We go through the results of the API call, storing the label of the
    # first entry of each result in the list.
    for category in response['result']:
        categories.append(category[0]['label'])

# Attach the collected labels as a new 'category' column and save.
augmented_df = df.join(pd.DataFrame(categories, columns=['category']))
augmented_df.to_csv('indeed_aug.csv', encoding='utf-8', index=False,
                    header=False)
--------------------------------------------------------------------------------
/tutorials/exercism_py3/leap/README.md:
--------------------------------------------------------------------------------
1 | # Leap
2 |
3 | Write a program that will take a year and report if it is a leap year.
4 |
5 | The tricky thing here is that a leap year in the Gregorian calendar occurs:
6 |
7 | ```plain
8 | on every year that is evenly divisible by 4
9 | except every year that is evenly divisible by 100
10 | unless the year is also evenly divisible by 400
11 | ```
12 |
13 | For example, 1997 is not a leap year, but 1996 is. 1900 is not a leap
14 | year, but 2000 is.
15 |
16 | If your language provides a method in the standard library that does
17 | this look-up, pretend it doesn't exist and implement it yourself.
18 |
19 | ## Notes
20 |
21 | Though our exercise adopts some very simple rules, there is more to
22 | learn!
23 |
24 | For a delightful, four-minute explanation of the whole leap year
25 | phenomenon, go watch [this youtube video][video].
26 |
27 | [video]: http://www.youtube.com/watch?v=xX96xng7sAE
28 |
29 | ### Submitting Exercises
30 |
31 | Note that, when trying to submit an exercise, make sure the solution is in the `exercism/python/` directory.
32 |
33 | For example, if you're submitting `bob.py` for the Bob exercise, the submit command would be something like `exercism submit /python/bob/bob.py`.
34 |
35 |
36 | For more detailed information about running tests, code style and linting,
37 | please see the [help page](http://exercism.io/languages/python).
38 |
39 | ## Source
40 |
41 | JavaRanch Cattle Drive, exercise 3 [http://www.javaranch.com/leap.jsp](http://www.javaranch.com/leap.jsp)
42 |
--------------------------------------------------------------------------------
/DSFromScratch/Chap13/machine_learning.py:
--------------------------------------------------------------------------------
1 | from collections import Counter
2 | import math, random
3 |
4 | #
5 | # data splitting
6 | #
7 |
def split_data(data, prob):
    """Randomly partition ``data`` into two lists.

    Each row goes to the first list with probability ``prob`` and to the
    second otherwise, giving expected fractions [prob, 1 - prob].
    Returns the pair ``(first, second)``.
    """
    selected, remainder = [], []
    for row in data:
        # One random draw per row decides which side it lands on.
        if random.random() < prob:
            selected.append(row)
        else:
            remainder.append(row)
    return selected, remainder
14 |
def _unzip_pairs(pairs):
    """Split a list of (x, y) pairs into a tuple of xs and a tuple of ys."""
    if not pairs:
        # zip(*[]) yields nothing, which cannot be unpacked into two names.
        return (), ()
    xs, ys = zip(*pairs)
    return xs, ys


def train_test_split(x, y, test_pct):
    """Randomly split paired inputs ``x`` and outputs ``y`` into train/test.

    Corresponding elements of ``x`` and ``y`` are kept together.  Each pair
    goes to the training side with probability ``1 - test_pct``.

    Returns ``(x_train, x_test, y_train, y_test)``, each a tuple.  A side
    that receives no pairs is returned as empty tuples instead of raising
    ValueError.
    """
    data = list(zip(x, y))                         # pair corresponding values
    train, test = split_data(data, 1 - test_pct)   # split the list of pairs
    x_train, y_train = _unzip_pairs(train)
    x_test, y_test = _unzip_pairs(test)
    return x_train, x_test, y_train, y_test
21 |
22 | #
23 | # correctness
24 | #
25 |
def accuracy(tp, fp, fn, tn):
    """Return the fraction of all predictions that were correct.

    Arguments are the counts of true positives, false positives, false
    negatives and true negatives.
    """
    return (tp + tn) / (tp + fp + fn + tn)
30 |
def precision(tp, fp, fn, tn):
    """Return the share of positive predictions that were true positives.

    ``fn`` and ``tn`` are accepted so that all the metrics share one
    signature; they are not used here.
    """
    predicted_positive = tp + fp
    return tp / predicted_positive
33 |
def recall(tp, fp, fn, tn):
    """Return the share of actual positives that were predicted positive.

    ``fp`` and ``tn`` are accepted so that all the metrics share one
    signature; they are not used here.
    """
    actual_positive = tp + fn
    return tp / actual_positive
36 |
def f1_score(tp, fp, fn, tn):
    """Return the harmonic mean of precision and recall.

    Precision is tp / (tp + fp) and recall is tp / (tp + fn); ``tn`` does
    not enter the result.
    """
    p = tp / (tp + fp)
    r = tp / (tp + fn)
    numerator = 2 * p * r
    return numerator / (p + r)
42 |
if __name__ == "__main__":

    # Print every metric for the same sample confusion-matrix counts.
    for metric in (accuracy, precision, recall, f1_score):
        print("%s(70, 4930, 13930, 981070)" % metric.__name__,
              metric(70, 4930, 13930, 981070))
--------------------------------------------------------------------------------
/gothonweb/tests/map_tests.py:
--------------------------------------------------------------------------------
1 | from nose.tools import *
2 | #from map file in dir bin import everything in map file.
3 | #This originally had just a class but as it has class instances also
4 | # defined, we want to import everything in the file.
5 | from bin.map import *
6 |
def test_room():
    """A freshly created Room keeps its name and starts with no paths."""
    description = """This room has gold in it you can grab. There's a
door to the north."""
    gold = Room("GoldRoom", description)
    assert_equal(gold.name, "GoldRoom")
    assert_equal(gold.paths, {})
12 |
def test_room_paths():
    """Paths added with add_paths() are followed by go()."""
    center = Room("Center", "Test room in the center.")
    north = Room("North", "Test room in the north.")
    south = Room("South", "Test room in the south.")

    exits = {'north': north, 'south': south}
    center.add_paths(exits)

    # Each direction must lead to the room registered under it.
    for direction, expected in (('north', north), ('south', south)):
        assert_equal(center.go(direction), expected)
21 |
22 |
def test_map():
    """Rooms linked in both directions can be walked there and back."""
    start = Room("Start", "You can go west and down a hole.")
    west = Room("Trees", "There are trees here, you can go east.")
    down = Room("Dungeon", "It's dark down here, you can go up.")

    # Two exits leave start; each neighbour has one exit leading back.
    for room, exits in ((start, {'west': west, 'down': down}),
                        (west, {'east': start}),
                        (down, {'up': start})):
        room.add_paths(exits)

    one_step = start.go('west')
    assert_equal(one_step, west)
    assert_equal(start.go('west').go('east'), start)
    assert_equal(start.go('down').go('up'), start)
35 |
def test_gothon_game_map():
    """Check three of the exits of the game's START room."""
    # Both of these actions are expected to lead to generic_death.
    for fatal_action in ('shoot!', 'dodge!'):
        assert_equal(START.go(fatal_action), generic_death)

    assert_equal(START.go('tell a joke'), laser_weapon_armory)
--------------------------------------------------------------------------------
/Titanic/bin/clean_test.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
# Load the raw test set.
df = pd.read_csv('/home/sophie/projects/Titanic/data/test.csv', header=0)

# Encode Sex as a numeric Gender column: female -> 0, male -> 1.
df['Gender'] = df['Sex'].map({'female': 0, 'male': 1}).astype(float)

# Drop the columns that are not used (Sex is now covered by Gender).
df = df.drop(['Name', 'Cabin', 'Ticket', 'Sex'], axis=1)

# Discard rows with a missing Embarked or Fare value.
df = df.dropna(subset=['Embarked', 'Fare'])

# Encode the port of embarkation as a float: C -> 1, Q -> 2, S -> 3.
df['Embarked'] = df['Embarked'].map({'C': 1, 'Q': 2, 'S': 3}).astype(float)

# Missing ages are filled with the median age of the passenger's
# gender/class group.  Rows of the table are Gender 0/1, columns are
# Pclass 1-3.
median_ages = np.zeros((2, 3))

# Work on a copy of Age so the original values stay available for the
# medians while the gaps are being filled.
df['AgeFill'] = df['Age']

for i in range(2):
    for j in range(3):
        in_group = (df['Gender'] == i) & (df['Pclass'] == j + 1)
        median_ages[i, j] = df[in_group]['Age'].dropna().median()
        # df.loc selects both the rows (missing Age within this group)
        # and the column to overwrite.
        df.loc[df.Age.isnull() & in_group, 'AgeFill'] = median_ages[i, j]

# Age is superseded by AgeFill.
df = df.drop(['Age'], axis=1)

# Convert every remaining column to float.
df = df.astype(float)

# Write the cleaned table, space separated, for the prediction step.
df.to_csv('/home/sophie/projects/Titanic/data/clean_test.csv', sep=" ",
          index=False)
51 |
52 |
53 |
54 |
55 |
56 |
--------------------------------------------------------------------------------
/windspeed/scripts/012-ws_tseries.py:
--------------------------------------------------------------------------------
1 | #The aim of this script is to produce a timeseries of windspeed for each
2 | #station, with lines for winds at 0000, 0600, 1200 and 1800
3 |
4 | import glob,os
5 | import pandas as pd
6 |
7 | #change the directory in here first
8 | os.chdir("/home/sophie/projects/windspeed/data/")
9 | fname_list = glob.glob('*allwinds.txt')
10 |
def read_file(fname):
    '''Ask which file to load, read it into a dataframe and hand it on.

    fname is a list of file names.  The list is printed with its indices,
    the user types the index of the file to use, and that file is read as
    space-separated columns year, month, day, hour, ws.  A date_time
    column is built from the first three columns plus the hour.

    Returns whatever data_subs(wind, location) returns.
    '''
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(""" %s please select the index of the following files to make a plot \
of: """ % list(enumerate(fname)))
    location = int(raw_input("> "))

    date_spec = {'date_time': [0, 1, 2]}
    column_names = ["year", "month", "day", "hour", "ws"]

    # Use the list that was passed in rather than the module-level
    # fname_list, so the function also works for other callers.
    chosen = fname[location]
    print(chosen)
    wind = pd.read_csv(chosen, sep=" ", parse_dates=date_spec,
                       keep_date_col=True, names=column_names,
                       index_col=False)
    #Dealing with hour - going from 600, 1200 etc to 6,12, 18
    wind["hour"] = (wind["hour"]/100).astype(int)

    #adding a date_time column with timestamp data
    wind['date_time'] = pd.to_datetime(wind.date_time) + \
        wind.hour.astype('timedelta64[h]')

    print("here the data from %s will be split up" % chosen)
    print("location index= %d" % location)
    print("wind dataframe= %r" % wind[0:5])
    return data_subs(wind, location)
37 |
38 |
def data_subs(wind,location):
    '''Placeholder for splitting the dataframe into four for plotting.

    For now it only prints the first five rows of wind and the location
    index, and returns None.
    '''
    print(wind[0:5])
    print(location)
46 |
47 | if __name__ == "__main__":
48 |
49 | data = read_file(fname_list)
50 | #data_subs(wind, location)
--------------------------------------------------------------------------------
/tutorials/ThinkBayes/056 - Chap6DecisionAnalysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "### Chap 6: Decision Analysis\n",
8 | "\n",
9 | "How to decide on the price of a showcase?\n",
10 | "Bayesian thinking towards an answer:\n",
11 | "1) Prior beliefs on what the showcase prices could be: Analyse previous prices on the show.\n",
12 | "2) Likelihood/Update: Seeing the prizes, how should you update? i.e. How to interpret the data?\n",
13 | "3) Results from Update on the Prior: the Posterior. How to choose from the posterior distribution?\n",
14 | "\n",
15 | "All of these steps require subjective decisions. \n",
16 | "\n",
17 | "**Modeling the contestants**\n",
18 | "If you were a contestant on the show you could use this distribution (fig 6.1) to quantify your prior belief about the price of each showcase (before you even see the prizes). \n",
19 | "To Update, we have to answer these questions:\n",
20 | "\n",
21 | "1) What data should we consider and how should we quantify it?\n",
22 | "2) Can we compute a likelihood function; i.e., for each hypothetical value of `price`, can we compute the conditional likelihood of the data?"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": null,
28 | "metadata": {
29 | "collapsed": true
30 | },
31 | "outputs": [],
32 | "source": []
33 | }
34 | ],
35 | "metadata": {
36 | "kernelspec": {
37 | "display_name": "Python 3",
38 | "language": "python",
39 | "name": "python3"
40 | },
41 | "language_info": {
42 | "codemirror_mode": {
43 | "name": "ipython",
44 | "version": 3
45 | },
46 | "file_extension": ".py",
47 | "mimetype": "text/x-python",
48 | "name": "python",
49 | "nbconvert_exporter": "python",
50 | "pygments_lexer": "ipython3",
51 | "version": "3.5.1"
52 | }
53 | },
54 | "nbformat": 4,
55 | "nbformat_minor": 0
56 | }
57 |
--------------------------------------------------------------------------------
/tutorials/KaggleNLP/word_vectors.py:
--------------------------------------------------------------------------------
1 | # Download the punkt tokenizer for sentence splitting
2 | import nltk.data
3 |
4 | # Import various modules for string cleaning
5 | from bs4 import BeautifulSoup
6 | import re
7 | from nltk.corpus import stopwords
8 | import pandas as pd
9 |
10 | #Load the punkt tokenizer
11 | tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
12 |
13 | # Define a function to split a review into parsed sentences
def review_to_sentences(review, tokenizer, remove_stopwords=False):
    """Break a review into sentences, each given as a list of words.

    ``tokenizer.tokenize`` splits the stripped review into raw sentences;
    every non-empty one is converted with ``review_to_wordlist``, which
    also receives ``remove_stopwords``.  Returns a list of word lists.
    """
    stripped = review.strip()
    return [
        review_to_wordlist(chunk, remove_stopwords)
        for chunk in tokenizer.tokenize(stripped)
        if len(chunk) > 0  # skip empty sentences
    ]
31 |
32 |
def review_to_wordlist(review, remove_stopwords=False):
    """Convert a document to a list of lower-case words.

    HTML markup is stripped, every character that is not an ASCII letter
    is replaced by a space, and the text is lower-cased and split on
    whitespace.  With ``remove_stopwords`` set, words found in NLTK's
    English stop word list are dropped.  Returns a list of words.
    """
    # 1. Remove HTML.  The parser is named explicitly so the result does
    #    not depend on which optional parsers happen to be installed (and
    #    Beautiful Soup does not warn about the missing argument).
    review_text = BeautifulSoup(review, "html.parser").get_text()
    # 2. Remove non-letters
    review_text = re.sub("[^a-zA-Z]", " ", review_text)
    # 3. Convert words to lower case and split them
    words = review_text.lower().split()
    # 4. Optionally remove stop words (false by default); a set makes the
    #    membership test cheap.
    if remove_stopwords:
        stops = set(stopwords.words("english"))
        words = [w for w in words if w not in stops]
    # 5. Return a list of words
    return words
48 |
49 |
--------------------------------------------------------------------------------
/tutorials/K-means/kmeans.py:
--------------------------------------------------------------------------------
1 | # supporting lib for kmeans clustering
2 | # Nitin Borwankar
3 | # Open Data Science Training
4 |
5 | import numpy as np
6 | from scipy.cluster.vq import kmeans,vq
7 | from scipy.spatial.distance import cdist
8 | import matplotlib.pyplot as plt
9 |
10 |
def load_data(fName = '../datasets/UN4col.csv'):
    """Read the numeric text file ``fName`` into a NumPy array.

    The file is parsed by ``np.loadtxt`` with its default settings.  The
    ``with`` block closes the file even when parsing raises.
    """
    with open(fName) as fp:
        return np.loadtxt(fp)
16 |
17 |
def run_kmeans(X, n=10):
    """Run k-means on ``X`` for every k in ``range(1, n)``.

    Returns the tuple ``(K, KM, centroids, D_k, cIdx, dist, avgWithinSS)``
    where ``K`` is the range of k values and each other item is a list
    with one entry per k:

    * ``KM`` - the raw result of ``scipy.cluster.vq.kmeans``
    * ``centroids`` - first element of that result
    * ``D_k`` - Euclidean distances from every row of ``X`` to every centroid
    * ``cIdx`` - index of the nearest centroid for every row
    * ``dist`` - distance to that nearest centroid
    * ``avgWithinSS`` - sum of ``dist`` divided by the number of rows
    """
    k_values = range(1, n)
    n_rows = X.shape[0]

    fits, centers, all_dists = [], [], []
    nearest_idx, nearest_dist, avg_within = [], [], []
    for k in k_values:
        fit = kmeans(X, k)
        center = fit[0]
        to_centers = cdist(X, center, 'euclidean')
        closest = np.min(to_centers, axis=1)

        fits.append(fit)
        centers.append(center)
        all_dists.append(to_centers)
        nearest_idx.append(np.argmin(to_centers, axis=1))
        nearest_dist.append(closest)
        avg_within.append(sum(closest) / n_rows)

    return (k_values, fits, centers, all_dists, nearest_idx, nearest_dist,
            avg_within)
32 |
def plot_elbow_curve(kIdx, K, avgWithinSS):
    """Plot ``avgWithinSS`` against ``K`` and ring the point at ``kIdx``.

    Returns the ``(figure, axes)`` pair that was drawn on.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(K, avgWithinSS, 'b*-')

    # Hollow red circle around the selected point.
    ring = dict(marker='o', markersize=12, markeredgewidth=2,
                markeredgecolor='r', markerfacecolor='None')
    ax.plot(K[kIdx], avgWithinSS[kIdx], **ring)

    plt.grid(True)
    plt.xlabel('Number of clusters')
    plt.ylabel('Average within-cluster sum of squares')
    plt.title('Elbow for KMeans clustering')
    return fig, ax
44 |
def plot_clusters(orig,pred,nx,ny,legend=True):
    """Scatter column ``ny`` against column ``nx`` of ``orig`` per cluster.

    Rows are grouped by their label in ``pred`` (0, 1 or 2) and each group
    gets its own colour and legend entry.  ``ny`` also selects the y-axis
    label and must be 0, 1 or 2.  Returns the plot handles for clusters
    0, 1 and 2, in that order.
    """
    import matplotlib.pyplot as plt
    ylabels = { 0:'Male life expectancy in yrs',1:'Female life expectancy in yrs',2:'Infant mortality, per 1000'}

    # Drawn in the order 0, 2, 1.
    styles = ((0, 'ro', 'Underdeveloped'),
              (2, 'go', 'Developing'),
              (1, 'bo', 'Developed'))
    handles = {}
    for cluster, fmt, label in styles:
        handles[cluster] = plt.plot(orig[pred==cluster,nx],
                                    orig[pred==cluster,ny], fmt, label=label)

    axes = handles[1][0].axes
    axes.set_xlabel('Per Capita GDP in US$')
    axes.set_ylabel(ylabels[ny])
    plt.title('UN countries Dataset, KMeans clustering with K=3')
    if legend:
        plt.legend()
    return (handles[0], handles[1], handles[2])
60 |
61 |
--------------------------------------------------------------------------------
/gothonweb/bin/app.py:
--------------------------------------------------------------------------------
1 | import web
2 | import map
3 |
4 | urls = ("/game", "GameEngine", "/", "Index")
5 |
6 | app = web.application(urls, globals())
7 |
8 | #little hack so that debug mode works with sessions
9 | #
10 | if web.config.get('_session') is None:
11 | store = web.session.DiskStore('sessions')
12 | session = web.session.Session(app, store, initializer={'room': None})
13 |
14 | web.config._session = session
15 | else:
16 | session = web.config._session
17 |
18 | render = web.template.render('templates/', base="layout")
19 |
class Index(object):
    """Handler for "/": start a new game and redirect to "/game"."""

    def GET(self):
        # Set up the session with its starting value: the first room of
        # the map.
        session.room = map.START
        # Send the browser on to the GameEngine handler.  web.py documents
        # redirects as exceptions to be raised, so raise it instead of
        # only constructing it.
        raise web.seeother("/game")
27 |
28 | class GameEngine(object):
29 | #inside the html page you have standard
30 | def GET(self):
31 | #session.room should = TRUE, either because it has been through Index,
32 | #or been given another link
33 | if session.room:
34 | #make html page from show_room.html. Take session.room as the
35 | #variable in the html page, accessed by $
36 | return render.show_room(room=session.room)
37 | else:
38 | # why is there here? do you need it?
39 | #if something is passed to session.room which is not recognised, it
40 | #won't fail
41 | return render.you_died()
42 |
43 | def POST(self):
44 | #inside