├── .gitignore ├── 0-search ├── degrees │ ├── degrees.py │ ├── small │ │ ├── movies.csv │ │ ├── people.csv │ │ └── stars.csv │ └── util.py └── tictactoe │ ├── OpenSans-Regular.ttf │ ├── requirements.txt │ ├── runner.py │ └── tictactoe.py ├── 1-knowledge ├── knights │ ├── logic.py │ └── puzzle.py └── minesweeper │ ├── assets │ ├── fonts │ │ └── OpenSans-Regular.ttf │ └── images │ │ ├── flag.png │ │ └── mine.png │ ├── minesweeper.py │ ├── requirements.txt │ └── runner.py ├── 2-uncertainity ├── heredity │ ├── data │ │ ├── family0.csv │ │ ├── family1.csv │ │ └── family2.csv │ └── heredity.py └── pagerank │ ├── corpus0 │ ├── 1.html │ ├── 2.html │ ├── 3.html │ └── 4.html │ ├── corpus1 │ ├── bfs.html │ ├── dfs.html │ ├── games.html │ ├── minesweeper.html │ ├── minimax.html │ ├── search.html │ └── tictactoe.html │ ├── corpus2 │ ├── ai.html │ ├── algorithms.html │ ├── c.html │ ├── inference.html │ ├── logic.html │ ├── programming.html │ ├── python.html │ └── recursion.html │ └── pagerank.py ├── 3-optimization └── crossword │ ├── assets │ └── fonts │ │ └── OpenSans-Regular.ttf │ ├── crossword.py │ ├── data │ ├── structure0.txt │ ├── structure1.txt │ ├── structure2.txt │ ├── words0.txt │ ├── words1.txt │ └── words2.txt │ └── generate.py ├── 4-learning ├── nim │ ├── nim.py │ └── play.py └── shopping │ ├── shopping.csv │ └── shopping.py ├── 5-neural-networks └── traffic │ ├── requirements.txt │ └── traffic.py └── 6-language ├── parser ├── parser.py ├── requirements.txt └── sentences │ ├── 1.txt │ ├── 10.txt │ ├── 2.txt │ ├── 3.txt │ ├── 4.txt │ ├── 5.txt │ ├── 6.txt │ ├── 7.txt │ ├── 8.txt │ └── 9.txt └── questions ├── corpus ├── artificial_intelligence.txt ├── machine_learning.txt ├── natural_language_processing.txt ├── neural_network.txt ├── probability.txt └── python.txt ├── questions.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | env*/ 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask instance folder 58 | instance/ 59 | 60 | # Sphinx documentation 61 | docs/_build/ 62 | 63 | # PyBuilder 64 | target/ 65 | 66 | # IPython Notebook 67 | .ipynb_checkpoints 68 | 69 | # pyenv 70 | .python-version 71 | 72 | # For pycharm 73 | .idea/ 74 | 75 | *.sqlite3 76 | 77 | # user-added 78 | bower_components/ 79 | node_modules/ 80 | !assets/src/ 81 | !assets/dist/ 82 | !assets/themes/*/src/ 83 | !assets/themes/*/dist/ 84 | media/ 85 | !fab/local_settings.py 86 | .DS_Store 87 | static/ 88 | 89 | .vscode -------------------------------------------------------------------------------- /0-search/degrees/degrees.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import sys 3 | 4 | from util import Node, StackFrontier, QueueFrontier 5 | 6 | # Maps names to a set of corresponding person_ids 7 | names = {} 8 | 9 | # Maps person_ids to a dictionary of: name, birth, movies (a set of movie_ids) 10 | people = {} 11 | 12 | # Maps movie_ids to a dictionary of: title, year, stars (a set of person_ids) 13 | movies = {} 14 | 15 | 16 | def load_data(directory): 17 | """ 18 | Load data from CSV files into memory. 19 | """ 20 | # Load people 21 | with open(f"{directory}/people.csv", encoding="utf-8") as f: 22 | reader = csv.DictReader(f) 23 | for row in reader: 24 | people[row["id"]] = { 25 | "name": row["name"], 26 | "birth": row["birth"], 27 | "movies": set() 28 | } 29 | if row["name"].lower() not in names: 30 | names[row["name"].lower()] = {row["id"]} 31 | else: 32 | names[row["name"].lower()].add(row["id"]) 33 | 34 | # Load movies 35 | with open(f"{directory}/movies.csv", encoding="utf-8") as f: 36 | reader = csv.DictReader(f) 37 | for row in reader: 38 | movies[row["id"]] = { 39 | "title": row["title"], 40 | "year": row["year"], 41 | "stars": set() 42 | } 43 | 44 | # Load stars 45 | with open(f"{directory}/stars.csv", encoding="utf-8") as f: 46 | reader = csv.DictReader(f) 47 | for row in reader: 48 | try: 49 | people[row["person_id"]]["movies"].add(row["movie_id"]) 50 | movies[row["movie_id"]]["stars"].add(row["person_id"]) 51 | except KeyError: 52 | pass 53 | 54 | 55 | def main(): 56 | if len(sys.argv) > 2: 57 | sys.exit("Usage: python degrees.py [directory]") 58 | directory = sys.argv[1] if len(sys.argv) == 2 else "large" 59 | 60 | # Load data from files into memory 61 | print("Loading data...") 62 | load_data(directory) 63 | print("Data loaded.") 64 | 65 | source = person_id_for_name(input("Name: ")) 66 | if source is None: 67 | sys.exit("Person not found.") 68 | target = person_id_for_name(input("Name: ")) 69 | if target is None: 70 | sys.exit("Person not found.") 71 | 72 | path = shortest_path(source, target) 73 | 74 | if path is None: 75 | print("Not connected.") 76 | else: 77 | degrees = len(path) 78 | print(f"{degrees} degrees of separation.") 79 | path = [(None, source)] + path 80 | for i in range(degrees): 81 | person1 = people[path[i][1]]["name"] 82 | person2 = people[path[i + 1][1]]["name"] 83 | movie = movies[path[i + 1][0]]["title"] 84 | print(f"{i + 1}: {person1} and {person2} starred in {movie}") 85 | 86 | 87 | def get_solution(node, 
target): 88 | # If node is the goal, then we have a solution 89 | if node.state == target: 90 | path = [] 91 | while node.parent is not None: 92 | path.append((node.action, node.state)) 93 | node = node.parent 94 | path.reverse() 95 | return path 96 | return None 97 | 98 | 99 | def shortest_path(source, target): 100 | """ 101 | Returns the shortest list of (movie_id, person_id) pairs 102 | that connect the source to the target. 103 | 104 | If no possible path, returns None. 105 | """ 106 | # TODO 107 | # Keep track of number of states explored 108 | num_explored = 0 109 | 110 | # Initialize frontier to just the starting position 111 | start = Node(state=source, parent=None, action=None) 112 | frontier = QueueFrontier() 113 | frontier.add(start) 114 | 115 | # Initialize an empty explored set 116 | explored = set() 117 | 118 | # Keep looping until solution found 119 | while True: 120 | 121 | # If nothing left in frontier, then no path 122 | if frontier.empty(): 123 | return None 124 | 125 | # Choose a node from the frontier 126 | node = frontier.remove() 127 | num_explored += 1 128 | 129 | solution = get_solution(node, target) 130 | if solution: 131 | return solution 132 | 133 | # Mark node as explored 134 | explored.add(node.state) 135 | 136 | # Add neighbors to frontier 137 | for movie, state in neighbors_for_person(node.state): 138 | if not frontier.contains_state(state) and state not in explored: 139 | child = Node(state=state, parent=node, action=movie) 140 | solution = get_solution(child, target) 141 | if solution: 142 | return solution 143 | frontier.add(child) 144 | 145 | return None 146 | 147 | 148 | def person_id_for_name(name): 149 | """ 150 | Returns the IMDB id for a person's name, 151 | resolving ambiguities as needed. 152 | """ 153 | person_ids = list(names.get(name.lower(), set())) 154 | if len(person_ids) == 0: 155 | return None 156 | elif len(person_ids) > 1: 157 | print(f"Which '{name}'?") 158 | for person_id in person_ids: 159 | person = people[person_id] 160 | name = person["name"] 161 | birth = person["birth"] 162 | print(f"ID: {person_id}, Name: {name}, Birth: {birth}") 163 | try: 164 | person_id = input("Intended Person ID: ") 165 | if person_id in person_ids: 166 | return person_id 167 | except ValueError: 168 | pass 169 | return None 170 | else: 171 | return person_ids[0] 172 | 173 | 174 | def neighbors_for_person(person_id): 175 | """ 176 | Returns (movie_id, person_id) pairs for people 177 | who starred with a given person. 
178 | """ 179 | movie_ids = people[person_id]["movies"] 180 | neighbors = set() 181 | for movie_id in movie_ids: 182 | for person_id in movies[movie_id]["stars"]: 183 | neighbors.add((movie_id, person_id)) 184 | return neighbors 185 | 186 | 187 | if __name__ == "__main__": 188 | main() 189 | -------------------------------------------------------------------------------- /0-search/degrees/small/movies.csv: -------------------------------------------------------------------------------- 1 | id,title,year 2 | 112384,"Apollo 13",1995 3 | 104257,"A Few Good Men",1992 4 | 109830,"Forrest Gump",1994 5 | 93779,"The Princess Bride",1987 6 | 95953,"Rain Man",1988 7 | -------------------------------------------------------------------------------- /0-search/degrees/small/people.csv: -------------------------------------------------------------------------------- 1 | id,name,birth 2 | 102,"Kevin Bacon",1958 3 | 129,"Tom Cruise",1962 4 | 144,"Cary Elwes",1962 5 | 158,"Tom Hanks",1956 6 | 1597,"Mandy Patinkin",1952 7 | 163,"Dustin Hoffman",1937 8 | 1697,"Chris Sarandon",1942 9 | 193,"Demi Moore",1962 10 | 197,"Jack Nicholson",1937 11 | 200,"Bill Paxton",1955 12 | 398,"Sally Field",1946 13 | 420,"Valeria Golino",1965 14 | 596520,"Gerald R. Molen",1935 15 | 641,"Gary Sinise",1955 16 | 705,"Robin Wright",1966 17 | 914612,"Emma Watson",1990 18 | -------------------------------------------------------------------------------- /0-search/degrees/small/stars.csv: -------------------------------------------------------------------------------- 1 | person_id,movie_id 2 | 102,104257 3 | 102,112384 4 | 129,104257 5 | 129,95953 6 | 144,93779 7 | 158,109830 8 | 158,112384 9 | 1597,93779 10 | 163,95953 11 | 1697,93779 12 | 193,104257 13 | 197,104257 14 | 200,112384 15 | 398,109830 16 | 420,95953 17 | 596520,95953 18 | 641,109830 19 | 641,112384 20 | 705,109830 21 | 705,93779 22 | -------------------------------------------------------------------------------- /0-search/degrees/util.py: -------------------------------------------------------------------------------- 1 | class Node(): 2 | def __init__(self, state, parent, action): 3 | self.state = state 4 | self.parent = parent 5 | self.action = action 6 | 7 | 8 | class StackFrontier(): 9 | def __init__(self): 10 | self.frontier = [] 11 | 12 | def add(self, node): 13 | self.frontier.append(node) 14 | 15 | def contains_state(self, state): 16 | return any(node.state == state for node in self.frontier) 17 | 18 | def empty(self): 19 | return len(self.frontier) == 0 20 | 21 | def remove(self): 22 | if self.empty(): 23 | raise Exception("empty frontier") 24 | else: 25 | node = self.frontier[-1] 26 | self.frontier = self.frontier[:-1] 27 | return node 28 | 29 | 30 | class QueueFrontier(StackFrontier): 31 | 32 | def remove(self): 33 | if self.empty(): 34 | raise Exception("empty frontier") 35 | else: 36 | node = self.frontier[0] 37 | self.frontier = self.frontier[1:] 38 | return node 39 | -------------------------------------------------------------------------------- /0-search/tictactoe/OpenSans-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amangarg078/cs50ai/3cdecbfaeda1440b829b7c0913bcf2a991108936/0-search/tictactoe/OpenSans-Regular.ttf -------------------------------------------------------------------------------- /0-search/tictactoe/requirements.txt: -------------------------------------------------------------------------------- 1 | pygame 2 | 
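The frontier classes in 0-search/degrees/util.py above are what drive the breadth-first search in degrees.py. The following toy script is an illustrative sketch, not a file from this repository; it assumes it is run from the 0-search/degrees directory so that util.py is importable:

from util import Node, QueueFrontier

# Toy graph: each key maps to the set of its neighbors.
graph = {
    "A": {"B", "C"},
    "B": {"D"},
    "C": {"D"},
    "D": {"E"},
    "E": set(),
}

def bfs(source, target):
    """Return a shortest list of states from source to target, or None."""
    frontier = QueueFrontier()
    frontier.add(Node(state=source, parent=None, action=None))
    explored = set()
    while not frontier.empty():
        node = frontier.remove()
        if node.state == target:
            # Walk parent pointers back to the source.
            path = []
            while node is not None:
                path.append(node.state)
                node = node.parent
            return path[::-1]
        explored.add(node.state)
        for neighbor in graph[node.state]:
            if neighbor not in explored and not frontier.contains_state(neighbor):
                frontier.add(Node(state=neighbor, parent=node, action=None))
    return None

print(bfs("A", "E"))  # e.g. ['A', 'B', 'D', 'E'] (or via 'C'); a shortest path is guaranteed

Because QueueFrontier removes from the front, the search explores states in order of distance from the source, which is exactly why shortest_path in degrees.py uses it rather than StackFrontier.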
-------------------------------------------------------------------------------- /0-search/tictactoe/runner.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import sys 3 | import time 4 | 5 | import tictactoe as ttt 6 | 7 | pygame.init() 8 | size = width, height = 600, 400 9 | 10 | # Colors 11 | black = (0, 0, 0) 12 | white = (255, 255, 255) 13 | 14 | screen = pygame.display.set_mode(size) 15 | 16 | mediumFont = pygame.font.Font("OpenSans-Regular.ttf", 28) 17 | largeFont = pygame.font.Font("OpenSans-Regular.ttf", 40) 18 | moveFont = pygame.font.Font("OpenSans-Regular.ttf", 60) 19 | 20 | user = None 21 | board = ttt.initial_state() 22 | ai_turn = False 23 | 24 | while True: 25 | 26 | for event in pygame.event.get(): 27 | if event.type == pygame.QUIT: 28 | sys.exit() 29 | 30 | screen.fill(black) 31 | 32 | # Let user choose a player. 33 | if user is None: 34 | 35 | # Draw title 36 | title = largeFont.render("Play Tic-Tac-Toe", True, white) 37 | titleRect = title.get_rect() 38 | titleRect.center = ((width / 2), 50) 39 | screen.blit(title, titleRect) 40 | 41 | # Draw buttons 42 | playXButton = pygame.Rect((width / 8), (height / 2), width / 4, 50) 43 | playX = mediumFont.render("Play as X", True, black) 44 | playXRect = playX.get_rect() 45 | playXRect.center = playXButton.center 46 | pygame.draw.rect(screen, white, playXButton) 47 | screen.blit(playX, playXRect) 48 | 49 | playOButton = pygame.Rect(5 * (width / 8), (height / 2), width / 4, 50) 50 | playO = mediumFont.render("Play as O", True, black) 51 | playORect = playO.get_rect() 52 | playORect.center = playOButton.center 53 | pygame.draw.rect(screen, white, playOButton) 54 | screen.blit(playO, playORect) 55 | 56 | # Check if button is clicked 57 | click, _, _ = pygame.mouse.get_pressed() 58 | if click == 1: 59 | mouse = pygame.mouse.get_pos() 60 | if playXButton.collidepoint(mouse): 61 | time.sleep(0.2) 62 | user = ttt.X 63 | elif playOButton.collidepoint(mouse): 64 | time.sleep(0.2) 65 | user = ttt.O 66 | 67 | else: 68 | 69 | # Draw game board 70 | tile_size = 80 71 | tile_origin = (width / 2 - (1.5 * tile_size), 72 | height / 2 - (1.5 * tile_size)) 73 | tiles = [] 74 | for i in range(3): 75 | row = [] 76 | for j in range(3): 77 | rect = pygame.Rect( 78 | tile_origin[0] + j * tile_size, 79 | tile_origin[1] + i * tile_size, 80 | tile_size, tile_size 81 | ) 82 | pygame.draw.rect(screen, white, rect, 3) 83 | 84 | if board[i][j] != ttt.EMPTY: 85 | move = moveFont.render(board[i][j], True, white) 86 | moveRect = move.get_rect() 87 | moveRect.center = rect.center 88 | screen.blit(move, moveRect) 89 | row.append(rect) 90 | tiles.append(row) 91 | 92 | game_over = ttt.terminal(board) 93 | player = ttt.player(board) 94 | 95 | # Show title 96 | if game_over: 97 | winner = ttt.winner(board) 98 | if winner is None: 99 | title = f"Game Over: Tie." 100 | else: 101 | title = f"Game Over: {winner} wins." 102 | elif user == player: 103 | title = f"Play as {user}" 104 | else: 105 | title = f"Computer thinking..." 
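        # The caption chosen above is drawn by the render call below. Note the
        # ai_turn flag in the "Check for AI move" block further down: it defers
        # the blocking ttt.minimax(board) call by one frame, so that
        # "Computer thinking..." is already visible on screen while the AI's
        # move is being computed.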
106 | title = largeFont.render(title, True, white) 107 | titleRect = title.get_rect() 108 | titleRect.center = ((width / 2), 30) 109 | screen.blit(title, titleRect) 110 | 111 | # Check for AI move 112 | if user != player and not game_over: 113 | if ai_turn: 114 | time.sleep(0.5) 115 | move = ttt.minimax(board) 116 | board = ttt.result(board, move) 117 | ai_turn = False 118 | else: 119 | ai_turn = True 120 | 121 | # Check for a user move 122 | click, _, _ = pygame.mouse.get_pressed() 123 | if click == 1 and user == player and not game_over: 124 | mouse = pygame.mouse.get_pos() 125 | for i in range(3): 126 | for j in range(3): 127 | if (board[i][j] == ttt.EMPTY and tiles[i][j].collidepoint(mouse)): 128 | board = ttt.result(board, (i, j)) 129 | 130 | if game_over: 131 | againButton = pygame.Rect(width / 3, height - 65, width / 3, 50) 132 | again = mediumFont.render("Play Again", True, black) 133 | againRect = again.get_rect() 134 | againRect.center = againButton.center 135 | pygame.draw.rect(screen, white, againButton) 136 | screen.blit(again, againRect) 137 | click, _, _ = pygame.mouse.get_pressed() 138 | if click == 1: 139 | mouse = pygame.mouse.get_pos() 140 | if againButton.collidepoint(mouse): 141 | time.sleep(0.2) 142 | user = None 143 | board = ttt.initial_state() 144 | ai_turn = False 145 | 146 | pygame.display.flip() 147 | -------------------------------------------------------------------------------- /0-search/tictactoe/tictactoe.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tic Tac Toe Player 3 | """ 4 | 5 | import copy 6 | import math 7 | import random 8 | 9 | 10 | X = "X" 11 | O = "O" 12 | EMPTY = None 13 | 14 | 15 | def initial_state(): 16 | """ 17 | Returns starting state of the board. 18 | """ 19 | return [[EMPTY, EMPTY, EMPTY], 20 | [EMPTY, EMPTY, EMPTY], 21 | [EMPTY, EMPTY, EMPTY]] 22 | 23 | 24 | def player(board): 25 | """ 26 | Returns player who has the next turn on a board. 27 | """ 28 | count = 0 29 | for i in board: 30 | for j in i: 31 | if j: 32 | count += 1 33 | if count % 2 != 0: 34 | return O 35 | return X 36 | 37 | 38 | def actions(board): 39 | """ 40 | Returns set of all possible actions (i, j) available on the board. 41 | """ 42 | res = set() 43 | board_len = len(board) 44 | for i in range(board_len): 45 | for j in range(board_len): 46 | if board[i][j] == EMPTY: 47 | res.add((i, j)) 48 | return res 49 | 50 | 51 | def result(board, action): 52 | """ 53 | Returns the board that results from making move (i, j) on the board. 
54 | """ 55 | curr_player = player(board) 56 | result_board = copy.deepcopy(board) 57 | (i, j) = action 58 | result_board[i][j] = curr_player 59 | return result_board 60 | 61 | 62 | def get_horizontal_winner(board): 63 | # check horizontally 64 | winner_val = None 65 | board_len = len(board) 66 | for i in range(board_len): 67 | winner_val = board[i][0] 68 | for j in range(board_len): 69 | if board[i][j] != winner_val: 70 | winner_val = None 71 | if winner_val: 72 | return winner_val 73 | return winner_val 74 | 75 | 76 | def get_vertical_winner(board): 77 | # check vertically 78 | winner_val = None 79 | board_len = len(board) 80 | for i in range(board_len): 81 | winner_val = board[0][i] 82 | for j in range(board_len): 83 | if board[j][i] != winner_val: 84 | winner_val = None 85 | if winner_val: 86 | return winner_val 87 | return winner_val 88 | 89 | 90 | def get_diagonal_winner(board): 91 | # check diagonally 92 | winner_val = None 93 | board_len = len(board) 94 | winner_val = board[0][0] 95 | for i in range(board_len): 96 | if board[i][i] != winner_val: 97 | winner_val = None 98 | if winner_val: 99 | return winner_val 100 | 101 | winner_val = board[0][board_len - 1] 102 | for i in range(board_len): 103 | j = board_len - 1 - i 104 | if board[i][j] != winner_val: 105 | winner_val = None 106 | 107 | return winner_val 108 | 109 | 110 | def winner(board): 111 | """ 112 | Returns the winner of the game, if there is one. 113 | """ 114 | winner_val = get_horizontal_winner(board) or get_vertical_winner(board) or get_diagonal_winner(board) or None 115 | return winner_val 116 | 117 | 118 | def terminal(board): 119 | """ 120 | Returns True if game is over, False otherwise. 121 | """ 122 | if winner(board) != None: 123 | return True 124 | 125 | for i in board: 126 | for j in i: 127 | if j == EMPTY: 128 | return False 129 | return True 130 | 131 | def utility(board): 132 | """ 133 | Returns 1 if X has won the game, -1 if O has won, 0 otherwise. 134 | """ 135 | winner_val = winner(board) 136 | if winner_val == X: 137 | return 1 138 | elif winner_val == O: 139 | return -1 140 | return 0 141 | 142 | 143 | def max_val(board): 144 | # get max-value 145 | if terminal(board): 146 | return utility(board) 147 | v = -math.inf 148 | for action in actions(board): 149 | v = max(v, min_val(result(board, action))) 150 | return v 151 | 152 | 153 | def min_val(board): 154 | # get min-value 155 | if terminal(board): 156 | return utility(board) 157 | v = math.inf 158 | for action in actions(board): 159 | v = min(v, max_val(result(board, action))) 160 | return v 161 | 162 | 163 | def minimax(board): 164 | """ 165 | Returns the optimal action for the current player on the board. 
166 | """ 167 | if board == initial_state(): 168 | return (random.randint(0, 2), random.randint(0, 2)) 169 | curr_player = player(board) 170 | action_to_return = None 171 | if curr_player == X: 172 | val = -math.inf 173 | for action in actions(board): 174 | min_val_result = min_val(result(board, action)) 175 | if val < min_val_result: 176 | val = min_val_result 177 | action_to_return = action 178 | elif curr_player == O: 179 | val = math.inf 180 | for action in actions(board): 181 | max_val_result = max_val(result(board, action)) 182 | if val > max_val_result: 183 | val = max_val_result 184 | action_to_return = action 185 | return action_to_return 186 | -------------------------------------------------------------------------------- /1-knowledge/knights/logic.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | 4 | class Sentence(): 5 | 6 | def evaluate(self, model): 7 | """Evaluates the logical sentence.""" 8 | raise Exception("nothing to evaluate") 9 | 10 | def formula(self): 11 | """Returns string formula representing logical sentence.""" 12 | return "" 13 | 14 | def symbols(self): 15 | """Returns a set of all symbols in the logical sentence.""" 16 | return set() 17 | 18 | @classmethod 19 | def validate(cls, sentence): 20 | if not isinstance(sentence, Sentence): 21 | raise TypeError("must be a logical sentence") 22 | 23 | @classmethod 24 | def parenthesize(cls, s): 25 | """Parenthesizes an expression if not already parenthesized.""" 26 | def balanced(s): 27 | """Checks if a string has balanced parentheses.""" 28 | count = 0 29 | for c in s: 30 | if c == "(": 31 | count += 1 32 | elif c == ")": 33 | if count <= 0: 34 | return False 35 | count -= 1 36 | return count == 0 37 | if not len(s) or s.isalpha() or ( 38 | s[0] == "(" and s[-1] == ")" and balanced(s[1:-1]) 39 | ): 40 | return s 41 | else: 42 | return f"({s})" 43 | 44 | 45 | class Symbol(Sentence): 46 | 47 | def __init__(self, name): 48 | self.name = name 49 | 50 | def __eq__(self, other): 51 | return isinstance(other, Symbol) and self.name == other.name 52 | 53 | def __hash__(self): 54 | return hash(("symbol", self.name)) 55 | 56 | def __repr__(self): 57 | return self.name 58 | 59 | def evaluate(self, model): 60 | try: 61 | return bool(model[self.name]) 62 | except KeyError: 63 | raise Exception(f"variable {self.name} not in model") 64 | 65 | def formula(self): 66 | return self.name 67 | 68 | def symbols(self): 69 | return {self.name} 70 | 71 | 72 | class Not(Sentence): 73 | def __init__(self, operand): 74 | Sentence.validate(operand) 75 | self.operand = operand 76 | 77 | def __eq__(self, other): 78 | return isinstance(other, Not) and self.operand == other.operand 79 | 80 | def __hash__(self): 81 | return hash(("not", hash(self.operand))) 82 | 83 | def __repr__(self): 84 | return f"Not({self.operand})" 85 | 86 | def evaluate(self, model): 87 | return not self.operand.evaluate(model) 88 | 89 | def formula(self): 90 | return "¬" + Sentence.parenthesize(self.operand.formula()) 91 | 92 | def symbols(self): 93 | return self.operand.symbols() 94 | 95 | 96 | class And(Sentence): 97 | def __init__(self, *conjuncts): 98 | for conjunct in conjuncts: 99 | Sentence.validate(conjunct) 100 | self.conjuncts = list(conjuncts) 101 | 102 | def __eq__(self, other): 103 | return isinstance(other, And) and self.conjuncts == other.conjuncts 104 | 105 | def __hash__(self): 106 | return hash( 107 | ("and", tuple(hash(conjunct) for conjunct in self.conjuncts)) 108 | ) 109 | 110 | def 
__repr__(self): 111 | conjunctions = ", ".join( 112 | [str(conjunct) for conjunct in self.conjuncts] 113 | ) 114 | return f"And({conjunctions})" 115 | 116 | def add(self, conjunct): 117 | Sentence.validate(conjunct) 118 | self.conjuncts.append(conjunct) 119 | 120 | def evaluate(self, model): 121 | return all(conjunct.evaluate(model) for conjunct in self.conjuncts) 122 | 123 | def formula(self): 124 | if len(self.conjuncts) == 1: 125 | return self.conjuncts[0].formula() 126 | return " ∧ ".join([Sentence.parenthesize(conjunct.formula()) 127 | for conjunct in self.conjuncts]) 128 | 129 | def symbols(self): 130 | return set.union(*[conjunct.symbols() for conjunct in self.conjuncts]) 131 | 132 | 133 | class Or(Sentence): 134 | def __init__(self, *disjuncts): 135 | for disjunct in disjuncts: 136 | Sentence.validate(disjunct) 137 | self.disjuncts = list(disjuncts) 138 | 139 | def __eq__(self, other): 140 | return isinstance(other, Or) and self.disjuncts == other.disjuncts 141 | 142 | def __hash__(self): 143 | return hash( 144 | ("or", tuple(hash(disjunct) for disjunct in self.disjuncts)) 145 | ) 146 | 147 | def __repr__(self): 148 | disjuncts = ", ".join([str(disjunct) for disjunct in self.disjuncts]) 149 | return f"Or({disjuncts})" 150 | 151 | def evaluate(self, model): 152 | return any(disjunct.evaluate(model) for disjunct in self.disjuncts) 153 | 154 | def formula(self): 155 | if len(self.disjuncts) == 1: 156 | return self.disjuncts[0].formula() 157 | return " ∨ ".join([Sentence.parenthesize(disjunct.formula()) 158 | for disjunct in self.disjuncts]) 159 | 160 | def symbols(self): 161 | return set.union(*[disjunct.symbols() for disjunct in self.disjuncts]) 162 | 163 | 164 | class Implication(Sentence): 165 | def __init__(self, antecedent, consequent): 166 | Sentence.validate(antecedent) 167 | Sentence.validate(consequent) 168 | self.antecedent = antecedent 169 | self.consequent = consequent 170 | 171 | def __eq__(self, other): 172 | return (isinstance(other, Implication) 173 | and self.antecedent == other.antecedent 174 | and self.consequent == other.consequent) 175 | 176 | def __hash__(self): 177 | return hash(("implies", hash(self.antecedent), hash(self.consequent))) 178 | 179 | def __repr__(self): 180 | return f"Implication({self.antecedent}, {self.consequent})" 181 | 182 | def evaluate(self, model): 183 | return ((not self.antecedent.evaluate(model)) 184 | or self.consequent.evaluate(model)) 185 | 186 | def formula(self): 187 | antecedent = Sentence.parenthesize(self.antecedent.formula()) 188 | consequent = Sentence.parenthesize(self.consequent.formula()) 189 | return f"{antecedent} => {consequent}" 190 | 191 | def symbols(self): 192 | return set.union(self.antecedent.symbols(), self.consequent.symbols()) 193 | 194 | 195 | class Biconditional(Sentence): 196 | def __init__(self, left, right): 197 | Sentence.validate(left) 198 | Sentence.validate(right) 199 | self.left = left 200 | self.right = right 201 | 202 | def __eq__(self, other): 203 | return (isinstance(other, Biconditional) 204 | and self.left == other.left 205 | and self.right == other.right) 206 | 207 | def __hash__(self): 208 | return hash(("biconditional", hash(self.left), hash(self.right))) 209 | 210 | def __repr__(self): 211 | return f"Biconditional({self.left}, {self.right})" 212 | 213 | def evaluate(self, model): 214 | return ((self.left.evaluate(model) 215 | and self.right.evaluate(model)) 216 | or (not self.left.evaluate(model) 217 | and not self.right.evaluate(model))) 218 | 219 | def formula(self): 220 | left = 
Sentence.parenthesize(self.left.formula())
221 |         right = Sentence.parenthesize(self.right.formula())
222 |         return f"{left} <=> {right}"
223 | 
224 |     def symbols(self):
225 |         return set.union(self.left.symbols(), self.right.symbols())
226 | 
227 | 
228 | def model_check(knowledge, query):
229 |     """Checks if knowledge base entails query."""
230 | 
231 |     def check_all(knowledge, query, symbols, model):
232 |         """Checks if knowledge base entails query, given a particular model."""
233 | 
234 |         # If model has an assignment for each symbol
235 |         if not symbols:
236 | 
237 |             # If knowledge base is true in model, then query must also be true
238 |             if knowledge.evaluate(model):
239 |                 return query.evaluate(model)
240 |             return True
241 |         else:
242 | 
243 |             # Choose one of the remaining unused symbols
244 |             remaining = symbols.copy()
245 |             p = remaining.pop()
246 | 
247 |             # Create a model where the symbol is true
248 |             model_true = model.copy()
249 |             model_true[p] = True
250 | 
251 |             # Create a model where the symbol is false
252 |             model_false = model.copy()
253 |             model_false[p] = False
254 | 
255 |             # Ensure entailment holds in both models
256 |             return (check_all(knowledge, query, remaining, model_true) and
257 |                     check_all(knowledge, query, remaining, model_false))
258 | 
259 |     # Get all symbols in both knowledge and query
260 |     symbols = set.union(knowledge.symbols(), query.symbols())
261 | 
262 |     # Check that knowledge entails query
263 |     return check_all(knowledge, query, symbols, dict())
264 | 
--------------------------------------------------------------------------------
/1-knowledge/knights/puzzle.py:
--------------------------------------------------------------------------------
1 | from logic import *
2 | 
3 | AKnight = Symbol("A is a Knight")
4 | AKnave = Symbol("A is a Knave")
5 | 
6 | BKnight = Symbol("B is a Knight")
7 | BKnave = Symbol("B is a Knave")
8 | 
9 | CKnight = Symbol("C is a Knight")
10 | CKnave = Symbol("C is a Knave")
11 | 
12 | # Puzzle 0
13 | # A says "I am both a knight and a knave."
14 | knowledge0 = And(
15 |     Or(AKnight, AKnave),
16 |     Implication(AKnight, Not(AKnave)),
17 |     Implication(AKnave, Not(AKnight)),
18 | 
19 |     Biconditional(AKnight, And(AKnight, AKnave))
20 | )
21 | 
22 | # Puzzle 1
23 | # A says "We are both knaves."
24 | # B says nothing.
25 | knowledge1 = And(
26 |     Or(AKnight, AKnave),
27 |     Or(BKnight, BKnave),
28 |     Implication(AKnight, Not(AKnave)),
29 |     Implication(AKnave, Not(AKnight)),
30 |     Implication(BKnight, Not(BKnave)),
31 |     Implication(BKnave, Not(BKnight)),
32 | 
33 |     Biconditional(AKnight, And(AKnave, BKnave))
34 | )
35 | 
36 | # Puzzle 2
37 | # A says "We are the same kind."
38 | # B says "We are of different kinds."
39 | knowledge2 = And(
40 |     Or(AKnight, AKnave),
41 |     Or(BKnight, BKnave),
42 |     Implication(AKnight, Not(AKnave)),
43 |     Implication(AKnave, Not(AKnight)),
44 |     Implication(BKnight, Not(BKnave)),
45 |     Implication(BKnave, Not(BKnight)),
46 | 
47 |     Biconditional(AKnight, Or(And(AKnight, BKnight), And(AKnave, BKnave))),
48 |     Biconditional(BKnight, Or(And(AKnight, BKnave), And(AKnave, BKnight)))
49 | )
50 | 
51 | # Puzzle 3
52 | # A says either "I am a knight." or "I am a knave.", but you don't know which.
53 | # B says "A said 'I am a knave'."
54 | # B says "C is a knave."
55 | # C says "A is a knight."
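# Encoding convention used throughout this file: each character is exactly one
# of knight/knave (the Or plus the paired Implications), and a statement S
# uttered by X is encoded as Biconditional(XKnight, S) -- true if X is a
# knight, false if X is a knave. Under that convention, B reporting that A
# said "I am a knave" becomes Biconditional(BKnight, Biconditional(AKnight, AKnave)) below.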
56 | knowledge3 = And( 57 | Or(AKnight, AKnave), 58 | Or(BKnight, BKnave), 59 | Or(CKnight, CKnave), 60 | Implication(AKnight, Not(AKnave)), 61 | Implication(AKnave, Not(AKnight)), 62 | Implication(BKnight, Not(BKnave)), 63 | Implication(BKnave, Not(BKnight)), 64 | Implication(CKnight, Not(CKnave)), 65 | Implication(CKnave, Not(CKnight)), 66 | 67 | Biconditional(AKnight, Or(AKnight, AKnave)), 68 | Biconditional(BKnight, Biconditional(AKnight, AKnave)), 69 | Biconditional(BKnight, CKnave), 70 | Biconditional(CKnight, AKnight), 71 | 72 | ) 73 | 74 | 75 | def main(): 76 | symbols = [AKnight, AKnave, BKnight, BKnave, CKnight, CKnave] 77 | puzzles = [ 78 | ("Puzzle 0", knowledge0), 79 | ("Puzzle 1", knowledge1), 80 | ("Puzzle 2", knowledge2), 81 | ("Puzzle 3", knowledge3) 82 | ] 83 | for puzzle, knowledge in puzzles: 84 | print(puzzle) 85 | if len(knowledge.conjuncts) == 0: 86 | print(" Not yet implemented.") 87 | else: 88 | for symbol in symbols: 89 | if model_check(knowledge, symbol): 90 | print(f" {symbol}") 91 | 92 | 93 | if __name__ == "__main__": 94 | main() 95 | -------------------------------------------------------------------------------- /1-knowledge/minesweeper/assets/fonts/OpenSans-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amangarg078/cs50ai/3cdecbfaeda1440b829b7c0913bcf2a991108936/1-knowledge/minesweeper/assets/fonts/OpenSans-Regular.ttf -------------------------------------------------------------------------------- /1-knowledge/minesweeper/assets/images/flag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amangarg078/cs50ai/3cdecbfaeda1440b829b7c0913bcf2a991108936/1-knowledge/minesweeper/assets/images/flag.png -------------------------------------------------------------------------------- /1-knowledge/minesweeper/assets/images/mine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amangarg078/cs50ai/3cdecbfaeda1440b829b7c0913bcf2a991108936/1-knowledge/minesweeper/assets/images/mine.png -------------------------------------------------------------------------------- /1-knowledge/minesweeper/minesweeper.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import random 3 | 4 | 5 | class Minesweeper(): 6 | """ 7 | Minesweeper game representation 8 | """ 9 | 10 | def __init__(self, height=8, width=8, mines=8): 11 | 12 | # Set initial width, height, and number of mines 13 | self.height = height 14 | self.width = width 15 | self.mines = set() 16 | 17 | # Initialize an empty field with no mines 18 | self.board = [] 19 | for i in range(self.height): 20 | row = [] 21 | for j in range(self.width): 22 | row.append(False) 23 | self.board.append(row) 24 | 25 | # Add mines randomly 26 | while len(self.mines) != mines: 27 | i = random.randrange(height) 28 | j = random.randrange(width) 29 | if not self.board[i][j]: 30 | self.mines.add((i, j)) 31 | self.board[i][j] = True 32 | 33 | # At first, player has found no mines 34 | self.mines_found = set() 35 | 36 | def print(self): 37 | """ 38 | Prints a text-based representation 39 | of where mines are located. 
40 | """ 41 | for i in range(self.height): 42 | print("--" * self.width + "-") 43 | for j in range(self.width): 44 | if self.board[i][j]: 45 | print("|X", end="") 46 | else: 47 | print("| ", end="") 48 | print("|") 49 | print("--" * self.width + "-") 50 | 51 | def is_mine(self, cell): 52 | i, j = cell 53 | return self.board[i][j] 54 | 55 | def nearby_mines(self, cell): 56 | """ 57 | Returns the number of mines that are 58 | within one row and column of a given cell, 59 | not including the cell itself. 60 | """ 61 | 62 | # Keep count of nearby mines 63 | count = 0 64 | 65 | # Loop over all cells within one row and column 66 | for i in range(cell[0] - 1, cell[0] + 2): 67 | for j in range(cell[1] - 1, cell[1] + 2): 68 | 69 | # Ignore the cell itself 70 | if (i, j) == cell: 71 | continue 72 | 73 | # Update count if cell in bounds and is mine 74 | if 0 <= i < self.height and 0 <= j < self.width: 75 | if self.board[i][j]: 76 | count += 1 77 | 78 | return count 79 | 80 | def won(self): 81 | """ 82 | Checks if all mines have been flagged. 83 | """ 84 | return self.mines_found == self.mines 85 | 86 | 87 | class Sentence(): 88 | """ 89 | Logical statement about a Minesweeper game 90 | A sentence consists of a set of board cells, 91 | and a count of the number of those cells which are mines. 92 | """ 93 | 94 | def __init__(self, cells, count): 95 | self.cells = set(cells) 96 | self.count = count 97 | 98 | def __eq__(self, other): 99 | return self.cells == other.cells and self.count == other.count 100 | 101 | def __str__(self): 102 | return f"{self.cells} = {self.count}" 103 | 104 | def known_mines(self): 105 | """ 106 | Returns the set of all cells in self.cells known to be mines. 107 | """ 108 | if len(self.cells) == self.count: 109 | return set(self.cells) 110 | return set() 111 | 112 | def known_safes(self): 113 | """ 114 | Returns the set of all cells in self.cells known to be safe. 115 | """ 116 | if self.count == 0: 117 | return set(self.cells) 118 | return set() 119 | 120 | 121 | def mark_mine(self, cell): 122 | """ 123 | Updates internal knowledge representation given the fact that 124 | a cell is known to be a mine. 125 | """ 126 | if cell in self.cells: 127 | self.cells.remove(cell) 128 | self.count -= 1 129 | return 1 130 | return 0 131 | 132 | def mark_safe(self, cell): 133 | """ 134 | Updates internal knowledge representation given the fact that 135 | a cell is known to be safe. 136 | """ 137 | if cell in self.cells: 138 | self.cells.remove(cell) 139 | return 1 140 | return 0 141 | 142 | 143 | class MinesweeperAI(): 144 | """ 145 | Minesweeper game player 146 | """ 147 | 148 | def __init__(self, height=8, width=8): 149 | 150 | # Set initial height and width 151 | self.height = height 152 | self.width = width 153 | 154 | # Keep track of which cells have been clicked on 155 | self.moves_made = set() 156 | 157 | # Keep track of cells known to be safe or mines 158 | self.mines = set() 159 | self.safes = set() 160 | 161 | # List of sentences about the game known to be true 162 | self.knowledge = [] 163 | 164 | def mark_mine(self, cell): 165 | """ 166 | Marks a cell as a mine, and updates all knowledge 167 | to mark that cell as a mine as well. 168 | """ 169 | counter = 0 170 | self.mines.add(cell) 171 | for sentence in self.knowledge: 172 | counter += sentence.mark_mine(cell) 173 | return counter 174 | 175 | def mark_safe(self, cell): 176 | """ 177 | Marks a cell as safe, and updates all knowledge 178 | to mark that cell as safe as well. 
179 | """ 180 | counter = 0 181 | self.safes.add(cell) 182 | for sentence in self.knowledge: 183 | counter += sentence.mark_safe(cell) 184 | return counter 185 | 186 | def add_knowledge(self, cell, count): 187 | """ 188 | Called when the Minesweeper board tells us, for a given 189 | safe cell, how many neighboring cells have mines in them. 190 | 191 | This function should: 192 | 1) mark the cell as a move that has been made 193 | 2) mark the cell as safe 194 | 3) add a new sentence to the AI's knowledge base 195 | based on the value of `cell` and `count` 196 | 4) mark any additional cells as safe or as mines 197 | if it can be concluded based on the AI's knowledge base 198 | 5) add any new sentences to the AI's knowledge base 199 | if they can be inferred from existing knowledge 200 | """ 201 | # mark the cell as a move that has been made 202 | self.moves_made.add(cell) 203 | 204 | # mark the cell as safe 205 | self.mark_safe(cell) 206 | 207 | neighbors = set() 208 | i, j = cell 209 | 210 | for x in range(max(0, i-1), min(i+2, self.height)): 211 | for y in range(max(0, j-1), min(j+2, self.width)): 212 | if (x, y) != (i, j): 213 | neighbors.add((x, y)) 214 | 215 | # add a new sentence to the AI's knowledge base 216 | # based on the value of `cell` and `count` 217 | self.knowledge.append(Sentence(neighbors, count)) 218 | 219 | # mark any additional cells as safe or as mines 220 | # if it can be concluded based on the AI's knowledge base 221 | self.update_safes_and_mines() 222 | 223 | # add any new sentences to the AI's knowledge base 224 | # if they can be inferred from existing knowledge 225 | new_inferences = self.get_new_inferences() 226 | while new_inferences: 227 | for sentence in new_inferences: 228 | self.knowledge.append(sentence) 229 | 230 | self.update_safes_and_mines() 231 | new_inferences = self.get_new_inferences() 232 | 233 | def update_safes_and_mines(self): 234 | # repeat update if an update was made in the previous cycle 235 | counter = 1 236 | while counter: 237 | counter = 0 238 | for sentence in self.knowledge: 239 | for cell in sentence.known_safes(): 240 | self.mark_safe(cell) 241 | counter += 1 242 | for cell in sentence.known_mines(): 243 | self.mark_mine(cell) 244 | counter += 1 245 | for cell in self.safes: 246 | counter += self.mark_safe(cell) 247 | for cell in self.mines: 248 | counter += self.mark_mine(cell) 249 | 250 | def get_new_inferences(self): 251 | new_inferences = [] 252 | sentences_to_remove = [] 253 | 254 | # for each sentence known 255 | for set_1 in self.knowledge: 256 | # all cells removed from the sentence 257 | if not set_1.cells: 258 | sentences_to_remove.append(set_1) 259 | continue 260 | 261 | for set_2 in self.knowledge: 262 | # all cells removed from the sentence 263 | if not set_2.cells: 264 | sentences_to_remove.append(set_2) 265 | continue 266 | 267 | if set_1 != set_2: 268 | # check if subset, if yes, set2 - set1 = count2 - count1 269 | if set_2.cells.issubset(set_1.cells): 270 | diff_cells = set_1.cells.difference(set_2.cells) 271 | diff_count = set_1.count - set_2.count 272 | # an inference can be drawn 273 | new_inference_to_add = Sentence(diff_cells, diff_count) 274 | if new_inference_to_add not in self.knowledge: 275 | new_inferences.append(new_inference_to_add) 276 | 277 | self.knowledge = [x for x in self.knowledge if x not in sentences_to_remove] 278 | return new_inferences 279 | 280 | def make_safe_move(self): 281 | """ 282 | Returns a safe cell to choose on the Minesweeper board. 
283 |         The move must be known to be safe, and not already a move
284 |         that has been made.
285 | 
286 |         This function may use the knowledge in self.mines, self.safes
287 |         and self.moves_made, but should not modify any of those values.
288 |         """
289 |         for move in self.safes:
290 |             if move not in self.moves_made and move not in self.mines:
291 |                 return move
292 |         return None
293 | 
294 |     def make_random_move(self):
295 |         """
296 |         Returns a move to make on the Minesweeper board.
297 |         Should choose randomly among cells that:
298 |             1) have not already been chosen, and
299 |             2) are not known to be mines
300 |         """
301 |         choices = [(i, j) for i in range(self.height) for j in range(self.width)
302 |                    if (i, j) not in self.moves_made and (i, j) not in self.mines]
303 |         if choices:
304 |             return random.choice(choices)
305 |         return None
306 | 
--------------------------------------------------------------------------------
/1-knowledge/minesweeper/requirements.txt:
--------------------------------------------------------------------------------
1 | pygame
2 | 
--------------------------------------------------------------------------------
/1-knowledge/minesweeper/runner.py:
--------------------------------------------------------------------------------
1 | import pygame
2 | import sys
3 | import time
4 | 
5 | from minesweeper import Minesweeper, MinesweeperAI
6 | 
7 | HEIGHT = 8
8 | WIDTH = 8
9 | MINES = 8
10 | 
11 | # Colors
12 | BLACK = (0, 0, 0)
13 | GRAY = (180, 180, 180)
14 | WHITE = (255, 255, 255)
15 | 
16 | # Create game
17 | pygame.init()
18 | size = width, height = 600, 400
19 | screen = pygame.display.set_mode(size)
20 | 
21 | # Fonts
22 | OPEN_SANS = "assets/fonts/OpenSans-Regular.ttf"
23 | smallFont = pygame.font.Font(OPEN_SANS, 20)
24 | mediumFont = pygame.font.Font(OPEN_SANS, 28)
25 | largeFont = pygame.font.Font(OPEN_SANS, 40)
26 | 
27 | # Compute board size
28 | BOARD_PADDING = 20
29 | board_width = ((2 / 3) * width) - (BOARD_PADDING * 2)
30 | board_height = height - (BOARD_PADDING * 2)
31 | cell_size = int(min(board_width / WIDTH, board_height / HEIGHT))
32 | board_origin = (BOARD_PADDING, BOARD_PADDING)
33 | 
34 | # Add images
35 | flag = pygame.image.load("assets/images/flag.png")
36 | flag = pygame.transform.scale(flag, (cell_size, cell_size))
37 | mine = pygame.image.load("assets/images/mine.png")
38 | mine = pygame.transform.scale(mine, (cell_size, cell_size))
39 | 
40 | # Create game and AI agent
41 | game = Minesweeper(height=HEIGHT, width=WIDTH, mines=MINES)
42 | ai = MinesweeperAI(height=HEIGHT, width=WIDTH)
43 | 
44 | # Keep track of revealed cells, flagged cells, and if a mine was hit
45 | revealed = set()
46 | flags = set()
47 | lost = False
48 | 
49 | # Show instructions initially
50 | instructions = True
51 | 
52 | while True:
53 | 
54 |     # Check if game quit
55 |     for event in pygame.event.get():
56 |         if event.type == pygame.QUIT:
57 |             sys.exit()
58 | 
59 |     screen.fill(BLACK)
60 | 
61 |     # Show game instructions
62 |     if instructions:
63 | 
64 |         # Title
65 |         title = largeFont.render("Play Minesweeper", True, WHITE)
66 |         titleRect = title.get_rect()
67 |         titleRect.center = ((width / 2), 50)
68 |         screen.blit(title, titleRect)
69 | 
70 |         # Rules
71 |         rules = [
72 |             "Click a cell to reveal it.",
73 |             "Right-click a cell to mark it as a mine.",
74 |             "Mark all mines successfully to win!"
75 | ] 76 | for i, rule in enumerate(rules): 77 | line = smallFont.render(rule, True, WHITE) 78 | lineRect = line.get_rect() 79 | lineRect.center = ((width / 2), 150 + 30 * i) 80 | screen.blit(line, lineRect) 81 | 82 | # Play game button 83 | buttonRect = pygame.Rect((width / 4), (3 / 4) * height, width / 2, 50) 84 | buttonText = mediumFont.render("Play Game", True, BLACK) 85 | buttonTextRect = buttonText.get_rect() 86 | buttonTextRect.center = buttonRect.center 87 | pygame.draw.rect(screen, WHITE, buttonRect) 88 | screen.blit(buttonText, buttonTextRect) 89 | 90 | # Check if play button clicked 91 | click, _, _ = pygame.mouse.get_pressed() 92 | if click == 1: 93 | mouse = pygame.mouse.get_pos() 94 | if buttonRect.collidepoint(mouse): 95 | instructions = False 96 | time.sleep(0.3) 97 | 98 | pygame.display.flip() 99 | continue 100 | 101 | # Draw board 102 | cells = [] 103 | for i in range(HEIGHT): 104 | row = [] 105 | for j in range(WIDTH): 106 | 107 | # Draw rectangle for cell 108 | rect = pygame.Rect( 109 | board_origin[0] + j * cell_size, 110 | board_origin[1] + i * cell_size, 111 | cell_size, cell_size 112 | ) 113 | pygame.draw.rect(screen, GRAY, rect) 114 | pygame.draw.rect(screen, WHITE, rect, 3) 115 | 116 | # Add a mine, flag, or number if needed 117 | if game.is_mine((i, j)) and lost: 118 | screen.blit(mine, rect) 119 | elif (i, j) in flags: 120 | screen.blit(flag, rect) 121 | elif (i, j) in revealed: 122 | neighbors = smallFont.render( 123 | str(game.nearby_mines((i, j))), 124 | True, BLACK 125 | ) 126 | neighborsTextRect = neighbors.get_rect() 127 | neighborsTextRect.center = rect.center 128 | screen.blit(neighbors, neighborsTextRect) 129 | 130 | row.append(rect) 131 | cells.append(row) 132 | 133 | # AI Move button 134 | aiButton = pygame.Rect( 135 | (2 / 3) * width + BOARD_PADDING, (1 / 3) * height - 50, 136 | (width / 3) - BOARD_PADDING * 2, 50 137 | ) 138 | buttonText = mediumFont.render("AI Move", True, BLACK) 139 | buttonRect = buttonText.get_rect() 140 | buttonRect.center = aiButton.center 141 | pygame.draw.rect(screen, WHITE, aiButton) 142 | screen.blit(buttonText, buttonRect) 143 | 144 | # Reset button 145 | resetButton = pygame.Rect( 146 | (2 / 3) * width + BOARD_PADDING, (1 / 3) * height + 20, 147 | (width / 3) - BOARD_PADDING * 2, 50 148 | ) 149 | buttonText = mediumFont.render("Reset", True, BLACK) 150 | buttonRect = buttonText.get_rect() 151 | buttonRect.center = resetButton.center 152 | pygame.draw.rect(screen, WHITE, resetButton) 153 | screen.blit(buttonText, buttonRect) 154 | 155 | # Display text 156 | text = "Lost" if lost else "Won" if game.mines == flags else "" 157 | text = mediumFont.render(text, True, WHITE) 158 | textRect = text.get_rect() 159 | textRect.center = ((5 / 6) * width, (2 / 3) * height) 160 | screen.blit(text, textRect) 161 | 162 | move = None 163 | 164 | left, _, right = pygame.mouse.get_pressed() 165 | 166 | # Check for a right-click to toggle flagging 167 | if right == 1 and not lost: 168 | mouse = pygame.mouse.get_pos() 169 | for i in range(HEIGHT): 170 | for j in range(WIDTH): 171 | if cells[i][j].collidepoint(mouse) and (i, j) not in revealed: 172 | if (i, j) in flags: 173 | flags.remove((i, j)) 174 | else: 175 | flags.add((i, j)) 176 | time.sleep(0.2) 177 | 178 | elif left == 1: 179 | mouse = pygame.mouse.get_pos() 180 | 181 | # If AI button clicked, make an AI move 182 | if aiButton.collidepoint(mouse) and not lost: 183 | move = ai.make_safe_move() 184 | if move is None: 185 | move = ai.make_random_move() 186 | if move is None: 187 | flags 
= ai.mines.copy() 188 | print("No moves left to make.") 189 | else: 190 | print("No known safe moves, AI making random move.") 191 | else: 192 | print("AI making safe move.") 193 | time.sleep(0.2) 194 | 195 | # Reset game state 196 | elif resetButton.collidepoint(mouse): 197 | game = Minesweeper(height=HEIGHT, width=WIDTH, mines=MINES) 198 | ai = MinesweeperAI(height=HEIGHT, width=WIDTH) 199 | revealed = set() 200 | flags = set() 201 | lost = False 202 | continue 203 | 204 | # User-made move 205 | elif not lost: 206 | for i in range(HEIGHT): 207 | for j in range(WIDTH): 208 | if (cells[i][j].collidepoint(mouse) 209 | and (i, j) not in flags 210 | and (i, j) not in revealed): 211 | move = (i, j) 212 | 213 | # Make move and update AI knowledge 214 | if move: 215 | if game.is_mine(move): 216 | lost = True 217 | else: 218 | nearby = game.nearby_mines(move) 219 | revealed.add(move) 220 | ai.add_knowledge(move, nearby) 221 | 222 | pygame.display.flip() 223 | -------------------------------------------------------------------------------- /2-uncertainity/heredity/data/family0.csv: -------------------------------------------------------------------------------- 1 | name,mother,father,trait 2 | Harry,Lily,James, 3 | James,,,1 4 | Lily,,,0 5 | -------------------------------------------------------------------------------- /2-uncertainity/heredity/data/family1.csv: -------------------------------------------------------------------------------- 1 | name,mother,father,trait 2 | Arthur,,,0 3 | Charlie,Molly,Arthur,0 4 | Fred,Molly,Arthur,1 5 | Ginny,Molly,Arthur, 6 | Molly,,,0 7 | Ron,Molly,Arthur, 8 | -------------------------------------------------------------------------------- /2-uncertainity/heredity/data/family2.csv: -------------------------------------------------------------------------------- 1 | name,mother,father,trait 2 | Arthur,,,0 3 | Hermione,,,0 4 | Molly,,, 5 | Ron,Molly,Arthur,0 6 | Rose,Ron,Hermione,1 7 | -------------------------------------------------------------------------------- /2-uncertainity/heredity/heredity.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import itertools 3 | import sys 4 | 5 | PROBS = { 6 | 7 | # Unconditional probabilities for having gene 8 | "gene": { 9 | 2: 0.01, 10 | 1: 0.03, 11 | 0: 0.96 12 | }, 13 | 14 | "trait": { 15 | 16 | # Probability of trait given two copies of gene 17 | 2: { 18 | True: 0.65, 19 | False: 0.35 20 | }, 21 | 22 | # Probability of trait given one copy of gene 23 | 1: { 24 | True: 0.56, 25 | False: 0.44 26 | }, 27 | 28 | # Probability of trait given no gene 29 | 0: { 30 | True: 0.01, 31 | False: 0.99 32 | } 33 | }, 34 | 35 | # Mutation probability 36 | "mutation": 0.01 37 | } 38 | 39 | 40 | def main(): 41 | 42 | # Check for proper usage 43 | if len(sys.argv) != 2: 44 | sys.exit("Usage: python heredity.py data.csv") 45 | people = load_data(sys.argv[1]) 46 | 47 | # Keep track of gene and trait probabilities for each person 48 | probabilities = { 49 | person: { 50 | "gene": { 51 | 2: 0, 52 | 1: 0, 53 | 0: 0 54 | }, 55 | "trait": { 56 | True: 0, 57 | False: 0 58 | } 59 | } 60 | for person in people 61 | } 62 | 63 | # Loop over all sets of people who might have the trait 64 | names = set(people) 65 | for have_trait in powerset(names): 66 | 67 | # Check if current set of people violates known information 68 | fails_evidence = any( 69 | (people[person]["trait"] is not None and 70 | people[person]["trait"] != (person in have_trait)) 71 | for person in names 72 | ) 73 | if 
fails_evidence:
74 |             continue
75 | 
76 |         # Loop over all sets of people who might have the gene
77 |         for one_gene in powerset(names):
78 |             for two_genes in powerset(names - one_gene):
79 | 
80 |                 # Update probabilities with new joint probability
81 |                 p = joint_probability(people, one_gene, two_genes, have_trait)
82 |                 update(probabilities, one_gene, two_genes, have_trait, p)
83 | 
84 |     # Ensure probabilities sum to 1
85 |     normalize(probabilities)
86 | 
87 |     # Print results
88 |     for person in people:
89 |         print(f"{person}:")
90 |         for field in probabilities[person]:
91 |             print(f"  {field.capitalize()}:")
92 |             for value in probabilities[person][field]:
93 |                 p = probabilities[person][field][value]
94 |                 print(f"    {value}: {p:.4f}")
95 | 
96 | 
97 | def load_data(filename):
98 |     """
99 |     Load gene and trait data from a file into a dictionary.
100 |     File assumed to be a CSV containing fields name, mother, father, trait.
101 |     mother, father must both be blank, or both be valid names in the CSV.
102 |     trait should be 0 or 1 if trait is known, blank otherwise.
103 |     """
104 |     data = dict()
105 |     with open(filename) as f:
106 |         reader = csv.DictReader(f)
107 |         for row in reader:
108 |             name = row["name"]
109 |             data[name] = {
110 |                 "name": name,
111 |                 "mother": row["mother"] or None,
112 |                 "father": row["father"] or None,
113 |                 "trait": (True if row["trait"] == "1" else
114 |                           False if row["trait"] == "0" else None)
115 |             }
116 |     return data
117 | 
118 | 
119 | def powerset(s):
120 |     """
121 |     Return a list of all possible subsets of set s.
122 |     """
123 |     s = list(s)
124 |     return [
125 |         set(s) for s in itertools.chain.from_iterable(
126 |             itertools.combinations(s, r) for r in range(len(s) + 1)
127 |         )
128 |     ]
129 | 
130 | def joint_probability(people, one_gene, two_genes, have_trait):
131 |     """
132 |     Compute and return a joint probability.
133 | 
134 |     The probability returned should be the probability that
135 |         * everyone in set `one_gene` has one copy of the gene, and
136 |         * everyone in set `two_genes` has two copies of the gene, and
137 |         * everyone not in `one_gene` or `two_genes` does not have the gene, and
138 |         * everyone in set `have_trait` has the trait, and
139 |         * everyone not in set `have_trait` does not have the trait.
140 | """ 141 | probability = 1 142 | zero_gene = people.keys() - (one_gene | two_genes) 143 | 144 | for i in zero_gene: 145 | if people[i]["mother"] == None: 146 | prob = PROBS["gene"][0] 147 | elif people[i]["mother"] != None: 148 | prob = 1 149 | mother = people[i]["mother"] 150 | father = people[i]["father"] 151 | if mother in zero_gene and father in zero_gene: 152 | prob *= (1 - PROBS["mutation"]) * (1 - PROBS["mutation"]) 153 | if mother in zero_gene and father in one_gene: 154 | prob *= (1 - PROBS["mutation"]) * 0.5 155 | if mother in zero_gene and father in two_genes: 156 | prob *= (1 - PROBS["mutation"]) * PROBS["mutation"] 157 | 158 | if mother in one_gene and father in zero_gene: 159 | prob *= 0.5 * (1 - PROBS["mutation"]) 160 | if mother in one_gene and father in one_gene: 161 | prob *= 0.5 * 0.5 162 | if mother in one_gene and father in two_genes: 163 | prob *= 0.5 * PROBS["mutation"] 164 | 165 | if mother in two_genes and father in zero_gene: 166 | prob *= PROBS["mutation"] * (1 - PROBS["mutation"]) 167 | if mother in two_genes and father in one_gene: 168 | prob *= PROBS["mutation"] * 0.5 169 | if mother in two_genes and father in two_genes: 170 | prob *= PROBS["mutation"] * PROBS["mutation"] 171 | 172 | prob *= PROBS["trait"][0][i in have_trait] 173 | probability *= prob 174 | 175 | for i in one_gene: 176 | if people[i]["mother"] == None: 177 | prob = PROBS["gene"][1] 178 | elif people[i]["mother"] != None: 179 | prob = 1 180 | mother = people[i]["mother"] 181 | father = people[i]["father"] 182 | 183 | if mother in zero_gene and father in zero_gene: 184 | prob *= PROBS["mutation"] * (1 - PROBS["mutation"]) + (1 - PROBS["mutation"]) * PROBS["mutation"] 185 | if mother in zero_gene and father in one_gene: 186 | prob *= PROBS["mutation"] * 0.5 + (1 - PROBS["mutation"]) * 0.5 187 | if mother in zero_gene and father in two_genes: 188 | prob *= PROBS["mutation"] * PROBS["mutation"] + (1 - PROBS["mutation"]) * (1 - PROBS["mutation"]) 189 | 190 | if mother in one_gene and father in zero_gene: 191 | prob *= 0.5 * (1 - PROBS["mutation"]) + 0.5 * PROBS["mutation"] 192 | if mother in one_gene and father in one_gene: 193 | prob *= 0.5 * 0.5 + 0.5 * 0.5 194 | if mother in one_gene and father in two_genes: 195 | prob *= 0.5 * PROBS["mutation"] + 0.5 * (1 - PROBS["mutation"]) 196 | 197 | if mother in two_genes and father in zero_gene: 198 | prob *= (1 - PROBS["mutation"]) * (1 - PROBS["mutation"]) + PROBS["mutation"] * PROBS["mutation"] 199 | if mother in two_genes and father in one_gene: 200 | prob *= (1 - PROBS["mutation"]) * 0.5 + PROBS["mutation"] * 0.5 201 | if mother in two_genes and father in two_genes: 202 | prob *= (1 - PROBS["mutation"]) * PROBS["mutation"] + PROBS["mutation"] * (1 - PROBS["mutation"]) 203 | 204 | prob *= PROBS["trait"][1][i in have_trait] 205 | probability *= prob 206 | 207 | for i in two_genes: 208 | if people[i]["mother"] == None: 209 | prob = PROBS["gene"][2] 210 | elif people[i]["mother"] != None: 211 | prob = 1 212 | mother = people[i]["mother"] 213 | father = people[i]["father"] 214 | if mother in zero_gene and father in zero_gene: 215 | prob *= PROBS["mutation"] * PROBS["mutation"] 216 | if mother in zero_gene and father in one_gene: 217 | prob *= PROBS["mutation"] * 0.5 218 | if mother in zero_gene and father in two_genes: 219 | prob *= PROBS["mutation"] * (1 - PROBS["mutation"]) 220 | 221 | if mother in one_gene and father in zero_gene: 222 | prob *= 0.5 * PROBS["mutation"] 223 | if mother in one_gene and father in one_gene: 224 | prob *= 0.5 * 0.5 225 | 
if mother in one_gene and father in two_genes: 226 | prob *= 0.5 * (1 - PROBS["mutation"]) 227 | 228 | if mother in two_genes and father in zero_gene: 229 | prob *= (1 - PROBS["mutation"]) * PROBS["mutation"] 230 | if mother in two_genes and father in one_gene: 231 | prob *= (1 - PROBS["mutation"]) * 0.5 232 | if mother in two_genes and father in two_genes: 233 | prob *= (1 - PROBS["mutation"]) * (1 - PROBS["mutation"]) 234 | 235 | prob *= PROBS["trait"][2][i in have_trait] 236 | 237 | probability *= prob 238 | 239 | return probability 240 | 241 | 242 | def update(probabilities, one_gene, two_genes, have_trait, p): 243 | """ 244 | Add to `probabilities` a new joint probability `p`. 245 | Each person should have their "gene" and "trait" distributions updated. 246 | Which value for each distribution is updated depends on whether 247 | the person is in `have_gene` and `have_trait`, respectively. 248 | """ 249 | for person in probabilities: 250 | if person in two_genes: 251 | probabilities[person]["gene"][2] += p 252 | elif person in one_gene: 253 | probabilities[person]["gene"][1] += p 254 | else: 255 | probabilities[person]["gene"][0] += p 256 | 257 | probabilities[person]["trait"][person in have_trait] += p 258 | 259 | 260 | def normalize(probabilities): 261 | """ 262 | Update `probabilities` such that each probability distribution 263 | is normalized (i.e., sums to 1, with relative proportions the same). 264 | """ 265 | for person in probabilities: 266 | total_genes = sum(probabilities[person]["gene"].values()) 267 | for i in probabilities[person]["gene"]: 268 | probabilities[person]["gene"][i] /= total_genes 269 | 270 | total_traits = sum(probabilities[person]["trait"].values()) 271 | for i in probabilities[person]["trait"]: 272 | probabilities[person]["trait"][i] /= total_traits 273 | 274 | 275 | if __name__ == "__main__": 276 | main() 277 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus0/1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 1 5 | 6 | 7 |

1

8 | 9 |
Links:
10 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus0/2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 2 5 | 6 | 7 |

2

8 | 9 |
Links:
10 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus0/3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 3 5 | 6 | 7 |

3

8 | 9 |
Links:
10 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus0/4.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 4 5 | 6 | 7 |

4

8 | 9 |
Links:
10 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus1/bfs.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | BFS 5 | 6 | 7 |

BFS

8 | 9 |
Links:
10 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus1/dfs.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | DFS 5 | 6 | 7 |

DFS

8 | 9 |
Links:
10 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus1/games.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Games 5 | 6 | 7 |

Games

8 | 9 |
Links:
10 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus1/minesweeper.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Minesweeper 5 | 6 | 7 |

Minesweeper

8 | 9 |
Links:
10 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus1/minimax.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Minimax 5 | 6 | 7 |

Minimax

8 | 9 |
Links:
10 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus1/search.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Search 5 | 6 | 7 |

Search

8 | 9 |
Links:
10 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus1/tictactoe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | TicTacToe 5 | 6 | 7 |

TicTacToe

8 | 9 |
Links:
10 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus2/ai.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | AI 5 | 6 | 7 |

AI

8 | 9 |
Links:
10 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus2/algorithms.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Algorithms 5 | 6 | 7 |

Algorithms

8 | 9 |
Links:
10 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus2/c.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | C 5 | 6 | 7 |

C

8 | 9 |
Links:
10 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus2/inference.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Inference 5 | 6 | 7 |

Inference

8 | 9 |
Links:
10 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus2/logic.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Logic 5 | 6 | 7 |

Logic

8 | 9 |
Links:
10 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus2/programming.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Programming 5 | 6 | 7 |

Programming

8 | 9 |
Links:
10 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus2/python.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Python 5 | 6 | 7 |

Python

8 | 9 |
Links:
10 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /2-uncertainity/pagerank/corpus2/recursion.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Recursion 5 | 6 | 7 |

Recursion

8 | 9 |
Links:
10 | 13 | 14 | 15 |
--------------------------------------------------------------------------------
/2-uncertainity/pagerank/pagerank.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import os
3 | import random
4 | import re
5 | import sys
6 | 
7 | DAMPING = 0.85
8 | SAMPLES = 10000
9 | 
10 | 
11 | def main():
12 |     if len(sys.argv) != 2:
13 |         sys.exit("Usage: python pagerank.py corpus")
14 |     corpus = crawl(sys.argv[1])
15 |     ranks = sample_pagerank(corpus, DAMPING, SAMPLES)
16 |     print(f"PageRank Results from Sampling (n = {SAMPLES})")
17 |     for page in sorted(ranks):
18 |         print(f" {page}: {ranks[page]:.4f}")
19 |     ranks = iterate_pagerank(corpus, DAMPING)
20 |     print("PageRank Results from Iteration")
21 |     for page in sorted(ranks):
22 |         print(f" {page}: {ranks[page]:.4f}")
23 | 
24 | 
25 | def crawl(directory):
26 |     """
27 |     Parse a directory of HTML pages and check for links to other pages.
28 |     Return a dictionary where each key is a page, and values are
29 |     a list of all other pages in the corpus that are linked to by the page.
30 |     """
31 |     pages = dict()
32 | 
33 |     # Extract all links from HTML files
34 |     for filename in os.listdir(directory):
35 |         if not filename.endswith(".html"):
36 |             continue
37 |         with open(os.path.join(directory, filename)) as f:
38 |             contents = f.read()
39 |             links = re.findall(r"<a\s+(?:[^>]*?)href=\"([^\"]*)\"", contents)
40 |             pages[filename] = set(links) - {filename}
41 | 
42 |     # Only include links to other pages in the corpus
43 |     for filename in pages:
44 |         pages[filename] = set(
45 |             link for link in pages[filename]
46 |             if link in pages
47 |         )
48 | 
49 |     return pages
50 | 
51 | 
52 | def transition_model(corpus, page, damping_factor):
53 |     """
54 |     Return a probability distribution over which page to visit next,
55 |     given a current page.
56 | 
57 |     With probability `damping_factor`, choose a link at random
58 |     linked to by `page`. With probability `1 - damping_factor`, choose
59 |     a link at random chosen from all pages in the corpus.
60 |     """
61 |     pd = {}
62 |     page_links = corpus[page]
63 |     total_pages = len(corpus)
64 |     total_linked_pages = len(page_links)
65 |     if page_links:
66 |         for key in corpus:
67 |             pd[key] = (1 - damping_factor) / total_pages
68 | 
69 |         for key in page_links:
70 |             pd[key] += damping_factor / total_linked_pages
71 |     else:
72 |         for key in corpus:
73 |             pd[key] = 1.0 / total_pages
74 |     return pd
75 | 
76 | 
77 | def sample_pagerank(corpus, damping_factor, n):
78 |     """
79 |     Return PageRank values for each page by sampling `n` pages
80 |     according to transition model, starting with a page at random.
81 | 
82 |     Return a dictionary where keys are page names, and values are
83 |     their estimated PageRank value (a value between 0 and 1). All
84 |     PageRank values should sum to 1.
85 |     """
86 |     distribution = dict.fromkeys(corpus.keys(), 0)
87 |     page = random.choice(list(corpus.keys()))
88 | 
89 |     for i in range(1, n):
90 |         curr_distribution = transition_model(corpus, page, damping_factor)
91 |         for _page in distribution:
92 |             distribution[_page] = (((i - 1) * distribution[_page]) + curr_distribution[_page]) / i
93 |         page = random.choices(list(curr_distribution.keys()), weights=list(curr_distribution.values()), k=1)[0]
94 | 
95 |     return distribution
96 | 
97 | 
98 | def iterate_pagerank(corpus, damping_factor):
99 |     """
100 |     Return PageRank values for each page by iteratively updating
101 |     PageRank values until convergence.
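    Each update applies the standard PageRank recurrence (N is the
    number of pages in the corpus, d the damping factor):

        PR(p) = (1 - d) / N + d * sum(PR(i) / NumLinks(i))

    where the sum runs over every page i that links to p, and
    NumLinks(i) is the number of links on page i.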
102 | 
103 |     Return a dictionary where keys are page names, and values are
104 |     their estimated PageRank value (a value between 0 and 1). All
105 |     PageRank values should sum to 1.
106 |     """
107 |     total_pages = len(corpus)
108 |     distribution = dict.fromkeys(corpus.keys(), 1.0 / total_pages)
109 |     change = True
110 | 
111 |     while change:
112 |         change = False
113 |         old_distribution = copy.deepcopy(distribution)
114 |         for page in corpus:
115 |             distribution[page] = ((1 - damping_factor) / total_pages) + (damping_factor * get_sum(corpus, distribution, page))
116 |             change = change or abs(old_distribution[page] - distribution[page]) > 0.001
117 | 
118 |     return distribution
119 | 
120 | 
121 | def get_sum(corpus, distribution, page):
122 |     result = 0
123 |     for p in corpus:
124 |         # A page with no outgoing links is interpreted as linking to
125 |         # every page in the corpus (including itself)
126 |         links = corpus[p] or corpus.keys()
127 |         if page in links:
128 |             result += distribution[p] / len(links)
129 |     return result
130 | 
131 | 
132 | if __name__ == "__main__":
133 |     main()
134 | 
--------------------------------------------------------------------------------
/3-optimization/crossword/assets/fonts/OpenSans-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amangarg078/cs50ai/3cdecbfaeda1440b829b7c0913bcf2a991108936/3-optimization/crossword/assets/fonts/OpenSans-Regular.ttf
--------------------------------------------------------------------------------
/3-optimization/crossword/crossword.py:
--------------------------------------------------------------------------------
1 | class Variable():
2 | 
3 |     ACROSS = "across"
4 |     DOWN = "down"
5 | 
6 |     def __init__(self, i, j, direction, length):
7 |         """Create a new variable with starting point, direction, and length."""
8 |         self.i = i
9 |         self.j = j
10 |         self.direction = direction
11 |         self.length = length
12 |         self.cells = []
13 |         for k in range(self.length):
14 |             self.cells.append(
15 |                 (self.i + (k if self.direction == Variable.DOWN else 0),
16 |                  self.j + (k if self.direction == Variable.ACROSS else 0))
17 |             )
18 | 
19 |     def __hash__(self):
20 |         return hash((self.i, self.j, self.direction, self.length))
21 | 
22 |     def __eq__(self, other):
23 |         return (
24 |             (self.i == other.i) and
25 |             (self.j == other.j) and
26 |             (self.direction == other.direction) and
27 |             (self.length == other.length)
28 |         )
29 | 
30 |     def __str__(self):
31 |         return f"({self.i}, {self.j}) {self.direction} : {self.length}"
32 | 
33 |     def __repr__(self):
34 |         direction = repr(self.direction)
35 |         return f"Variable({self.i}, {self.j}, {direction}, {self.length})"
36 | 
37 | 
38 | class Crossword():
39 | 
40 |     def __init__(self, structure_file, words_file):
41 | 
42 |         # Determine structure of crossword
43 |         with open(structure_file) as f:
44 |             contents = f.read().splitlines()
45 |             self.height = len(contents)
46 |             self.width = max(len(line) for line in contents)
47 | 
48 |             self.structure = []
49 |             for i in range(self.height):
50 |                 row = []
51 |                 for j in range(self.width):
52 |                     if j >= len(contents[i]):
53 |                         row.append(False)
54 |                     elif contents[i][j] == "_":
55 |                         row.append(True)
56 |                     else:
57 |                         row.append(False)
58 |                 self.structure.append(row)
59 | 
60 |         # Save vocabulary list
61 |         with open(words_file) as f:
62 |             self.words = set(f.read().upper().splitlines())
63 | 
64 |         # Determine variable set
65 |         self.variables = set()
66 |         for i in range(self.height):
67 |             for j in range(self.width):
68 | 
69 |                 # Vertical words
70 |                 starts_word = (
71 |                     self.structure[i][j]
72 |                     and (i == 0 or not self.structure[i - 1][j])
73 |                 )
74 |                 if starts_word:
75 |                     length = 1
76 |                     for k in range(i +
1, self.height): 77 | if self.structure[k][j]: 78 | length += 1 79 | else: 80 | break 81 | if length > 1: 82 | self.variables.add(Variable( 83 | i=i, j=j, 84 | direction=Variable.DOWN, 85 | length=length 86 | )) 87 | 88 | # Horizontal words 89 | starts_word = ( 90 | self.structure[i][j] 91 | and (j == 0 or not self.structure[i][j - 1]) 92 | ) 93 | if starts_word: 94 | length = 1 95 | for k in range(j + 1, self.width): 96 | if self.structure[i][k]: 97 | length += 1 98 | else: 99 | break 100 | if length > 1: 101 | self.variables.add(Variable( 102 | i=i, j=j, 103 | direction=Variable.ACROSS, 104 | length=length 105 | )) 106 | 107 | # Compute overlaps for each word 108 | # For any pair of variables v1, v2, their overlap is either: 109 | # None, if the two variables do not overlap; or 110 | # (i, j), where v1's ith character overlaps v2's jth character 111 | self.overlaps = dict() 112 | for v1 in self.variables: 113 | for v2 in self.variables: 114 | if v1 == v2: 115 | continue 116 | cells1 = v1.cells 117 | cells2 = v2.cells 118 | intersection = set(cells1).intersection(cells2) 119 | if not intersection: 120 | self.overlaps[v1, v2] = None 121 | else: 122 | intersection = intersection.pop() 123 | self.overlaps[v1, v2] = ( 124 | cells1.index(intersection), 125 | cells2.index(intersection) 126 | ) 127 | 128 | def neighbors(self, var): 129 | """Given a variable, return set of overlapping variables.""" 130 | return set( 131 | v for v in self.variables 132 | if v != var and self.overlaps[v, var] 133 | ) 134 | -------------------------------------------------------------------------------- /3-optimization/crossword/data/structure0.txt: -------------------------------------------------------------------------------- 1 | #___# 2 | #_##_ 3 | #_##_ 4 | #_##_ 5 | #____ 6 | -------------------------------------------------------------------------------- /3-optimization/crossword/data/structure1.txt: -------------------------------------------------------------------------------- 1 | ############## 2 | #######_####_# 3 | #____________# 4 | #_#####_####_# 5 | #_##_____###_# 6 | #_#####_####_# 7 | #_###______#_# 8 | #######_####_# 9 | ############## 10 | -------------------------------------------------------------------------------- /3-optimization/crossword/data/structure2.txt: -------------------------------------------------------------------------------- 1 | ######_ 2 | ____##_ 3 | _##____ 4 | _##_##_ 5 | _##_##_ 6 | #___##_ 7 | -------------------------------------------------------------------------------- /3-optimization/crossword/data/words0.txt: -------------------------------------------------------------------------------- 1 | one 2 | two 3 | three 4 | four 5 | five 6 | six 7 | seven 8 | eight 9 | nine 10 | ten 11 | -------------------------------------------------------------------------------- /3-optimization/crossword/data/words1.txt: -------------------------------------------------------------------------------- 1 | adversarial 2 | alpha 3 | arc 4 | artificial 5 | bayes 6 | beta 7 | bit 8 | breadth 9 | byte 10 | classification 11 | classify 12 | condition 13 | constraint 14 | create 15 | depth 16 | distribution 17 | end 18 | false 19 | graph 20 | heuristic 21 | infer 22 | inference 23 | initial 24 | intelligence 25 | knowledge 26 | language 27 | learning 28 | line 29 | logic 30 | loss 31 | markov 32 | minimax 33 | network 34 | neural 35 | node 36 | optimization 37 | probability 38 | proposition 39 | prune 40 | reason 41 | recurrent 42 | regression 43 | resolution 44 | resolve 45 | satisfaction 
46 | search
47 | sine
48 | start
49 | true
50 | truth
51 | uncertainty
52 | 
--------------------------------------------------------------------------------
/3-optimization/crossword/generate.py:
--------------------------------------------------------------------------------
1 | import sys
2 | 
3 | from crossword import *
4 | 
5 | 
6 | class CrosswordCreator():
7 | 
8 |     def __init__(self, crossword):
9 |         """
10 |         Create new CSP crossword generator.
11 |         """
12 |         self.crossword = crossword
13 |         self.domains = {
14 |             var: self.crossword.words.copy()
15 |             for var in self.crossword.variables
16 |         }
17 | 
18 |     def letter_grid(self, assignment):
19 |         """
20 |         Return 2D array representing a given assignment.
21 |         """
22 |         letters = [
23 |             [None for _ in range(self.crossword.width)]
24 |             for _ in range(self.crossword.height)
25 |         ]
26 |         for variable, word in assignment.items():
27 |             direction = variable.direction
28 |             for k in range(len(word)):
29 |                 i = variable.i + (k if direction == Variable.DOWN else 0)
30 |                 j = variable.j + (k if direction == Variable.ACROSS else 0)
31 |                 letters[i][j] = word[k]
32 |         return letters
33 | 
34 |     def print(self, assignment):
35 |         """
36 |         Print crossword assignment to the terminal.
37 |         """
38 |         letters = self.letter_grid(assignment)
39 |         for i in range(self.crossword.height):
40 |             for j in range(self.crossword.width):
41 |                 if self.crossword.structure[i][j]:
42 |                     print(letters[i][j] or " ", end="")
43 |                 else:
44 |                     print("█", end="")
45 |             print()
46 | 
47 |     def save(self, assignment, filename):
48 |         """
49 |         Save crossword assignment to an image file.
50 |         """
51 |         from PIL import Image, ImageDraw, ImageFont
52 |         cell_size = 100
53 |         cell_border = 2
54 |         interior_size = cell_size - 2 * cell_border
55 |         letters = self.letter_grid(assignment)
56 | 
57 |         # Create a blank canvas
58 |         img = Image.new(
59 |             "RGBA",
60 |             (self.crossword.width * cell_size,
61 |              self.crossword.height * cell_size),
62 |             "black"
63 |         )
64 |         font = ImageFont.truetype("assets/fonts/OpenSans-Regular.ttf", 80)
65 |         draw = ImageDraw.Draw(img)
66 | 
67 |         for i in range(self.crossword.height):
68 |             for j in range(self.crossword.width):
69 | 
70 |                 rect = [
71 |                     (j * cell_size + cell_border,
72 |                      i * cell_size + cell_border),
73 |                     ((j + 1) * cell_size - cell_border,
74 |                      (i + 1) * cell_size - cell_border)
75 |                 ]
76 |                 if self.crossword.structure[i][j]:
77 |                     draw.rectangle(rect, fill="white")
78 |                     if letters[i][j]:
79 |                         w, h = draw.textsize(letters[i][j], font=font)
80 |                         draw.text(
81 |                             (rect[0][0] + ((interior_size - w) / 2),
82 |                              rect[0][1] + ((interior_size - h) / 2) - 10),
83 |                             letters[i][j], fill="black", font=font
84 |                         )
85 | 
86 |         img.save(filename)
87 | 
88 |     def solve(self):
89 |         """
90 |         Enforce node and arc consistency, and then solve the CSP.
91 |         """
92 |         self.enforce_node_consistency()
93 |         self.ac3()
94 |         return self.backtrack(dict())
95 | 
96 |     def enforce_node_consistency(self):
97 |         """
98 |         Update `self.domains` such that each variable is node-consistent.
99 |         (Remove any values that are inconsistent with a variable's unary
100 |         constraints; in this case, the length of the word.)
101 |         """
102 |         for variable in self.crossword.variables:
103 |             for word in self.crossword.words:
104 |                 if len(word) != variable.length:
105 |                     self.domains[variable].remove(word)
106 | 
107 |     def revise(self, x, y):
108 |         """
109 |         Make variable `x` arc consistent with variable `y`.
110 |         To do so, remove values from `self.domains[x]` for which there is no
111 |         possible corresponding value for `y` in `self.domains[y]`.
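        As an illustrative example (hypothetical words, not taken from the
        data files): if self.crossword.overlaps[x, y] == (1, 0), then a
        word w may stay in x's domain only if some word in y's domain
        starts with w[1].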
112 | 
113 |         Return True if a revision was made to the domain of `x`; return
114 |         False if no revision was made.
115 |         """
116 |         revised = False
117 |         overlap = self.crossword.overlaps[x, y]
118 |         if overlap:
119 |             a, b = overlap
120 |             domains_to_remove = set()
121 |             for x_domain in self.domains[x]:
122 |                 overlap_possible = False
123 |                 for y_domain in self.domains[y]:
124 |                     if x_domain != y_domain and x_domain[a] == y_domain[b]:
125 |                         overlap_possible = True
126 |                         break
127 |                 # no value in y's domain satisfies the constraints
128 |                 if not overlap_possible:
129 |                     domains_to_remove.add(x_domain)
130 |             if domains_to_remove:
131 |                 self.domains[x] -= domains_to_remove
132 |                 revised = True
133 |         return revised
134 | 
135 |     def ac3(self, arcs=None):
136 |         """
137 |         Update `self.domains` such that each variable is arc consistent.
138 |         If `arcs` is None, begin with initial list of all arcs in the problem.
139 |         Otherwise, use `arcs` as the initial list of arcs to make consistent.
140 | 
141 |         Return True if arc consistency is enforced and no domains are empty;
142 |         return False if one or more domains end up empty.
143 |         """
144 |         if arcs is None:
145 |             # select only those variables which have an overlap
146 |             queue = []
147 |             for variable1 in self.crossword.variables:
148 |                 for variable2 in self.crossword.neighbors(variable1):
149 |                     queue.append((variable1, variable2))
150 |         else:
151 |             queue = list(arcs)
152 |         while queue:
153 |             x, y = queue.pop(0)
154 |             set_y = set()
155 |             set_y.add(y)
156 |             if self.revise(x, y):
157 |                 if len(self.domains[x]) == 0:
158 |                     return False
159 |                 for z in self.crossword.neighbors(x) - set_y:
160 |                     queue.append((z, x))
161 | 
162 |         return True
163 | 
164 |     def assignment_complete(self, assignment):
165 |         """
166 |         Return True if `assignment` is complete (i.e., assigns a value to each
167 |         crossword variable); return False otherwise.
168 |         """
169 |         for variable in self.crossword.variables:
170 |             if variable not in assignment.keys():
171 |                 return False
172 |             if assignment[variable] not in self.crossword.words:
173 |                 return False
174 |         return True
175 | 
176 |     def consistent(self, assignment):
177 |         """
178 |         Return True if `assignment` is consistent (i.e., words fit in crossword
179 |         puzzle without conflicting characters); return False otherwise.
180 |         """
181 |         for x in assignment:
182 |             word1 = assignment[x]
183 |             if x.length != len(word1):
184 |                 return False
185 | 
186 |             for y in assignment:
187 |                 word2 = assignment[y]
188 |                 if x != y:
189 |                     if word1 == word2:
190 |                         return False
191 | 
192 |                     overlap = self.crossword.overlaps[x, y]
193 |                     if overlap:
194 |                         a, b = overlap
195 |                         if word1[a] != word2[b]:
196 |                             return False
197 |         return True
198 | 
199 |     def order_domain_values(self, var, assignment):
200 |         """
201 |         Return a list of values in the domain of `var`, in order by
202 |         the number of values they rule out for neighboring variables.
203 |         The first value in the list, for example, should be the one
204 |         that rules out the fewest values among the neighbors of `var`.
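        (This is the least-constraining-values heuristic: trying the
        least restrictive values first keeps the most options open for
        the neighboring variables during backtracking.)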
205 | """ 206 | 207 | # get neighbors 208 | neighbors = self.crossword.neighbors(var) 209 | for i in assignment: 210 | if i in neighbors: 211 | neighbors.remove(i) 212 | 213 | result = [] 214 | # for every value in domain, check for overlaps that don't satisfy criteria 215 | for val in self.domains[var]: 216 | total_ruled_out = 0 217 | for var2 in neighbors: 218 | for val2 in self.domains[var2]: 219 | overlap = self.crossword.overlaps[var, var2] 220 | if overlap: 221 | a, b = overlap 222 | if val[a] != val2[b]: 223 | # if values don't match, they need to removed 224 | total_ruled_out += 1 225 | result.append([val, total_ruled_out]) 226 | result.sort(key=lambda x: (x[1])) 227 | return [i[0] for i in result] 228 | 229 | def select_unassigned_variable(self, assignment): 230 | """ 231 | Return an unassigned variable not already part of `assignment`. 232 | Choose the variable with the minimum number of remaining values 233 | in its domain. If there is a tie, choose the variable with the highest 234 | degree. If there is a tie, any of the tied variables are acceptable 235 | return values. 236 | """ 237 | list_of_variables = [] 238 | for var in self.crossword.variables: 239 | if var not in assignment: 240 | list_of_variables.append([var, len(self.domains[var]), len(self.crossword.neighbors(var))]) 241 | 242 | if list_of_variables: 243 | list_of_variables.sort(key=lambda x: (x[1], -x[2])) 244 | return list_of_variables[0][0] 245 | return None 246 | 247 | def backtrack(self, assignment): 248 | """ 249 | Using Backtracking Search, take as input a partial assignment for the 250 | crossword and return a complete assignment if possible to do so. 251 | 252 | `assignment` is a mapping from variables (keys) to words (values). 253 | 254 | If no assignment is possible, return None. 255 | """ 256 | 257 | if self.assignment_complete(assignment): 258 | return assignment 259 | var = self.select_unassigned_variable(assignment) 260 | for val in self.order_domain_values(var, assignment): 261 | new_assigment = assignment.copy() 262 | new_assigment[var] = val 263 | if self.consistent(new_assigment): 264 | result = self.backtrack(new_assigment) 265 | if result: 266 | return result 267 | return None 268 | 269 | def main(): 270 | 271 | # Check usage 272 | if len(sys.argv) not in [3, 4]: 273 | sys.exit("Usage: python generate.py structure words [output]") 274 | 275 | # Parse command-line arguments 276 | structure = sys.argv[1] 277 | words = sys.argv[2] 278 | output = sys.argv[3] if len(sys.argv) == 4 else None 279 | 280 | # Generate crossword 281 | crossword = Crossword(structure, words) 282 | creator = CrosswordCreator(crossword) 283 | assignment = creator.solve() 284 | 285 | # Print result 286 | if assignment is None: 287 | print("No solution.") 288 | else: 289 | creator.print(assignment) 290 | if output: 291 | creator.save(assignment, output) 292 | 293 | 294 | if __name__ == "__main__": 295 | main() 296 | -------------------------------------------------------------------------------- /4-learning/nim/nim.py: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | import time 4 | 5 | 6 | class Nim(): 7 | 8 | def __init__(self, initial=[1, 3, 5, 7]): 9 | """ 10 | Initialize game board. 
11 |         Each game board has
12 |         - `piles`: a list of how many elements remain in each pile
13 |         - `player`: 0 or 1 to indicate which player's turn
14 |         - `winner`: None, 0, or 1 to indicate who the winner is
15 |         """
16 |         self.piles = initial.copy()
17 |         self.player = 0
18 |         self.winner = None
19 | 
20 |     @classmethod
21 |     def available_actions(cls, piles):
22 |         """
23 |         Nim.available_actions(piles) takes a `piles` list as input
24 |         and returns all of the available actions `(i, j)` in that state.
25 | 
26 |         Action `(i, j)` represents the action of removing `j` items
27 |         from pile `i` (where piles are 0-indexed).
28 |         """
29 |         actions = set()
30 |         for i, pile in enumerate(piles):
31 |             for j in range(1, piles[i] + 1):
32 |                 actions.add((i, j))
33 |         return actions
34 | 
35 |     @classmethod
36 |     def other_player(cls, player):
37 |         """
38 |         Nim.other_player(player) returns the player that is not
39 |         `player`. Assumes `player` is either 0 or 1.
40 |         """
41 |         return 0 if player == 1 else 1
42 | 
43 |     def switch_player(self):
44 |         """
45 |         Switch the current player to the other player.
46 |         """
47 |         self.player = Nim.other_player(self.player)
48 | 
49 |     def move(self, action):
50 |         """
51 |         Make the move `action` for the current player.
52 |         `action` must be a tuple `(i, j)`.
53 |         """
54 |         pile, count = action
55 | 
56 |         # Check for errors
57 |         if self.winner is not None:
58 |             raise Exception("Game already won")
59 |         elif pile < 0 or pile >= len(self.piles):
60 |             raise Exception("Invalid pile")
61 |         elif count < 1 or count > self.piles[pile]:
62 |             raise Exception("Invalid number of objects")
63 | 
64 |         # Update pile
65 |         self.piles[pile] -= count
66 |         self.switch_player()
67 | 
68 |         # Check for a winner
69 |         if all(pile == 0 for pile in self.piles):
70 |             self.winner = self.player
71 | 
72 | 
73 | class NimAI():
74 | 
75 |     def __init__(self, alpha=0.5, epsilon=0.1):
76 |         """
77 |         Initialize AI with an empty Q-learning dictionary,
78 |         an alpha (learning) rate, and an epsilon rate.
79 | 
80 |         The Q-learning dictionary maps `(state, action)`
81 |         pairs to a Q-value (a number).
82 |          - `state` is a tuple of remaining piles, e.g. (1, 1, 4, 4)
83 |          - `action` is a tuple `(i, j)` for an action
84 |         """
85 |         self.q = dict()
86 |         self.alpha = alpha
87 |         self.epsilon = epsilon
88 | 
89 |     def update(self, old_state, action, new_state, reward):
90 |         """
91 |         Update Q-learning model, given an old state, an action taken
92 |         in that state, a new resulting state, and the reward received
93 |         from taking that action.
94 |         """
95 |         old = self.get_q_value(old_state, action)
96 |         best_future = self.best_future_reward(new_state)
97 |         self.update_q_value(old_state, action, old, reward, best_future)
98 | 
99 |     def get_q_value(self, state, action):
100 |         """
101 |         Return the Q-value for the state `state` and the action `action`.
102 |         If no Q-value exists yet in `self.q`, return 0.
103 |         """
104 |         return self.q.get((tuple(state), action), 0)
105 | 
106 |     def update_q_value(self, state, action, old_q, reward, future_rewards):
107 |         """
108 |         Update the Q-value for the state `state` and the action `action`
109 |         given the previous Q-value `old_q`, a current reward `reward`,
110 |         and an estimate of future rewards `future_rewards`.
111 | 112 | Use the formula: 113 | 114 | Q(s, a) <- old value estimate 115 | + alpha * (new value estimate - old value estimate) 116 | 117 | where `old value estimate` is the previous Q-value, 118 | `alpha` is the learning rate, and `new value estimate` 119 | is the sum of the current reward and estimated future rewards. 120 | """ 121 | self.q[(tuple(state), action)] = old_q + self.alpha * (future_rewards + reward - old_q) 122 | 123 | def best_future_reward(self, state): 124 | """ 125 | Given a state `state`, consider all possible `(state, action)` 126 | pairs available in that state and return the maximum of all 127 | of their Q-values. 128 | 129 | Use 0 as the Q-value if a `(state, action)` pair has no 130 | Q-value in `self.q`. If there are no available actions in 131 | `state`, return 0. 132 | """ 133 | best_reward = 0 134 | actions = list(Nim.available_actions(state)) 135 | for action in actions: 136 | best_reward = max(self.get_q_value(state, action), best_reward) 137 | return best_reward 138 | 139 | def choose_action(self, state, epsilon=True): 140 | """ 141 | Given a state `state`, return an action `(i, j)` to take. 142 | 143 | If `epsilon` is `False`, then return the best action 144 | available in the state (the one with the highest Q-value, 145 | using 0 for pairs that have no Q-values). 146 | 147 | If `epsilon` is `True`, then with probability 148 | `self.epsilon` choose a random available action, 149 | otherwise choose the best action available. 150 | 151 | If multiple actions have the same Q-value, any of those 152 | options is an acceptable return value. 153 | """ 154 | best_action = None 155 | best_reward = 0 156 | actions = list(Nim.available_actions(state)) 157 | for action in actions: 158 | q_val = self.get_q_value(state, action) 159 | if best_action is None or q_val > best_reward: 160 | best_reward = q_val 161 | best_action = action 162 | 163 | if epsilon: 164 | total_actions = len(actions) 165 | weights = [(1 - self.epsilon) if action == best_action else self.epsilon for action in actions] 166 | best_action = random.choices(actions, weights=weights, k=1)[0] 167 | 168 | return best_action 169 | 170 | def train(n): 171 | """ 172 | Train an AI by playing `n` games against itself. 
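    Note on the reward scheme used below: rewards arrive only when a
    game ends. Because the player who removes the last object loses,
    the final move is updated with reward -1, the winner's last move
    with reward 1, and every intermediate move with reward 0.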
173 | """ 174 | 175 | player = NimAI() 176 | 177 | # Play n games 178 | for i in range(n): 179 | print(f"Playing training game {i + 1}") 180 | game = Nim() 181 | 182 | # Keep track of last move made by either player 183 | last = { 184 | 0: {"state": None, "action": None}, 185 | 1: {"state": None, "action": None} 186 | } 187 | 188 | # Game loop 189 | while True: 190 | 191 | # Keep track of current state and action 192 | state = game.piles.copy() 193 | action = player.choose_action(game.piles) 194 | 195 | # Keep track of last state and action 196 | last[game.player]["state"] = state 197 | last[game.player]["action"] = action 198 | 199 | # Make move 200 | game.move(action) 201 | new_state = game.piles.copy() 202 | 203 | # When game is over, update Q values with rewards 204 | if game.winner is not None: 205 | player.update(state, action, new_state, -1) 206 | player.update( 207 | last[game.player]["state"], 208 | last[game.player]["action"], 209 | new_state, 210 | 1 211 | ) 212 | break 213 | 214 | # If game is continuing, no rewards yet 215 | elif last[game.player]["state"] is not None: 216 | player.update( 217 | last[game.player]["state"], 218 | last[game.player]["action"], 219 | new_state, 220 | 0 221 | ) 222 | 223 | print("Done training") 224 | 225 | # Return the trained AI 226 | return player 227 | 228 | 229 | def play(ai, human_player=None): 230 | """ 231 | Play human game against the AI. 232 | `human_player` can be set to 0 or 1 to specify whether 233 | human player moves first or second. 234 | """ 235 | 236 | # If no player order set, choose human's order randomly 237 | if human_player is None: 238 | human_player = random.randint(0, 1) 239 | 240 | # Create new game 241 | game = Nim() 242 | 243 | # Game loop 244 | while True: 245 | 246 | # Print contents of piles 247 | print() 248 | print("Piles:") 249 | for i, pile in enumerate(game.piles): 250 | print(f"Pile {i}: {pile}") 251 | print() 252 | 253 | # Compute available actions 254 | available_actions = Nim.available_actions(game.piles) 255 | time.sleep(1) 256 | 257 | # Let human make a move 258 | if game.player == human_player: 259 | print("Your Turn") 260 | while True: 261 | pile = int(input("Choose Pile: ")) 262 | count = int(input("Choose Count: ")) 263 | if (pile, count) in available_actions: 264 | break 265 | print("Invalid move, try again.") 266 | 267 | # Have AI make a move 268 | else: 269 | print("AI's Turn") 270 | pile, count = ai.choose_action(game.piles, epsilon=False) 271 | print(f"AI chose to take {count} from pile {pile}.") 272 | 273 | # Make move 274 | game.move((pile, count)) 275 | 276 | # Check for winner 277 | if game.winner is not None: 278 | print() 279 | print("GAME OVER") 280 | winner = "Human" if game.winner == human_player else "AI" 281 | print(f"Winner is {winner}") 282 | return 283 | -------------------------------------------------------------------------------- /4-learning/nim/play.py: -------------------------------------------------------------------------------- 1 | from nim import train, play 2 | 3 | ai = train(10000) 4 | play(ai) 5 | -------------------------------------------------------------------------------- /4-learning/shopping/shopping.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import sys 3 | 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.neighbors import KNeighborsClassifier 6 | 7 | TEST_SIZE = 0.4 8 | 9 | 10 | def main(): 11 | 12 | # Check command-line arguments 13 | if len(sys.argv) != 2: 14 | sys.exit("Usage: 
python shopping.py data")
15 | 
16 |     # Load data from spreadsheet and split into train and test sets
17 |     evidence, labels = load_data(sys.argv[1])
18 |     X_train, X_test, y_train, y_test = train_test_split(
19 |         evidence, labels, test_size=TEST_SIZE
20 |     )
21 | 
22 |     # Train model and make predictions
23 |     model = train_model(X_train, y_train)
24 |     predictions = model.predict(X_test)
25 |     sensitivity, specificity = evaluate(y_test, predictions)
26 | 
27 |     # Print results
28 |     print(f"Correct: {(y_test == predictions).sum()}")
29 |     print(f"Incorrect: {(y_test != predictions).sum()}")
30 |     print(f"True Positive Rate: {100 * sensitivity:.2f}%")
31 |     print(f"True Negative Rate: {100 * specificity:.2f}%")
32 | 
33 | 
34 | def load_data(filename):
35 |     """
36 |     Load shopping data from a CSV file `filename` and convert into a list of
37 |     evidence lists and a list of labels. Return a tuple (evidence, labels).
38 | 
39 |     evidence should be a list of lists, where each list contains the
40 |     following values, in order:
41 |         - Administrative, an integer
42 |         - Administrative_Duration, a floating point number
43 |         - Informational, an integer
44 |         - Informational_Duration, a floating point number
45 |         - ProductRelated, an integer
46 |         - ProductRelated_Duration, a floating point number
47 |         - BounceRates, a floating point number
48 |         - ExitRates, a floating point number
49 |         - PageValues, a floating point number
50 |         - SpecialDay, a floating point number
51 |         - Month, an index from 0 (January) to 11 (December)
52 |         - OperatingSystems, an integer
53 |         - Browser, an integer
54 |         - Region, an integer
55 |         - TrafficType, an integer
56 |         - VisitorType, an integer 0 (not returning) or 1 (returning)
57 |         - Weekend, an integer 0 (if false) or 1 (if true)
58 | 
59 |     labels should be the corresponding list of labels, where each label
60 |     is 1 if Revenue is true, and 0 otherwise.
61 |     """
62 |     evidence, labels = [], []
63 |     month_to_int = dict(Jan=0, Feb=1, Mar=2, Apr=3, May=4, June=5, Jul=6, Aug=7, Sep=8, Oct=9, Nov=10, Dec=11)
64 |     with open(filename, newline='') as csvfile:
65 |         reader = csv.DictReader(csvfile)
66 |         for row in reader:
67 |             evidence.append([
68 |                 int(row["Administrative"]),
69 |                 float(row["Administrative_Duration"]),
70 |                 int(row["Informational"]),
71 |                 float(row["Informational_Duration"]),
72 |                 int(row["ProductRelated"]),
73 |                 float(row["ProductRelated_Duration"]),
74 |                 float(row["BounceRates"]),
75 |                 float(row["ExitRates"]),
76 |                 float(row["PageValues"]),
77 |                 float(row["SpecialDay"]),
78 |                 month_to_int[row["Month"]],
79 |                 int(row["OperatingSystems"]),
80 |                 int(row["Browser"]),
81 |                 int(row["Region"]),
82 |                 int(row["TrafficType"]),
83 |                 1 if row["VisitorType"] == "Returning_Visitor" else 0,
84 |                 1 if row["Weekend"] == "TRUE" else 0,
85 |             ])
86 |             labels.append(1 if row["Revenue"] == "TRUE" else 0)
87 |     return evidence, labels
88 | 
89 | 
90 | def train_model(evidence, labels):
91 |     """
92 |     Given a list of evidence lists and a list of labels, return a
93 |     fitted k-nearest neighbor model (k=1) trained on the data.
94 |     """
95 |     model = KNeighborsClassifier(n_neighbors=1)
96 |     model.fit(evidence, labels)
97 |     return model
98 | 
99 | 
100 | def evaluate(labels, predictions):
101 |     """
102 |     Given a list of actual labels and a list of predicted labels,
103 |     return a tuple (sensitivity, specificity).
104 | 
105 |     Assume each label is either a 1 (positive) or 0 (negative).
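    A small worked example for the two rates defined below (made-up
    values, not from shopping.csv):

        >>> evaluate([1, 1, 0, 0], [1, 0, 0, 1])
        (0.5, 0.5)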
106 | 
107 |     `sensitivity` should be a floating-point value from 0 to 1
108 |     representing the "true positive rate": the proportion of
109 |     actual positive labels that were accurately identified.
110 | 
111 |     `specificity` should be a floating-point value from 0 to 1
112 |     representing the "true negative rate": the proportion of
113 |     actual negative labels that were accurately identified.
114 |     """
115 |     sensitivity, specificity = 0.0, 0.0
116 |     positive, negative = 0.0, 0.0
117 |     for label, prediction in zip(labels, predictions):
118 |         if label == 1:
119 |             positive += 1
120 |             if label == prediction:
121 |                 sensitivity += 1
122 | 
123 |         if label == 0:
124 |             negative += 1
125 |             if label == prediction:
126 |                 specificity += 1
127 | 
128 |     sensitivity /= positive
129 |     specificity /= negative
130 | 
131 |     return sensitivity, specificity
132 | 
133 | 
134 | if __name__ == "__main__":
135 |     main()
136 | 
--------------------------------------------------------------------------------
/5-neural-networks/traffic/requirements.txt:
--------------------------------------------------------------------------------
1 | opencv-python
2 | scikit-learn
3 | tensorflow
4 | 
--------------------------------------------------------------------------------
/5-neural-networks/traffic/traffic.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import os
4 | import sys
5 | import tensorflow as tf
6 | 
7 | from sklearn.model_selection import train_test_split
8 | 
9 | EPOCHS = 10
10 | IMG_WIDTH = 30
11 | IMG_HEIGHT = 30
12 | NUM_CATEGORIES = 43
13 | TEST_SIZE = 0.4
14 | 
15 | 
16 | def main():
17 | 
18 |     # Check command-line arguments
19 |     if len(sys.argv) not in [2, 3]:
20 |         sys.exit("Usage: python traffic.py data_directory [model.h5]")
21 | 
22 |     # Get image arrays and labels for all image files
23 |     images, labels = load_data(sys.argv[1])
24 | 
25 |     # Split data into training and testing sets
26 |     labels = tf.keras.utils.to_categorical(labels)
27 |     x_train, x_test, y_train, y_test = train_test_split(
28 |         np.array(images), np.array(labels), test_size=TEST_SIZE
29 |     )
30 | 
31 |     # Get a compiled neural network
32 |     model = get_model()
33 | 
34 |     # Fit model on training data
35 |     model.fit(x_train, y_train, epochs=EPOCHS)
36 | 
37 |     # Evaluate neural network performance
38 |     model.evaluate(x_test, y_test, verbose=2)
39 | 
40 |     # Save model to file
41 |     if len(sys.argv) == 3:
42 |         filename = sys.argv[2]
43 |         model.save(filename)
44 |         print(f"Model saved to {filename}.")
45 | 
46 | 
47 | def load_data(data_dir):
48 |     """
49 |     Load image data from directory `data_dir`.
50 | 
51 |     Assume `data_dir` has one directory named after each category, numbered
52 |     0 through NUM_CATEGORIES - 1. Inside each category directory will be some
53 |     number of image files.
54 | 
55 |     Return tuple `(images, labels)`. `images` should be a list of all
56 |     of the images in the data directory, where each image is formatted as a
57 |     numpy ndarray with dimensions IMG_WIDTH x IMG_HEIGHT x 3. `labels` should
58 |     be a list of integer labels, representing the categories for each of the
59 |     corresponding `images`.
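    For example, with NUM_CATEGORIES = 3 the expected layout would be
    (file names here are illustrative only):

        data_dir/0/00000.ppm
        data_dir/1/00000.ppm
        data_dir/2/00000.ppm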
60 | """ 61 | images = [] 62 | labels = [] 63 | for folder in os.listdir(data_dir): 64 | folder_path = os.path.join(data_dir, folder) 65 | if os.path.isdir(folder_path): 66 | for image_file in os.listdir(folder_path): 67 | image = cv2.imread(os.path.join(folder_path, image_file), cv2.IMREAD_COLOR) 68 | image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_AREA) 69 | images.append(image) 70 | labels.append(int(folder)) 71 | 72 | return images, labels 73 | 74 | 75 | def get_model(): 76 | """ 77 | Returns a compiled convolutional neural network model. Assume that the 78 | `input_shape` of the first layer is `(IMG_WIDTH, IMG_HEIGHT, 3)`. 79 | The output layer should have `NUM_CATEGORIES` units, one for each category. 80 | """ 81 | model = tf.keras.models.Sequential([ 82 | tf.keras.layers.Conv2D( 83 | 32, (3, 3), activation="relu", input_shape=(IMG_WIDTH, IMG_HEIGHT, 3) 84 | ), 85 | tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), 86 | 87 | tf.keras.layers.Conv2D( 88 | 64, (3, 3), activation="relu" 89 | ), 90 | tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), 91 | 92 | # Flatten units 93 | tf.keras.layers.Flatten(), 94 | 95 | # Add a hidden layer with dropout 96 | tf.keras.layers.Dense(128, activation="relu"), 97 | tf.keras.layers.Dense(64, activation="relu"), 98 | tf.keras.layers.Dropout(0.33), 99 | 100 | # add output layer with NUM_CATEGORIES outputs 101 | tf.keras.layers.Dense(NUM_CATEGORIES, activation="softmax") 102 | 103 | ]) 104 | 105 | # train the model 106 | model.compile( 107 | optimizer="adam", 108 | loss="categorical_crossentropy", 109 | metrics=["accuracy"] 110 | ) 111 | return model 112 | 113 | 114 | if __name__ == "__main__": 115 | main() 116 | -------------------------------------------------------------------------------- /6-language/parser/parser.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | from nltk.tokenize import word_tokenize 3 | import re 4 | import sys 5 | 6 | TERMINALS = """ 7 | Adj -> "country" | "dreadful" | "enigmatical" | "little" | "moist" | "red" 8 | Adv -> "down" | "here" | "never" 9 | Conj -> "and" 10 | Det -> "a" | "an" | "his" | "my" | "the" 11 | N -> "armchair" | "companion" | "day" | "door" | "hand" | "he" | "himself" 12 | N -> "holmes" | "home" | "i" | "mess" | "paint" | "palm" | "pipe" | "she" 13 | N -> "smile" | "thursday" | "walk" | "we" | "word" 14 | P -> "at" | "before" | "in" | "of" | "on" | "to" | "until" 15 | V -> "arrived" | "came" | "chuckled" | "had" | "lit" | "said" | "sat" 16 | V -> "smiled" | "tell" | "were" 17 | """ 18 | 19 | NONTERMINALS = """ 20 | S -> NP VP | S Conj S | S Conj VP 21 | AP -> Adj | Adj AP 22 | NP -> N | Det NP | AP NP | PP NP 23 | PP -> P NP | P S 24 | VP -> V | V NP | VP PP | Adv VP | VP Adv 25 | """ 26 | 27 | grammar = nltk.CFG.fromstring(NONTERMINALS + TERMINALS) 28 | parser = nltk.ChartParser(grammar) 29 | 30 | 31 | def main(): 32 | 33 | # If filename specified, read sentence from file 34 | if len(sys.argv) == 2: 35 | with open(sys.argv[1]) as f: 36 | s = f.read() 37 | 38 | # Otherwise, get sentence as input 39 | else: 40 | s = input("Sentence: ") 41 | 42 | # Convert input into list of words 43 | s = preprocess(s) 44 | 45 | # Attempt to parse sentence 46 | try: 47 | trees = list(parser.parse(s)) 48 | except ValueError as e: 49 | print(e) 50 | return 51 | if not trees: 52 | print("Could not parse sentence.") 53 | return 54 | 55 | # Print each tree with noun phrase chunks 56 | for tree in trees: 57 | tree.pretty_print() 58 | 59 | print("Noun 
Phrase Chunks") 60 | for np in np_chunk(tree): 61 | print(" ".join(np.flatten())) 62 | 63 | 64 | def preprocess(sentence): 65 | """ 66 | Convert `sentence` to a list of its words. 67 | Pre-process sentence by converting all characters to lowercase 68 | and removing any word that does not contain at least one alphabetic 69 | character. 70 | """ 71 | sentence = sentence.lower() 72 | words = word_tokenize(sentence) 73 | return [word for word in words if re.match('[a-z]', word)] 74 | 75 | 76 | def np_chunk(tree): 77 | """ 78 | Return a list of all noun phrase chunks in the sentence tree. 79 | A noun phrase chunk is defined as any subtree of the sentence 80 | whose label is "NP" that does not itself contain any other 81 | noun phrases as subtrees. 82 | """ 83 | chunks = [] 84 | for subtree in tree.subtrees(lambda t: t.label() == 'NP'): 85 | if not contains_NP(subtree): 86 | chunks.append(subtree) 87 | return chunks 88 | 89 | 90 | def contains_NP(subtree): 91 | for st in subtree.subtrees(): 92 | if st == subtree: 93 | continue 94 | elif st.label() == 'NP': 95 | return True 96 | return False 97 | 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /6-language/parser/requirements.txt: -------------------------------------------------------------------------------- 1 | nltk 2 | -------------------------------------------------------------------------------- /6-language/parser/sentences/1.txt: -------------------------------------------------------------------------------- 1 | Holmes sat. 2 | -------------------------------------------------------------------------------- /6-language/parser/sentences/10.txt: -------------------------------------------------------------------------------- 1 | I had a little moist red paint in the palm of my hand. 2 | -------------------------------------------------------------------------------- /6-language/parser/sentences/2.txt: -------------------------------------------------------------------------------- 1 | Holmes lit a pipe. 2 | -------------------------------------------------------------------------------- /6-language/parser/sentences/3.txt: -------------------------------------------------------------------------------- 1 | We arrived the day before Thursday. 2 | -------------------------------------------------------------------------------- /6-language/parser/sentences/4.txt: -------------------------------------------------------------------------------- 1 | Holmes sat in the red armchair and he chuckled. 2 | -------------------------------------------------------------------------------- /6-language/parser/sentences/5.txt: -------------------------------------------------------------------------------- 1 | My companion smiled an enigmatical smile. -------------------------------------------------------------------------------- /6-language/parser/sentences/6.txt: -------------------------------------------------------------------------------- 1 | Holmes chuckled to himself. 2 | -------------------------------------------------------------------------------- /6-language/parser/sentences/7.txt: -------------------------------------------------------------------------------- 1 | She never said a word until we were at the door here. 2 | -------------------------------------------------------------------------------- /6-language/parser/sentences/8.txt: -------------------------------------------------------------------------------- 1 | Holmes sat down and lit his pipe. 
2 | -------------------------------------------------------------------------------- /6-language/parser/sentences/9.txt: -------------------------------------------------------------------------------- 1 | I had a country walk on Thursday and came home in a dreadful mess. 2 | -------------------------------------------------------------------------------- /6-language/questions/corpus/natural_language_processing.txt: -------------------------------------------------------------------------------- 1 | https://en.wikipedia.org/wiki/Natural_language_processing 2 | 3 | Natural language processing (NLP) is a subfield of linguistics, computer science, information engineering, and artificial intelligence concerned with the interactions between computers and human (natural) languages, in particular how to program computers to process and analyze large amounts of natural language data. 4 | Challenges in natural language processing frequently involve speech recognition, natural language understanding, and natural language generation. 5 | 6 | 7 | == History == 8 | The history of natural language processing (NLP) generally started in the 1950s, although work can be found from earlier periods. 9 | In 1950, Alan Turing published an article titled "Computing Machinery and Intelligence" which proposed what is now called the Turing test as a criterion of intelligence. 10 | The Georgetown experiment in 1954 involved fully automatic translation of more than sixty Russian sentences into English. The authors claimed that within three or five years, machine translation would be a solved problem. However, real progress was much slower, and after the ALPAC report in 1966, which found that ten-year-long research had failed to fulfill the expectations, funding for machine translation was dramatically reduced. Little further research in machine translation was conducted until the late 1980s when the first statistical machine translation systems were developed. 11 | Some notably successful natural language processing systems developed in the 1960s were SHRDLU, a natural language system working in restricted "blocks worlds" with restricted vocabularies, and ELIZA, a simulation of a Rogerian psychotherapist, written by Joseph Weizenbaum between 1964 and 1966. Using almost no information about human thought or emotion, ELIZA sometimes provided a startlingly human-like interaction. When the "patient" exceeded the very small knowledge base, ELIZA might provide a generic response, for example, responding to "My head hurts" with "Why do you say your head hurts?". 12 | During the 1970s, many programmers began to write "conceptual ontologies", which structured real-world information into computer-understandable data. Examples are MARGIE (Schank, 1975), SAM (Cullingford, 1978), PAM (Wilensky, 1978), TaleSpin (Meehan, 1976), QUALM (Lehnert, 1977), Politics (Carbonell, 1979), and Plot Units (Lehnert 1981). During this time, many chatterbots were written including PARRY, Racter, and Jabberwacky. 13 | Up to the 1980s, most natural language processing systems were based on complex sets of hand-written rules. Starting in the late 1980s, however, there was a revolution in natural language processing with the introduction of machine learning algorithms for language processing. This was due to both the steady increase in computational power (see Moore's law) and the gradual lessening of the dominance of Chomskyan theories of linguistics (e.g. 
transformational grammar), whose theoretical underpinnings discouraged the sort of corpus linguistics that underlies the machine-learning approach to language processing. Some of the earliest-used machine learning algorithms, such as decision trees, produced systems of hard if-then rules similar to existing hand-written rules. However, part-of-speech tagging introduced the use of hidden Markov models to natural language processing, and increasingly, research has focused on statistical models, which make soft, probabilistic decisions based on attaching real-valued weights to the features making up the input data. The cache language models upon which many speech recognition systems now rely are examples of such statistical models. Such models are generally more robust when given unfamiliar input, especially input that contains errors (as is very common for real-world data), and produce more reliable results when integrated into a larger system comprising multiple subtasks. 14 | Many of the notable early successes occurred in the field of machine translation, due especially to work at IBM Research, where successively more complicated statistical models were developed. These systems were able to take advantage of existing multilingual textual corpora that had been produced by the Parliament of Canada and the European Union as a result of laws calling for the translation of all governmental proceedings into all official languages of the corresponding systems of government. However, most other systems depended on corpora specifically developed for the tasks implemented by these systems, which was (and often continues to be) a major limitation in the success of these systems. As a result, a great deal of research has gone into methods of more effectively learning from limited amounts of data. 15 | Recent research has increasingly focused on unsupervised and semi-supervised learning algorithms. Such algorithms can learn from data that has not been hand-annotated with the desired answers or using a combination of annotated and non-annotated data. Generally, this task is much more difficult than supervised learning, and typically produces less accurate results for a given amount of input data. However, there is an enormous amount of non-annotated data available (including, among other things, the entire content of the World Wide Web), which can often make up for the inferior results if the algorithm used has a low enough time complexity to be practical. 16 | In the 2010s, representation learning and deep neural network-style machine learning methods became widespread in natural language processing, due in part to a flurry of results showing that such techniques can achieve state-of-the-art results in many natural language tasks, for example in language modeling, parsing, and many others. Popular techniques include the use of word embeddings to capture semantic properties of words, and an increase in end-to-end learning of a higher-level task (e.g., question answering) instead of relying on a pipeline of separate intermediate tasks (e.g., part-of-speech tagging and dependency parsing). In some areas, this shift has entailed substantial changes in how NLP systems are designed, such that deep neural network-based approaches may be viewed as a new paradigm distinct from statistical natural language processing. 
For instance, the term neural machine translation (NMT) emphasizes the fact that deep learning-based approaches to machine translation directly learn sequence-to-sequence transformations, obviating the need for intermediate steps such as word alignment and language modeling that were used in statistical machine translation (SMT). 17 | 18 | 19 | == Rule-based vs. statistical NLP == 20 | In the early days, many language-processing systems were designed by hand-coding a set of rules, such as by writing grammars or devising heuristic rules for stemming. 21 | Since the so-called "statistical revolution" in the late 1980s and mid-1990s, much natural language processing research has relied heavily on machine learning. The machine-learning paradigm calls instead for using statistical inference to automatically learn such rules through the analysis of large corpora (corpora being the plural of corpus: a set of documents, possibly with human or computer annotations) of typical real-world examples. 22 | Many different classes of machine-learning algorithms have been applied to natural-language-processing tasks. These algorithms take as input a large set of "features" that are generated from the input data. Some of the earliest-used algorithms, such as decision trees, produced systems of hard if-then rules similar to the systems of handwritten rules that were then common. Increasingly, however, research has focused on statistical models, which make soft, probabilistic decisions based on attaching real-valued weights to each input feature. Such models have the advantage that they can express the relative certainty of many different possible answers rather than only one, producing more reliable results when such a model is included as a component of a larger system. 23 | Systems based on machine-learning algorithms have many advantages over hand-produced rules: 24 | 25 | The learning procedures used during machine learning automatically focus on the most common cases, whereas when writing rules by hand it is often not at all obvious where the effort should be directed. 26 | Automatic learning procedures can make use of statistical inference algorithms to produce models that are robust to unfamiliar input (e.g. containing words or structures that have not been seen before) and to erroneous input (e.g. with misspelled words or words accidentally omitted). Generally, handling such input gracefully with handwritten rules, or, more generally, creating systems of handwritten rules that make soft decisions, is extremely difficult, error-prone and time-consuming. 27 | Systems based on automatically learning the rules can be made more accurate simply by supplying more input data. However, systems based on handwritten rules can only be made more accurate by increasing the complexity of the rules, which is a much more difficult task. In particular, there is a limit to the complexity of systems based on handcrafted rules, beyond which the systems become more and more unmanageable. However, creating more data to input to machine-learning systems simply requires a corresponding increase in the number of man-hours worked, generally without significant increases in the complexity of the annotation process. 28 | 29 | 30 | == Major evaluations and tasks == 31 | The following is a list of some of the most commonly researched tasks in natural language processing. Some of these tasks have direct real-world applications, while others more commonly serve as subtasks that are used to aid in solving larger tasks.
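Before turning to specific tasks, the rule-based vs. statistical contrast above can be made concrete with a minimal, illustrative Python sketch: a brittle hand-written tagging rule next to a "statistical" rule learned by counting tags in a tiny annotated corpus. The corpus, tag set, and fallback choice are all assumptions invented for this example.

```python
from collections import Counter, defaultdict

# A tiny invented tagged corpus; real corpora contain millions of tokens.
tagged_corpus = [("the", "DET"), ("dog", "NOUN"), ("runs", "VERB"),
                 ("the", "DET"), ("cat", "NOUN"), ("runs", "VERB"),
                 ("runs", "NOUN")]          # "runs" is genuinely ambiguous

# Hand-written rule: "every word ending in -s is a verb" (brittle).
def rule_based_tag(word):
    return "VERB" if word.endswith("s") else "NOUN"

# Learned rule: pick each word's most frequent tag in the corpus.
counts = defaultdict(Counter)
for word, tag in tagged_corpus:
    counts[word][tag] += 1

def statistical_tag(word):
    if word in counts:
        return counts[word].most_common(1)[0][0]
    return "NOUN"                           # assumed fallback for unseen words

for w in ["runs", "dogs"]:
    print(w, rule_based_tag(w), statistical_tag(w))
# runs VERB VERB   (the two agree here)
# dogs VERB NOUN   (the hand-written rule misfires on an unseen noun)
```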
32 | Though natural language processing tasks are closely intertwined, they are frequently subdivided into categories for convenience. A coarse division is given below. 33 | 34 | 35 | === Syntax === 36 | Grammar induction 37 | Generate a formal grammar that describes a language's syntax. 38 | Lemmatization 39 | The task of removing inflectional endings only and returning the base dictionary form of a word, which is also known as a lemma. 40 | Morphological segmentation 41 | Separate words into individual morphemes and identify the class of the morphemes. The difficulty of this task depends greatly on the complexity of the morphology (i.e. the structure of words) of the language being considered. English has fairly simple morphology, especially inflectional morphology, and thus it is often possible to ignore this task entirely and simply model all possible forms of a word (e.g. "open, opens, opened, opening") as separate words. In languages such as Turkish or Meitei, a highly agglutinated Indian language, however, such an approach is not possible, as each dictionary entry has thousands of possible word forms. 42 | Part-of-speech tagging 43 | Given a sentence, determine the part of speech (POS) for each word. Many words, especially common ones, can serve as multiple parts of speech. For example, "book" can be a noun ("the book on the table") or verb ("to book a flight"); "set" can be a noun, verb or adjective; and "out" can be any of at least five different parts of speech. Some languages have more such ambiguity than others. Languages with little inflectional morphology, such as English, are particularly prone to such ambiguity. Chinese is also prone to such ambiguity because it is a tonal language during verbalization, and such inflection is not readily conveyed via the orthography used to express the intended meaning. 44 | Parsing 45 | Determine the parse tree (grammatical analysis) of a given sentence. The grammar for natural languages is ambiguous and typical sentences have multiple possible analyses. Perhaps surprisingly, for a typical sentence, there may be thousands of potential parses (most of which will seem completely nonsensical to a human). There are two primary types of parsing: dependency parsing and constituency parsing. Dependency parsing focuses on the relationships between words in a sentence (marking things like primary objects and predicates), whereas constituency parsing focuses on building out the parse tree using a probabilistic context-free grammar (PCFG). See also: stochastic grammar. 46 | Sentence breaking (also known as sentence boundary disambiguation) 47 | Given a chunk of text, find the sentence boundaries. Sentence boundaries are often marked by periods or other punctuation marks, but these same characters can serve other purposes (e.g. marking abbreviations). 48 | Stemming 49 | The process of reducing inflected (or sometimes derived) words to their root form (e.g. "close" is the root for "closed", "closing", "close", "closer", etc.). 50 | Word segmentation 51 | Separate a chunk of continuous text into separate words. For a language like English, this is fairly trivial, since words are usually separated by spaces. However, some written languages like Chinese, Japanese and Thai do not mark word boundaries in such a fashion, and in those languages text segmentation is a significant task requiring knowledge of the vocabulary and morphology of words in the language.
Sometimes this process is also used in cases like Bag of Words (BOW) creation in data mining. 52 | Terminology extraction 53 | The goal of terminology extraction is to automatically extract relevant terms from a given corpus. 54 | 55 | 56 | === Semantics === 57 | Lexical semantics 58 | What is the computational meaning of individual words in context? 59 | Distributional semantics 60 | How can we learn semantic representations from data? 61 | Machine translation 62 | Automatically translate text from one human language to another. This is one of the most difficult problems, and is a member of a class of problems colloquially termed "AI-complete", i.e. requiring all of the different types of knowledge that humans possess (grammar, semantics, facts about the real world, etc.) to solve properly. 63 | Named entity recognition (NER) 64 | Given a stream of text, determine which items in the text map to proper names, such as people or places, and what the type of each such name is (e.g. person, location, organization). Although capitalization can aid in recognizing named entities in languages such as English, this information cannot aid in determining the type of named entity, and in any case, is often inaccurate or insufficient. For example, the first letter of a sentence is also capitalized, and named entities often span several words, only some of which are capitalized. Furthermore, many other languages in non-Western scripts (e.g. Chinese or Arabic) do not have any capitalization at all, and even languages with capitalization may not consistently use it to distinguish names. For example, German capitalizes all nouns, regardless of whether they are names, and French and Spanish do not capitalize names that serve as adjectives. 65 | Natural language generation 66 | Convert information from computer databases or semantic intents into readable human language. 67 | Natural language understanding 68 | Convert chunks of text into more formal representations such as first-order logic structures that are easier for computer programs to manipulate. Natural language understanding involves the identification of the intended semantic from the multiple possible semantics which can be derived from a natural language expression, which usually takes the form of organized notations of natural language concepts. The introduction and creation of a language metamodel and ontology are efficient, albeit empirical, solutions. An explicit formalization of natural language semantics, without confusion with implicit assumptions such as the closed-world assumption (CWA) vs. the open-world assumption, or subjective Yes/No vs. objective True/False, is expected to form the basis of a formalization of semantics. 69 | Optical character recognition (OCR) 70 | Given an image representing printed text, determine the corresponding text. 71 | Question answering 72 | Given a human-language question, determine its answer. Typical questions have a specific right answer (such as "What is the capital of Canada?"), but sometimes open-ended questions are also considered (such as "What is the meaning of life?"). Recent works have looked at even more complex questions. 73 | Recognizing textual entailment 74 | Given two text fragments, determine if one being true entails the other, entails the other's negation, or allows the other to be either true or false. 75 | Relationship extraction 76 | Given a chunk of text, identify the relationships among named entities (e.g. who is married to whom).
77 | Sentiment analysis (see also multimodal sentiment analysis) 78 | Extract subjective information, usually from a set of documents, often using online reviews to determine "polarity" about specific objects. It is especially useful for identifying trends of public opinion in social media, e.g. for marketing. 79 | Topic segmentation and recognition 80 | Given a chunk of text, separate it into segments each of which is devoted to a topic, and identify the topic of the segment. 81 | Word sense disambiguation 82 | Many words have more than one meaning; we have to select the meaning which makes the most sense in context. For this problem, we are typically given a list of words and associated word senses, e.g. from a dictionary or an online resource such as WordNet. 83 | 84 | 85 | === Discourse === 86 | Automatic summarization 87 | Produce a readable summary of a chunk of text. Often used to provide summaries of the text of a known type, such as research papers or articles in the financial section of a newspaper. 88 | Coreference resolution 89 | Given a sentence or larger chunk of text, determine which words ("mentions") refer to the same objects ("entities"). Anaphora resolution is a specific example of this task, and is specifically concerned with matching up pronouns with the nouns or names to which they refer. The more general task of coreference resolution also includes identifying so-called "bridging relationships" involving referring expressions. For example, in a sentence such as "He entered John's house through the front door", "the front door" is a referring expression and the bridging relationship to be identified is the fact that the door being referred to is the front door of John's house (rather than of some other structure that might also be referred to). 90 | Discourse analysis 91 | This rubric includes several related tasks. One task is identifying the discourse structure of a connected text, i.e. the nature of the discourse relationships between sentences (e.g. elaboration, explanation, contrast). Another possible task is recognizing and classifying the speech acts in a chunk of text (e.g. yes-no question, content question, statement, assertion, etc.). 92 | 93 | 94 | === Speech === 95 | Speech recognition 96 | Given a sound clip of a person or people speaking, determine the textual representation of the speech. This is the opposite of text-to-speech and is one of the extremely difficult problems colloquially termed "AI-complete" (see above). In natural speech there are hardly any pauses between successive words, and thus speech segmentation is a necessary subtask of speech recognition (see below). In most spoken languages, the sounds representing successive letters blend into each other in a process termed coarticulation, so the conversion of the analog signal to discrete characters can be a very difficult process. Also, given that words in the same language are spoken by people with different accents, the speech recognition software must be able to recognize the wide variety of input as being identical to each other in terms of its textual equivalent. 97 | Speech segmentation 98 | Given a sound clip of a person or people speaking, separate it into words. A subtask of speech recognition and typically grouped with it. 99 | Text-to-speech 100 | Given a text, transform its units into a spoken representation. Text-to-speech can be used to aid the visually impaired.
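Several of the syntax tasks listed earlier (sentence breaking, word segmentation, stemming) can be tried directly with NLTK, which the 6-language projects in this repository already depend on. A minimal sketch, assuming NLTK is installed and its "punkt" tokenizer data has been downloaded; the example sentence is invented:

```python
import nltk
from nltk.stem import PorterStemmer

# nltk.download("punkt")  # one-time download of the tokenizer models

text = "Closing time. The doors were closed, and she closed her book."

sentences = nltk.sent_tokenize(text)      # sentence boundary disambiguation
words = nltk.word_tokenize(text)          # word segmentation (tokenization)
stemmer = PorterStemmer()
stems = [stemmer.stem(w) for w in words]  # inflected forms reduced to a root

print(sentences)  # ['Closing time.', 'The doors were closed, and she closed her book.']
print(stems)      # "Closing" and "closed" both reduce to the stem "close"
```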
101 | 102 | 103 | === Dialogue === 104 | The first published work by an artificial intelligence, 1 the Road, appeared in 2018; marketed as a novel, it contains sixty million words. 105 | 106 | 107 | == See also == 108 | 109 | 110 | == References == 111 | 112 | 113 | == Further reading == 114 | -------------------------------------------------------------------------------- /6-language/questions/corpus/neural_network.txt: -------------------------------------------------------------------------------- 1 | https://en.wikipedia.org/wiki/Neural_network 2 | 3 | Artificial neural networks (ANN) or connectionist systems are computing systems vaguely inspired by the biological neural networks that constitute animal brains. Such systems "learn" to perform tasks by considering examples, generally without being programmed with task-specific rules. For example, in image recognition, they might learn to identify images that contain cats by analyzing example images that have been manually labeled as "cat" or "no cat" and using the results to identify cats in other images. They do this without any prior knowledge of cats, for example, that they have fur, tails, whiskers and cat-like faces. Instead, they automatically generate identifying characteristics from the examples that they process. 4 | An ANN is based on a collection of connected units or nodes called artificial neurons, which loosely model the neurons in a biological brain. Each connection, like the synapses in a biological brain, can transmit a signal to other neurons. An artificial neuron that receives a signal then processes it and can signal neurons connected to it. 5 | In ANN implementations, the "signal" at a connection is a real number, and the output of each neuron is computed by some non-linear function of the sum of its inputs. The connections are called edges. Neurons and edges typically have a weight that adjusts as learning proceeds. The weight increases or decreases the strength of the signal at a connection. Neurons may have a threshold such that a signal is sent only if the aggregate signal crosses that threshold. Typically, neurons are aggregated into layers. Different layers may perform different transformations on their inputs. Signals travel from the first layer (the input layer), to the last layer (the output layer), possibly after traversing the layers multiple times. 6 | The original goal of the ANN approach was to solve problems in the same way that a human brain would. But over time, attention moved to performing specific tasks, leading to deviations from biology. ANNs have been used on a variety of tasks, including computer vision, speech recognition, machine translation, social network filtering, playing board and video games, medical diagnosis, and even in activities that have traditionally been considered as reserved to humans, like painting. 7 | 8 | 9 | == History == 10 | 11 | Warren McCulloch and Walter Pitts (1943) opened the subject by creating a computational model for neural networks. In the late 1940s, D. O. Hebb created a learning hypothesis based on the mechanism of neural plasticity that became known as Hebbian learning. Farley and Wesley A. Clark (1954) first used computational machines, then called "calculators", to simulate a Hebbian network. Rosenblatt (1958) created the perceptron. The first functional networks with many layers were published by Ivakhnenko and Lapa in 1965, as the Group Method of Data Handling.
The basics of continuous backpropagation were derived in the context of control theory by Kelley in 1960 and by Bryson in 1961, using principles of dynamic programming. 12 | In 1970, Seppo Linnainmaa published the general method for automatic differentiation (AD) of discrete connected networks of nested differentiable functions. In 1973, Dreyfus used backpropagation to adapt parameters of controllers in proportion to error gradients. Werbos's (1975) backpropagation algorithm enabled practical training of multi-layer networks. In 1982, he applied Linnainmaa's AD method to neural networks in the way that became widely used. Thereafter research stagnated following Minsky and Papert (1969), who discovered that basic perceptrons were incapable of processing the exclusive-or circuit and that computers lacked sufficient power to process useful neural networks. In 1992, max-pooling was introduced to help with least-shift invariance and tolerance to deformation to aid 3D object recognition. Schmidhuber adopted a multi-level hierarchy of networks (1992) pre-trained one level at a time by unsupervised learning and fine-tuned by backpropagation. The development of metal–oxide–semiconductor (MOS) very-large-scale integration (VLSI), in the form of complementary MOS (CMOS) technology, enabled the development of practical artificial neural networks in the 1980s. A landmark publication in the field was the 1989 book Analog VLSI Implementation of Neural Systems by Carver A. Mead and Mohammed Ismail. Geoffrey Hinton et al. (2006) proposed learning a high-level representation using successive layers of binary or real-valued latent variables with a restricted Boltzmann machine to model each layer. In 2012, Ng and Dean created a network that learned to recognize higher-level concepts, such as cats, only from watching unlabeled images. Unsupervised pre-training and increased computing power from GPUs and distributed computing allowed the use of larger networks, particularly in image and visual recognition problems, which became known as "deep learning". 13 | Ciresan and colleagues (2010) showed that despite the vanishing gradient problem, GPUs make backpropagation feasible for many-layered feedforward neural networks. Between 2009 and 2012, ANNs began winning prizes in ANN contests, approaching human level performance on various tasks, initially in pattern recognition and machine learning. For example, the bi-directional and multi-dimensional long short-term memory (LSTM) of Graves et al. won three competitions in connected handwriting recognition in 2009 without any prior knowledge about the three languages to be learned. Ciresan and colleagues built the first pattern recognizers to achieve human-competitive/superhuman performance on benchmarks such as traffic sign recognition (IJCNN 2012). 14 | 15 | 16 | == Models == 17 | 18 | ANNs began as an attempt to exploit the architecture of the human brain to perform tasks that conventional algorithms had little success with. They soon reoriented towards improving empirical results, mostly abandoning attempts to remain true to their biological precursors. Neurons are connected to each other in various patterns, to allow the output of some neurons to become the input of others. The network forms a directed, weighted graph. An artificial neural network consists of a collection of simulated neurons. Each neuron is a node which is connected to other nodes via links that correspond to biological axon-synapse-dendrite connections. 
Each link has a weight, which determines the strength of one node's influence on another. 19 | 20 | 21 | === Components of ANNs === 22 | 23 | 24 | ==== Neurons ==== 25 | ANNs are composed of artificial neurons which retain the biological concept of neurons, which receive input, combine the input with their internal state (activation) and an optional threshold using an activation function, and produce output using an output function. The initial inputs are external data, such as images and documents. The ultimate outputs accomplish the task, such as recognizing an object in an image. The important characteristic of the activation function is that it provides a smooth, differentiable transition as input values change, i.e. a small change in input produces a small change in output. 26 | 27 | 28 | ==== Connections and weights ==== 29 | The network consists of connections, each connection providing the output of one neuron as an input to another neuron. Each connection is assigned a weight that represents its relative importance. A given neuron can have multiple input and output connections. 30 | 31 | 32 | ==== Propagation function ==== 33 | The propagation function computes the input to a neuron from the outputs of its predecessor neurons and their connections as a weighted sum. A bias term can be added to the result of the propagation. 34 | 35 | 36 | === Organization === 37 | The neurons are typically organized into multiple layers, especially in deep learning. Neurons of one layer connect only to neurons of the immediately preceding and immediately following layers. The layer that receives external data is the input layer. The layer that produces the ultimate result is the output layer. In between them are zero or more hidden layers. Single layer and unlayered networks are also used. Between two layers, multiple connection patterns are possible. They can be fully connected, with every neuron in one layer connecting to every neuron in the next layer. They can be pooling, where a group of neurons in one layer connect to a single neuron in the next layer, thereby reducing the number of neurons in that layer. Neurons with only such connections form a directed acyclic graph and are known as feedforward networks. Alternatively, networks that allow connections between neurons in the same or previous layers are known as recurrent networks. 38 | 39 | 40 | === Hyperparameter === 41 | 42 | A hyperparameter is a constant parameter whose value is set before the learning process begins. The values of parameters are derived via learning. Examples of hyperparameters include learning rate, the number of hidden layers and batch size. The values of some hyperparameters can be dependent on those of other hyperparameters. For example, the size of some layers can depend on the overall number of layers. 43 | 44 | 45 | === Learning === 46 | 47 | Learning is the adaptation of the network to better handle a task by considering sample observations. Learning involves adjusting the weights (and optional thresholds) of the network to improve the accuracy of the result. This is done by minimizing the observed errors. Learning is complete when examining additional observations does not usefully reduce the error rate. Even after learning, the error rate typically does not reach 0. If after learning, the error rate is too high, the network typically must be redesigned. Practically this is done by defining a cost function that is evaluated periodically during learning. 
As long as its output continues to decline, learning continues. The cost is frequently defined as a statistic whose value can only be approximated. The outputs are actually numbers, so when the error is low, the difference between the output (almost certainly a cat) and the correct answer (cat) is small. Learning attempts to reduce the total of the differences across the observations. Most learning models can be viewed as a straightforward application of optimization theory and statistical estimation. 48 | 49 | 50 | ==== Learning rate ==== 51 | The learning rate defines the size of the corrective steps that the model takes to adjust for errors in each observation. A high learning rate shortens the training time, but with lower ultimate accuracy, while a lower learning rate takes longer, but with the potential for greater accuracy. Optimizations such as Quickprop are primarily aimed at speeding up error minimization, while other improvements mainly try to increase reliability. In order to avoid oscillation inside the network, such as alternating connection weights, and to improve the rate of convergence, refinements use an adaptive learning rate that increases or decreases as appropriate. The concept of momentum allows the balance between the gradient and the previous change to be weighted such that the weight adjustment depends to some degree on the previous change. A momentum close to 0 emphasizes the gradient, while a value close to 1 emphasizes the last change. 52 | 53 | 54 | ==== Cost function ==== 55 | While it is possible to define a cost function ad hoc, frequently the choice is determined by the function's desirable properties (such as convexity) or because it arises from the model (e.g., in a probabilistic model the model's posterior probability can be used as an inverse cost). 56 | 57 | 58 | ==== Backpropagation ==== 59 | 60 | Backpropagation is a method to adjust the connection weights to compensate for each error found during learning. The error amount is effectively divided among the connections. Technically, backprop calculates the gradient (the derivative) of the cost function associated with a given state with respect to the weights. The weight updates can be done via stochastic gradient descent or other methods, such as Extreme Learning Machines, "No-prop" networks, training without backtracking, "weightless" networks, and non-connectionist neural networks. 61 | 62 | 63 | === Learning paradigms === 64 | The three major learning paradigms are supervised learning, unsupervised learning and reinforcement learning. They each correspond to a particular learning task. 65 | 66 | 67 | ==== Supervised learning ==== 68 | Supervised learning uses a set of paired inputs and desired outputs. The learning task is to produce the desired output for each input. In this case the cost function is related to eliminating incorrect deductions. A commonly used cost is the mean-squared error, which tries to minimize the average squared error between the network's output and the desired output. Tasks suited for supervised learning are pattern recognition (also known as classification) and regression (also known as function approximation). Supervised learning is also applicable to sequential data (e.g., for handwriting, speech and gesture recognition). This can be thought of as learning with a "teacher", in the form of a function that provides continuous feedback on the quality of solutions obtained thus far.
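A minimal sketch tying several of the ideas above together: a single artificial neuron (weighted sum plus sigmoid activation) trained under a squared-error cost, with corrective steps scaled by a learning rate. The tiny OR dataset and all hyperparameter values are invented for illustration only.

```python
import math
import random

# Learn logical OR: inputs (x1, x2) paired with desired outputs.
data = [((0.0, 0.0), 0.0), ((0.0, 1.0), 1.0),
        ((1.0, 0.0), 1.0), ((1.0, 1.0), 1.0)]

w = [random.uniform(-1, 1) for _ in range(2)]
bias = 0.0
rate = 0.5                                  # the learning rate

def sigmoid(z):
    return 1.0 / (1.0 + math.exp(-z))

for epoch in range(5000):
    for (x1, x2), target in data:
        out = sigmoid(w[0] * x1 + w[1] * x2 + bias)
        err = out - target                  # derivative of 1/2*(out - target)^2
        grad = err * out * (1.0 - out)      # chain rule through the sigmoid
        w[0] -= rate * grad * x1            # corrective step, scaled by rate
        w[1] -= rate * grad * x2
        bias -= rate * grad

# Outputs approach [0, 1, 1, 1]; the error rate typically never reaches 0.
print([round(sigmoid(w[0] * x1 + w[1] * x2 + bias), 2) for (x1, x2), _ in data])
```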
69 | 70 | 71 | ==== Unsupervised learning ==== 72 | In unsupervised learning, input data is given along with the cost function, some function of the data $\textstyle x$ and the network's output. The cost function is dependent on the task (the model domain) and any a priori assumptions (the implicit properties of the model, its parameters and the observed variables). As a trivial example, consider the model $\textstyle f(x)=a$ where $\textstyle a$ is a constant and the cost $\textstyle C=E[(x-f(x))^{2}]$. Minimizing this cost produces a value of $\textstyle a$ that is equal to the mean of the data. The cost function can be much more complicated. Its form depends on the application: for example, in compression it could be related to the mutual information between $\textstyle x$ and $\textstyle f(x)$, whereas in statistical modeling, it could be related to the posterior probability of the model given the data (note that in both of those examples those quantities would be maximized rather than minimized). Tasks that fall within the paradigm of unsupervised learning are in general estimation problems; the applications include clustering, the estimation of statistical distributions, compression and filtering. 168 | 169 | 170 | ==== Reinforcement learning ==== 171 | 172 | In applications such as playing video games, an actor takes a string of actions, receiving a generally unpredictable response from the environment after each one. The goal is to win the game, i.e., generate the most positive (lowest cost) responses. In reinforcement learning, the aim is to weight the network (devise a policy) to perform actions that minimize long-term (expected cumulative) cost. At each point in time the agent performs an action and the environment generates an observation and an instantaneous cost, according to some (usually unknown) rules. The rules and the long-term cost usually can only be estimated. At any juncture, the agent decides whether to explore new actions to uncover their costs or to exploit prior learning to proceed more quickly. 173 | Formally the environment is modeled as a Markov decision process (MDP) with states $\textstyle s_{1},\ldots,s_{n}\in S$ and actions $\textstyle a_{1},\ldots,a_{m}\in A$. Because the state transitions are not known, probability distributions are used instead: the instantaneous cost distribution $\textstyle P(c_{t}\mid s_{t})$, the observation distribution $\textstyle P(x_{t}\mid s_{t})$ and the transition distribution $\textstyle P(s_{t+1}\mid s_{t},a_{t})$, while a policy is defined as the conditional distribution over actions given the observations. Taken together, the two define a Markov chain (MC). The aim is to discover the lowest-cost MC. 324 | ANNs serve as the learning component in such applications. Dynamic programming coupled with ANNs (giving neurodynamic programming) has been applied to problems such as those involved in vehicle routing, video games, natural resource management and medicine because of ANNs' ability to mitigate losses of accuracy even when reducing the discretization grid density for numerically approximating the solution of control problems. Tasks that fall within the paradigm of reinforcement learning are control problems, games and other sequential decision making tasks. 325 | 326 | 327 | ==== Self learning ==== 328 | Self learning in neural networks was introduced in 1982 along with a neural network capable of self-learning named Crossbar Adaptive Array (CAA). It is a system with only one input, situation s, and only one output, action (or behavior) a. It has neither external advice input nor external reinforcement input from the environment. The CAA computes, in a crossbar fashion, both decisions about actions and emotions (feelings) about encountered situations. The system is driven by the interaction between cognition and emotion. Given memory matrix W = ||w(a,s)||, the crossbar self learning algorithm in each iteration performs the following computation: 329 | 330 | In situation s perform action a; 331 | Receive consequence situation s'; 332 | Compute emotion of being in consequence situation v(s'); 333 | Update crossbar memory w'(a,s) = w(a,s) + v(s'). 334 | 335 | The backpropagated value (secondary reinforcement) is the emotion toward the consequence situation. The CAA exists in two environments: one is the behavioral environment, where it behaves, and the other is the genetic environment, from which it initially, and only once, receives initial emotions about situations to be encountered in the behavioral environment. Having received the genome vector (species vector) from the genetic environment, the CAA learns a goal-seeking behavior in the behavioral environment, which contains both desirable and undesirable situations. 336 | 337 | 338 | === Other === 339 | In a Bayesian framework, a distribution over the set of allowed models is chosen to minimize the cost. Evolutionary methods, gene expression programming, simulated annealing, expectation-maximization, non-parametric methods and particle swarm optimization are other learning algorithms.
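Returning to the reinforcement-learning loop described above (act, observe a consequence, update value estimates, balance exploration against exploitation): it can be sketched with tabular Q-learning, the same family of method used in this repository's 4-learning/nim project. The 5-state corridor environment, the hyperparameter values, and the use of rewards rather than costs are all choices made for this illustration only.

```python
import random

# 5-state corridor: start at state 0, reward 1.0 for reaching state 4.
N_STATES, GOAL = 5, 4
ACTIONS = [-1, +1]                          # move left or move right
Q = {(s, a): 0.0 for s in range(N_STATES) for a in ACTIONS}
alpha, gamma, epsilon = 0.5, 0.9, 0.1       # assumed hyperparameters

for episode in range(300):
    s = 0
    while s != GOAL:
        # Explore a new action, or exploit prior learning (random tie-break).
        if random.random() < epsilon:
            a = random.choice(ACTIONS)
        else:
            a = max(ACTIONS, key=lambda b: (Q[(s, b)], random.random()))
        s_next = min(max(s + a, 0), N_STATES - 1)
        reward = 1.0 if s_next == GOAL else 0.0
        best_next = max(Q[(s_next, b)] for b in ACTIONS)
        Q[(s, a)] += alpha * (reward + gamma * best_next - Q[(s, a)])
        s = s_next

# The learned greedy policy points right (+1) along the corridor.
print([max(ACTIONS, key=lambda b: Q[(s, b)]) for s in range(N_STATES)])
```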
Convergent recursion is a learning algorithm for cerebellar model articulation controller (CMAC) neural networks. 340 | 341 | 342 | ==== Modes ==== 343 | Two modes of learning are available: stochastic and batch. In stochastic learning, each input creates a weight adjustment. In batch learning, weights are adjusted based on a batch of inputs, accumulating errors over the batch. Stochastic learning introduces "noise" into the process, using the local gradient calculated from one data point; this reduces the chance of the network getting stuck in local minima. However, batch learning typically yields a faster, more stable descent to a local minimum, since each update is performed in the direction of the batch's average error. A common compromise is to use "mini-batches", small batches with samples in each batch selected stochastically from the entire data set. 344 | 345 | 346 | == Types == 347 | 348 | ANNs have evolved into a broad family of techniques that have advanced the state of the art across multiple domains. The simplest types have one or more static components, including number of units, number of layers, unit weights and topology. Dynamic types allow one or more of these to evolve via learning. The latter are much more complicated, but can shorten learning periods and produce better results. Some types allow/require learning to be "supervised" by the operator, while others operate independently. Some types operate purely in hardware, while others are purely software and run on general purpose computers. 349 | Some of the main breakthroughs include: convolutional neural networks, which have proven particularly successful in processing visual and other two-dimensional data; long short-term memory networks, which avoid the vanishing gradient problem and can handle signals that have a mix of low- and high-frequency components, aiding large-vocabulary speech recognition, text-to-speech synthesis, and photo-real talking heads; and competitive networks such as generative adversarial networks, in which multiple networks (of varying structure) compete with each other, on tasks such as winning a game or deceiving the opponent about the authenticity of an input. 350 | 351 | 352 | == Network design == 353 | 354 | Neural architecture search (NAS) uses machine learning to automate ANN design. Various approaches to NAS have designed networks that compare well with hand-designed systems. The basic search algorithm is to propose a candidate model, evaluate it against a dataset and use the results as feedback to teach the NAS network. Available systems include AutoML and AutoKeras. Design issues include deciding the number, type and connectedness of network layers, as well as the size of each and the connection type (full, pooling, ...). 355 | Hyperparameters must also be defined as part of the design (they are not learned), governing matters such as how many neurons are in each layer, learning rate, step, stride, depth, receptive field and padding (for CNNs), etc. 356 | 357 | 358 | == Use == 359 | Using artificial neural networks requires an understanding of their characteristics. 360 | 361 | Choice of model: This depends on the data representation and the application. Overly complex models slow learning. 362 | Learning algorithm: Numerous trade-offs exist between learning algorithms. Almost any algorithm will work well with the correct hyperparameters for training on a particular data set. However, selecting and tuning an algorithm for training on unseen data requires significant experimentation.
363 | Robustness: If the model, cost function and learning algorithm are selected appropriately, the resulting ANN can become robust. ANN capabilities fall within the following broad categories: 364 | Function approximation, or regression analysis, including time series prediction, fitness approximation and modeling. 365 | Classification, including pattern and sequence recognition, novelty detection and sequential decision making. 366 | Data processing, including filtering, clustering, blind source separation and compression. 367 | Robotics, including directing manipulators and prostheses. 368 | Control, including computer numerical control. 369 | 370 | 371 | == Applications == 372 | Because of their ability to reproduce and model nonlinear processes, artificial neural networks have found applications in many disciplines. Application areas include system identification and control (vehicle control, trajectory prediction, process control, natural resource management), quantum chemistry, general game playing, pattern recognition (radar systems, face identification, signal classification, 3D reconstruction, object recognition and more), sequence recognition (gesture, speech, handwritten and printed text recognition), medical diagnosis, finance (e.g. automated trading systems), data mining, visualization, machine translation, social network filtering and e-mail spam filtering. ANNs have been used to diagnose cancers, including lung cancer, prostate cancer, colorectal cancer and to distinguish highly invasive cancer cell lines from less invasive lines using only cell shape information. ANNs have been used to accelerate reliability analysis of infrastructures subject to natural disasters and to predict foundation settlements. ANNs have also been used for building black-box models in geoscience: hydrology, ocean modelling and coastal engineering, and geomorphology. ANNs have been employed in cybersecurity, with the objective of discriminating between legitimate activities and malicious ones. For example, machine learning has been used for classifying Android malware, for identifying domains belonging to threat actors and for detecting URLs posing a security risk. Research is underway on ANN systems designed for penetration testing, and for detecting botnets, credit card fraud and network intrusions. 373 | ANNs have been proposed as a tool to simulate the properties of many-body open quantum systems. In brain research, ANNs have been used to study the short-term behavior of individual neurons, the dynamics of neural circuitry arising from interactions between individual neurons, and how behavior can arise from abstract neural modules that represent complete subsystems. Studies have considered long- and short-term plasticity of neural systems and their relation to learning and memory, from the individual neuron to the system level. 374 | 375 | 376 | == Theoretical properties == 377 | 378 | 379 | === Computational power === 380 | The multilayer perceptron is a universal function approximator, as proven by the universal approximation theorem. However, the proof is not constructive regarding the number of neurons required, the network topology, the weights and the learning parameters. 381 | A specific recurrent architecture with rational-valued weights (as opposed to full-precision real number-valued weights) has the power of a universal Turing machine, using a finite number of neurons and standard linear connections. Further, the use of irrational values for weights results in a machine with super-Turing power.
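Returning to the stochastic, batch, and mini-batch modes described under "Modes" above, here is a small sketch of how the three carve up one epoch of training data. The model, a single parameter fit to the mean of invented values by gradient steps, is chosen only to make the per-batch update visible.

```python
import random

data = [2.0, 4.0, 6.0, 8.0]            # invented training values
# Model: one parameter m; per-batch cost is the mean of (m - x)^2.

def train(batches, rate=0.1, epochs=50):
    m = 0.0
    for _ in range(epochs):
        for batch in batches:
            grad = sum(2 * (m - x) for x in batch) / len(batch)
            m -= rate * grad           # one weight update per batch
    return m

stochastic   = [[x] for x in data]     # one update per example
full_batch   = [data]                  # one update per epoch, averaged error
random.shuffle(data)                   # mini-batches sampled stochastically
mini_batches = [data[i:i + 2] for i in range(0, len(data), 2)]

print(train(stochastic), train(full_batch), train(mini_batches))
# All three approach the mean (5.0); they differ in update noise and count.
```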
382 | 383 | 384 | === Capacity === 385 | A model's "capacity" property corresponds to its ability to model any given function. It is related to the amount of information that can be stored in the network and to the notion of complexity. 386 | Two notions of capacity are known by the community: the information capacity and the VC dimension. The information capacity of a perceptron is intensively discussed in Sir David MacKay's book, which summarizes work by Thomas Cover. The capacity of a network of standard neurons (not convolutional) can be derived by four rules that follow from understanding a neuron as an electrical element. The information capacity captures the functions modelable by the network given any data as input. The second notion is the VC dimension, which uses the principles of measure theory to find the maximum capacity under the best possible circumstances, that is, given input data in a specific form. As noted in the literature, the VC dimension for arbitrary inputs is half the information capacity of a perceptron. The VC dimension for arbitrary points is sometimes referred to as memory capacity. 387 | 388 | 389 | === Convergence === 390 | Models may not consistently converge on a single solution, firstly because local minima may exist, depending on the cost function and the model. Secondly, the optimization method used might not guarantee convergence when it begins far from any local minimum. Thirdly, for sufficiently large data or parameters, some methods become impractical. 391 | The convergence behavior of certain types of ANN architectures is better understood than that of others. When the width of the network approaches infinity, the ANN resembles a linear model and thus follows the convergence behavior of a linear model. Another example: when parameters are small, the ANN is often observed to fit target functions from low to high frequencies. This behavior is the opposite of that of some well-studied iterative numerical schemes such as the Jacobi method. 392 | 393 | 394 | === Generalization and statistics === 395 | Applications whose goal is to create a system that generalizes well to unseen examples face the possibility of over-training. This arises in convoluted or over-specified systems when the network capacity significantly exceeds the needed free parameters. Two approaches address over-training. The first is to use cross-validation and similar techniques to check for the presence of over-training and to select hyperparameters to minimize the generalization error. 396 | The second is to use some form of regularization. This concept emerges in a probabilistic (Bayesian) framework, where regularization can be performed by selecting a larger prior probability over simpler models; but also in statistical learning theory, where the goal is to minimize over two quantities: the 'empirical risk' and the 'structural risk', which roughly correspond to the error over the training set and the predicted error in unseen data due to overfitting. 397 | 398 | Supervised neural networks that use a mean squared error (MSE) cost function can use formal statistical methods to determine the confidence of the trained model. The MSE on a validation set can be used as an estimate for variance. This value can then be used to calculate the confidence interval of network output, assuming a normal distribution. A confidence analysis made this way is statistically valid as long as the output probability distribution stays the same and the network is not modified.
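A small sketch of the confidence estimate described in the last paragraph: treat the validation-set MSE as a variance estimate and, assuming normally distributed errors, wrap a 95% interval around a fresh network output. All numbers here are invented.

```python
import math

val_predictions = [2.9, 4.2, 5.1, 6.8]     # invented network outputs
val_targets     = [3.0, 4.0, 5.0, 7.0]     # corresponding true values

mse = sum((p - t) ** 2
          for p, t in zip(val_predictions, val_targets)) / len(val_targets)
sigma = math.sqrt(mse)                     # MSE as an estimate of variance

new_output = 5.5                           # some fresh network prediction
low, high = new_output - 1.96 * sigma, new_output + 1.96 * sigma
print(f"95% interval: [{low:.2f}, {high:.2f}]")
```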
399 | By assigning a softmax activation function, a generalization of the logistic function, on the output layer of the neural network (or a softmax component in a component-based network) for categorical target variables, the outputs can be interpreted as posterior probabilities. This is useful in classification as it gives a certainty measure on classifications. 400 | The softmax activation function is: 401 | $y_{i} = \frac{e^{x_{i}}}{\sum_{j=1}^{c} e^{x_{j}}}$ 454 | 455 | 456 | 457 | == Criticism == 458 | 459 | 460 | === Training === 461 | A common criticism of neural networks, particularly in robotics, is that they require too much training for real-world operation. Potential solutions include randomly shuffling training examples, using a numerical optimization algorithm that does not take too large steps when changing the network connections following an example, grouping examples in so-called mini-batches and/or introducing a recursive least squares algorithm for CMAC. 462 | 463 | 464 | === Theory === 465 | A fundamental objection is that ANNs do not sufficiently reflect neuronal function. Backpropagation is a critical step, although no such mechanism exists in biological neural networks. How information is coded by real neurons is not known. Sensor neurons fire action potentials more frequently with sensor activation and muscle cells pull more strongly when their associated motor neurons receive action potentials more frequently. Other than the case of relaying information from a sensor neuron to a motor neuron, almost nothing of the principles of how information is handled by biological neural networks is known. 466 | A central claim of ANNs is that they embody new and powerful general principles for processing information. Unfortunately, these principles are ill-defined. It is often claimed that they are emergent from the network itself. This allows simple statistical association (the basic function of artificial neural networks) to be described as learning or recognition. Alexander Dewdney commented that, as a result, artificial neural networks have a "something-for-nothing quality, one that imparts a peculiar aura of laziness and a distinct lack of curiosity about just how good these computing systems are. No human hand (or mind) intervenes; solutions are found as if by magic; and no one, it seems, has learned anything". One response to Dewdney is that neural networks handle many complex and diverse tasks, ranging from autonomously flying aircraft to detecting credit card fraud to mastering the game of Go. 467 | Technology writer Roger Bridgman commented: 468 | 469 | Neural networks, for instance, are in the dock not only because they have been hyped to high heaven, (what hasn't?) but also because you could create a successful net without understanding how it worked: the bunch of numbers that captures its behaviour would in all probability be "an opaque, unreadable table...valueless as a scientific resource". 470 | In spite of his emphatic declaration that science is not technology, Dewdney seems here to pillory neural nets as bad science when most of those devising them are just trying to be good engineers.
An unreadable table that a useful machine could read would still be well worth having. 471 | 472 | Biological brains use both shallow and deep circuits as reported by brain anatomy, displaying a wide variety of invariance. Weng argued that the brain self-wires largely according to signal statistics and therefore a serial cascade cannot catch all major statistical dependencies. 473 | 474 | 475 | === Hardware === 476 | Large and effective neural networks require considerable computing resources. While the brain has hardware tailored to the task of processing signals through a graph of neurons, simulating even a simplified neuron on von Neumann architecture may consume vast amounts of memory and storage. Furthermore, the designer often needs to transmit signals through many of these connections and their associated neurons – which require enormous CPU power and time. 477 | Schmidhuber noted that the resurgence of neural networks in the twenty-first century is largely attributable to advances in hardware: from 1991 to 2015, computing power, especially as delivered by GPGPUs (on GPUs), has increased around a million-fold, making the standard backpropagation algorithm feasible for training networks that are several layers deeper than before. The use of accelerators such as FPGAs and GPUs can reduce training times from months to days. Neuromorphic engineering addresses the hardware difficulty directly, by constructing non-von-Neumann chips to directly implement neural networks in circuitry. Another type of chip optimized for neural network processing is called a Tensor Processing Unit, or TPU. 478 | 479 | 480 | === Practical counterexamples === 481 | Analyzing what has been learned by an ANN is much easier than analyzing what has been learned by a biological neural network. Furthermore, researchers involved in exploring learning algorithms for neural networks are gradually uncovering general principles that allow a learning machine to be successful, for example, local vs. non-local learning and shallow vs. deep architecture. 482 | 483 | 484 | === Hybrid approaches === 485 | Advocates of hybrid models (combining neural networks and symbolic approaches) claim that such a mixture can better capture the mechanisms of the human mind. 486 | 487 | 488 | == Gallery == 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | == See also == 499 | 500 | 501 | == References == 502 | 503 | 504 | == Bibliography == 505 | 506 | 507 | == External links == 508 | The Neural Network Zoo – a compilation of neural network types 509 | The Stilwell Brain – a Mind Field episode featuring an experiment in which humans act as individual neurons in a neural network that classifies handwritten digits 510 | -------------------------------------------------------------------------------- /6-language/questions/corpus/probability.txt: -------------------------------------------------------------------------------- 1 | https://en.wikipedia.org/wiki/Probability 2 | 3 | Probability is a numerical description of how likely an event is to occur or how likely it is that a proposition is true. Probability is a number between 0 and 1, where, roughly speaking, 0 indicates impossibility and 1 indicates certainty. The higher the probability of an event, the more likely it is that the event will occur. A simple example is the tossing of a fair (unbiased) coin.
Since the coin is fair, the two outcomes ("heads" and "tails") are both equally probable; the probability of "heads" equals the probability of "tails"; and since no other outcomes are possible, the probability of either "heads" or "tails" is 1/2 (which could also be written as 0.5 or 50%). 4 | These concepts have been given an axiomatic mathematical formalization in probability theory, which is used widely in such areas of study as mathematics, statistics, finance, gambling, science (in particular physics), artificial intelligence/machine learning, computer science, game theory, and philosophy to, for example, draw inferences about the expected frequency of events. Probability theory is also used to describe the underlying mechanics and regularities of complex systems. 5 | 6 | 7 | == Interpretations == 8 | 9 | When dealing with experiments that are random and well-defined in a purely theoretical setting (like tossing a fair coin), probabilities can be numerically described by the number of desired outcomes divided by the total number of all outcomes. For example, tossing a fair coin twice will yield "head-head", "head-tail", "tail-head", and "tail-tail" outcomes. The probability of getting an outcome of "head-head" is 1 out of 4 outcomes, or, in numerical terms, 1/4, 0.25 or 25%. However, when it comes to practical application, there are two major competing categories of probability interpretations, whose adherents possess different views about the fundamental nature of probability: 10 | 11 | Objectivists assign numbers to describe some objective or physical state of affairs. The most popular version of objective probability is frequentist probability, which claims that the probability of a random event denotes the relative frequency of occurrence of an experiment's outcome, when repeating the experiment. This interpretation considers probability to be the relative frequency "in the long run" of outcomes. A modification of this is propensity probability, which interprets probability as the tendency of some experiment to yield a certain outcome, even if it is performed only once. 12 | Subjectivists assign numbers per subjective probability, i.e., as a degree of belief. The degree of belief has been interpreted as, "the price at which you would buy or sell a bet that pays 1 unit of utility if E, 0 if not E." The most popular version of subjective probability is Bayesian probability, which includes expert knowledge as well as experimental data to produce probabilities. The expert knowledge is represented by some (subjective) prior probability distribution. These data are incorporated in a likelihood function. The product of the prior and the likelihood, normalized, results in a posterior probability distribution that incorporates all the information known to date. By Aumann's agreement theorem, Bayesian agents whose prior beliefs are similar will end up with similar posterior beliefs. However, sufficiently different priors can lead to different conclusions regardless of how much information the agents share. 13 | 14 | 15 | == Etymology == 16 | 17 | The word probability derives from the Latin probabilitas, which can also mean "probity", a measure of the authority of a witness in a legal case in Europe, and often correlated with the witness's nobility. In a sense, this differs much from the modern meaning of probability, which, in contrast, is a measure of the weight of empirical evidence, and is arrived at from inductive reasoning and statistical inference. 
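The frequentist reading above invites a quick simulation: estimate the probability of "head-head" in two fair tosses by its long-run relative frequency and compare with the theoretical 1/4. A minimal sketch:

```python
import random

def toss():
    return random.choice("HT")      # one fair coin toss

trials = 100_000
# Note the short-circuit: the second toss only happens when the first shows
# heads, which leaves the probability of counting a head-head pair at 1/4.
head_head = sum(toss() == "H" and toss() == "H" for _ in range(trials))
print(head_head / trials)           # tends toward 0.25 as trials grows
```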
18 | 19 | 20 | == History == 21 | 22 | The scientific study of probability is a modern development of mathematics. Gambling shows that there has been an interest in quantifying the ideas of probability for millennia, but exact mathematical descriptions arose much later. There are reasons for the slow development of the mathematics of probability. Whereas games of chance provided the impetus for the mathematical study of probability, fundamental issues are still obscured by the superstitions of gamblers. According to Richard Jeffrey, "Before the middle of the seventeenth century, the term 'probable' (Latin probabilis) meant approvable, and was applied in that sense, univocally, to opinion and to action. A probable action or opinion was one such as sensible people would undertake or hold, in the circumstances." However, in legal contexts especially, 'probable' could also apply to propositions for which there was good evidence. 23 | 24 | The earliest known forms of probability and statistics were developed by Middle Eastern mathematicians studying cryptography between the 8th and 13th centuries. Al-Khalil (717–786) wrote the Book of Cryptographic Messages which contains the first use of permutations and combinations to list all possible Arabic words with and without vowels. Al-Kindi (801–873) made the earliest known use of statistical inference in his work on cryptanalysis and frequency analysis. An important contribution of Ibn Adlan (1187–1268) was on sample size for use of frequency analysis. 25 | 26 | The sixteenth century Italian polymath Gerolamo Cardano demonstrated the efficacy of defining odds as the ratio of favourable to unfavourable outcomes (which implies that the probability of an event is given by the ratio of favourable outcomes to the total number of possible outcomes). 27 | Aside from the elementary work by Cardano, the doctrine of probabilities dates to the correspondence of Pierre de Fermat and Blaise Pascal (1654). Christiaan Huygens (1657) gave the earliest known scientific treatment of the subject. Jakob Bernoulli's Ars Conjectandi (posthumous, 1713) and Abraham de Moivre's Doctrine of Chances (1718) treated the subject as a branch of mathematics. See Ian Hacking's The Emergence of Probability and James Franklin's The Science of Conjecture for histories of the early development of the very concept of mathematical probability. 28 | The theory of errors may be traced back to Roger Cotes's Opera Miscellanea (posthumous, 1722), but a memoir prepared by Thomas Simpson in 1755 (printed 1756) first applied the theory to the discussion of errors of observation. The reprint (1757) of this memoir lays down the axioms that positive and negative errors are equally probable, and that certain assignable limits define the range of all errors. Simpson also discusses continuous errors and describes a probability curve. 29 | The first two laws of error that were proposed both originated with Pierre-Simon Laplace. The first law was published in 1774 and stated that the frequency of an error could be expressed as an exponential function of the numerical magnitude of the error, disregarding sign. The second law of error was proposed in 1778 by Laplace and stated that the frequency of the error is an exponential function of the square of the error. The second law of error is called the normal distribution or the Gauss law. 
"It is difficult historically to attribute that law to Gauss, who in spite of his well-known precocity had probably not made this discovery before he was two years old."Daniel Bernoulli (1778) introduced the principle of the maximum product of the probabilities of a system of concurrent errors. 30 | 31 | Adrien-Marie Legendre (1805) developed the method of least squares, and introduced it in his Nouvelles méthodes pour la détermination des orbites des comètes (New Methods for Determining the Orbits of Comets). In ignorance of Legendre's contribution, an Irish-American writer, Robert Adrain, editor of "The Analyst" (1808), first deduced the law of facility of error, 32 | 33 | 34 | 35 | 36 | ϕ 37 | ( 38 | x 39 | ) 40 | = 41 | c 42 | 43 | e 44 | 45 | − 46 | 47 | h 48 | 49 | 2 50 | 51 | 52 | 53 | x 54 | 55 | 2 56 | 57 | 58 | 59 | 60 | , 61 | 62 | 63 | {\displaystyle \phi (x)=ce^{-h^{2}x^{2}},} 64 | where 65 | 66 | 67 | 68 | h 69 | 70 | 71 | {\displaystyle h} 72 | is a constant depending on precision of observation, and 73 | 74 | 75 | 76 | c 77 | 78 | 79 | {\displaystyle c} 80 | is a scale factor ensuring that the area under the curve equals 1. He gave two proofs, the second being essentially the same as John Herschel's (1850). Gauss gave the first proof that seems to have been known in Europe (the third after Adrain's) in 1809. Further proofs were given by Laplace (1810, 1812), Gauss (1823), James Ivory (1825, 1826), Hagen (1837), Friedrich Bessel (1838), W.F. Donkin (1844, 1856), and Morgan Crofton (1870). Other contributors were Ellis (1844), De Morgan (1864), Glaisher (1872), and Giovanni Schiaparelli (1875). Peters's (1856) formula for r, the probable error of a single observation, is well known. 81 | In the nineteenth century authors on the general theory included Laplace, Sylvestre Lacroix (1816), Littrow (1833), Adolphe Quetelet (1853), Richard Dedekind (1860), Helmert (1872), Hermann Laurent (1873), Liagre, Didion, and Karl Pearson. Augustus De Morgan and George Boole improved the exposition of the theory. 82 | Andrey Markov introduced the notion of Markov chains (1906), which played an important role in stochastic processes theory and its applications. The modern theory of probability based on the measure theory was developed by Andrey Kolmogorov (1931).On the geometric side (see integral geometry) contributors to The Educational Times were influential (Miller, Crofton, McColl, Wolstenholme, Watson, and Artemas Martin). 83 | 84 | 85 | == Theory == 86 | 87 | Like other theories, the theory of probability is a representation of its concepts in formal terms—that is, in terms that can be considered separately from their meaning. These formal terms are manipulated by the rules of mathematics and logic, and any results are interpreted or translated back into the problem domain. 88 | There have been at least two successful attempts to formalize probability, namely the Kolmogorov formulation and the Cox formulation. In Kolmogorov's formulation (see probability space), sets are interpreted as events and probability itself as a measure on a class of sets. In Cox's theorem, probability is taken as a primitive (that is, not further analyzed) and the emphasis is on constructing a consistent assignment of probability values to propositions. In both cases, the laws of probability are the same, except for technical details. 
There are other methods for quantifying uncertainty, such as the Dempster–Shafer theory or possibility theory, but those are essentially different and not compatible with the laws of probability as usually understood.


== Applications ==
Probability theory is applied in everyday life in risk assessment and modeling. The insurance industry and markets use actuarial science to determine pricing and make trading decisions. Governments apply probabilistic methods in environmental regulation, entitlement analysis (reliability theory of aging and longevity), and financial regulation.
A good example of the use of probability theory in equity trading is the effect of the perceived probability of any widespread Middle East conflict on oil prices, which have ripple effects in the economy as a whole. An assessment by a commodity trader that a war is more likely can send that commodity's prices up or down, and signals other traders of that opinion. Accordingly, the probabilities are neither assessed independently nor necessarily very rationally. The theory of behavioral finance emerged to describe the effect of such groupthink on pricing, on policy, and on peace and conflict. In addition to financial assessment, probability can be used to analyze trends in biology (e.g. disease spread) as well as ecology (e.g. biological Punnett squares). As with finance, risk assessment can be used as a statistical tool to calculate the likelihood of undesirable events occurring, and can assist with implementing protocols to avoid encountering such circumstances. Probability is used to design games of chance so that casinos can make a guaranteed profit, yet provide payouts to players that are frequent enough to encourage continued play. The discovery of rigorous methods to assess and combine probability assessments has changed society. Another significant application of probability theory in everyday life is reliability. Many consumer products, such as automobiles and consumer electronics, use reliability theory in product design to reduce the probability of failure. Failure probability may influence a manufacturer's decisions on a product's warranty. The cache language model and other statistical language models used in natural language processing are also examples of applications of probability theory.


== Mathematical treatment ==

Consider an experiment that can produce a number of results. The collection of all possible results is called the sample space of the experiment. The power set of the sample space is formed by considering all different collections of possible results. For example, rolling a die can produce six possible results. One collection of possible results gives an odd number on the die. Thus, the subset {1,3,5} is an element of the power set of the sample space of dice rolls. These collections are called "events". In this case, {1,3,5} is the event that the die falls on some odd number. If the results that actually occur fall in a given event, the event is said to have occurred.
A probability is a way of assigning every event a value between zero and one, with the requirement that the event made up of all possible results (in our example, the event {1,2,3,4,5,6}) is assigned a value of one. To qualify as a probability, the assignment of values must satisfy the requirement that for any collection of mutually exclusive events (events with no common results, e.g., the events {1,6}, {3}, and {2,4} are all mutually exclusive), the probability that at least one of the events will occur is given by the sum of the probabilities of all the individual events.
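As a minimal sketch of these definitions in code (our illustration, written in Python like the rest of this project; not part of the article or the assignment):

from fractions import Fraction

# Sample space of one roll of a fair die.
sample_space = {1, 2, 3, 4, 5, 6}

def probability(event):
    # With equally likely outcomes, P(E) = |E| / |sample space|.
    return Fraction(len(event & sample_space), len(sample_space))

odd = {1, 3, 5}  # the event "the die falls on some odd number"

assert probability(sample_space) == 1          # the sure event gets value one
assert probability(odd) == Fraction(1, 2)
# Additivity over mutually exclusive events: {1,6}, {3} and {2,4} share no results.
assert probability({1, 6} | {3} | {2, 4}) == (
    probability({1, 6}) + probability({3}) + probability({2, 4})
)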
The probability of an event A is written as {\displaystyle P(A)}, {\displaystyle p(A)}, or {\displaystyle {\text{Pr}}(A)}. This mathematical definition of probability can extend to infinite sample spaces, and even uncountable sample spaces, using the concept of a measure.
The opposite or complement of an event A is the event [not A] (that is, the event of A not occurring), often denoted as {\displaystyle {\overline {A}},A^{\complement },\neg A}, or {\displaystyle {\sim }A}; its probability is given by P(not A) = 1 − P(A). As an example, the chance of not rolling a six on a six-sided die is 1 − (chance of rolling a six)

{\displaystyle =1-{\tfrac {1}{6}}={\tfrac {5}{6}}}.

See Complementary event for a more complete treatment.
If two events A and B occur on a single performance of an experiment, this is called the intersection or joint probability of A and B, denoted as {\displaystyle P(A\cap B)}.


=== Independent events ===
If two events, A and B, are independent, then the joint probability is

{\displaystyle P(A{\mbox{ and }}B)=P(A\cap B)=P(A)P(B).}

For example, if two coins are flipped, the chance of both being heads is {\displaystyle {\tfrac {1}{2}}\times {\tfrac {1}{2}}={\tfrac {1}{4}}}.
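To spell out the coin example, independence can be checked by enumerating the four equally likely outcomes of two flips (again our sketch, not the article's):

from fractions import Fraction
from itertools import product

# The sample space of two flips of a fair coin: HH, HT, TH, TT.
two_flips = set(product("HT", repeat=2))

def p(event):
    # All four outcomes are equally likely.
    return Fraction(len(event & two_flips), len(two_flips))

first_heads = {o for o in two_flips if o[0] == "H"}
second_heads = {o for o in two_flips if o[1] == "H"}

# P(A and B) = P(A) * P(B) for independent events:
assert p(first_heads & second_heads) == p(first_heads) * p(second_heads) == Fraction(1, 4)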
=== Mutually exclusive events ===
If either event A or event B, but never both, occurs on a single performance of an experiment, then they are called mutually exclusive events.
If two events are mutually exclusive, then the probability of both occurring is denoted as {\displaystyle P(A\cap B)}, and

{\displaystyle P(A{\mbox{ and }}B)=P(A\cap B)=0.}

If two events are mutually exclusive, then the probability of either occurring is denoted as {\displaystyle P(A\cup B)}, and

{\displaystyle P(A{\mbox{ or }}B)=P(A\cup B)=P(A)+P(B)-P(A\cap B)=P(A)+P(B)-0=P(A)+P(B).}

For example, the chance of rolling a 1 or 2 on a six-sided die is

{\displaystyle P(1{\mbox{ or }}2)=P(1)+P(2)={\tfrac {1}{6}}+{\tfrac {1}{6}}={\tfrac {1}{3}}.}


=== Not mutually exclusive events ===
If the events are not mutually exclusive, then

{\displaystyle P\left(A{\hbox{ or }}B\right)=P(A\cup B)=P\left(A\right)+P\left(B\right)-P\left(A{\mbox{ and }}B\right).}

For example, when drawing a single card at random from a regular deck of cards, the chance of getting a heart or a face card (J, Q, K) (or one that is both) is

{\displaystyle {\tfrac {13}{52}}+{\tfrac {12}{52}}-{\tfrac {3}{52}}={\tfrac {11}{26}},}

because among the 52 cards of a deck, 13 are hearts, 12 are face cards, and 3 are both: here the possibilities included in the "3 that are both" are included in each of the "13 hearts" and the "12 face cards" but should only be counted once.
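The card example can be verified by direct enumeration (our sketch, with illustrative rank and suit labels):

from fractions import Fraction
from itertools import product

ranks = ["A", "2", "3", "4", "5", "6", "7", "8", "9", "10", "J", "Q", "K"]
suits = ["hearts", "diamonds", "clubs", "spades"]
deck = set(product(ranks, suits))  # 52 cards

hearts = {card for card in deck if card[1] == "hearts"}
faces = {card for card in deck if card[0] in {"J", "Q", "K"}}

def p(event):
    return Fraction(len(event), len(deck))

# Inclusion-exclusion: P(heart or face) = P(heart) + P(face) - P(heart and face).
assert p(hearts | faces) == p(hearts) + p(faces) - p(hearts & faces) == Fraction(11, 26)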
=== Conditional probability ===
Conditional probability is the probability of some event A, given the occurrence of some other event B. Conditional probability is written {\displaystyle P(A\mid B)}, and is read "the probability of A, given B". It is defined by

{\displaystyle P(A\mid B)={\frac {P(A\cap B)}{P(B)}}.}

If {\displaystyle P(B)=0}, then {\displaystyle P(A\mid B)} is formally undefined by this expression. However, it is possible to define a conditional probability for some zero-probability events using a σ-algebra of such events (such as those arising from a continuous random variable).
For example, in a bag of 2 red balls and 2 blue balls (4 balls in total), the probability of taking a red ball is {\displaystyle 1/2}; however, when taking a second ball, the probability of it being either a red ball or a blue ball depends on the ball previously taken. For example, if a red ball was taken, the probability of picking a red ball again would be {\displaystyle 1/3}, since only 1 red and 2 blue balls would have remained.


=== Inverse probability ===
In probability theory and applications, Bayes' rule relates the odds of event {\displaystyle A_{1}} to event {\displaystyle A_{2}}, before (prior to) and after (posterior to) conditioning on another event {\displaystyle B}. The odds on {\displaystyle A_{1}} to event {\displaystyle A_{2}} is simply the ratio of the probabilities of the two events. When arbitrarily many events {\displaystyle A} are of interest, not just two, the rule can be rephrased as: posterior is proportional to prior times likelihood,

{\displaystyle P(A|B)\propto P(A)P(B|A)}

where the proportionality symbol means that the left-hand side is proportional to (i.e., equals a constant times) the right-hand side as {\displaystyle A} varies, for fixed or given {\displaystyle B} (Lee, 2012; Bertsch McGrayne, 2012). In this form it goes back to Laplace (1774) and to Cournot (1843); see Fienberg (2005). See Inverse probability and Bayes' rule.
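A small numeric sketch of the proportionality form (ours; the hypotheses and numbers are made up purely for illustration): the posterior is obtained by normalizing prior times likelihood over the candidate events.

# Illustrative only: a hypothetical two-hypothesis example.
priors = {"fair coin": 0.5, "two-headed coin": 0.5}
# Likelihood of observing three heads in a row under each hypothesis.
likelihoods = {"fair coin": 0.5 ** 3, "two-headed coin": 1.0}

unnormalized = {h: priors[h] * likelihoods[h] for h in priors}
total = sum(unnormalized.values())
posteriors = {h: p / total for h, p in unnormalized.items()}

# P(fair | HHH) = 0.0625 / (0.0625 + 0.5) = 1/9
assert abs(posteriors["fair coin"] - 1 / 9) < 1e-12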
=== Summary of probabilities ===
The formulas of this section can be summarized as follows:

Event        Probability
A            P(A) ∈ [0, 1]
not A        P(A∁) = 1 − P(A)
A or B       P(A∪B) = P(A) + P(B) − P(A∩B); = P(A) + P(B) if A and B are mutually exclusive
A and B      P(A∩B) = P(A|B)P(B); = P(A)P(B) if A and B are independent
A given B    P(A|B) = P(A∩B)/P(B)


== Relation to randomness and probability in quantum mechanics ==

In a deterministic universe, based on Newtonian concepts, there would be no probability if all conditions were known (Laplace's demon); in practice, however, there are situations in which sensitivity to initial conditions exceeds our ability to measure them, i.e., to know them. In the case of a roulette wheel, if the force of the hand and the period of that force are known, the number on which the ball will stop would be a certainty (though as a practical matter, this would likely be true only of a roulette wheel that had not been exactly levelled, as Thomas A. Bass' Newtonian Casino revealed). This also assumes knowledge of the inertia and friction of the wheel, the weight, smoothness, and roundness of the ball, variations in hand speed during the turning, and so forth. A probabilistic description can thus be more useful than Newtonian mechanics for analyzing the pattern of outcomes of repeated rolls of a roulette wheel. Physicists face the same situation in the kinetic theory of gases, where the system, while deterministic in principle, is so complex (with the number of molecules typically on the order of magnitude of the Avogadro constant, 6.02×10²³) that only a statistical description of its properties is feasible.
Probability theory is required to describe quantum phenomena. A revolutionary discovery of early 20th century physics was the random character of all physical processes that occur at sub-atomic scales and are governed by the laws of quantum mechanics. The objective wave function evolves deterministically but, according to the Copenhagen interpretation, it deals with probabilities of observing, the outcome being explained by a wave function collapse when an observation is made. However, the loss of determinism for the sake of instrumentalism did not meet with universal approval. Albert Einstein famously remarked in a letter to Max Born: "I am convinced that God does not play dice". Like Einstein, Erwin Schrödinger, who discovered the wave function, believed quantum mechanics is a statistical approximation of an underlying deterministic reality. In some modern interpretations of the statistical mechanics of measurement, quantum decoherence is invoked to account for the appearance of subjectively probabilistic experimental outcomes.


== See also ==

Chance (disambiguation)
Class membership probabilities
Contingency
Equiprobability
Heuristics in judgment and decision-making
Probability theory
Randomness
Statistics
Estimators
Estimation theory
Probability density function

In law:
Balance of probabilities


== External links ==
Virtual Laboratories in Probability and Statistics (Univ. of Ala.-Huntsville)
Probability on In Our Time at the BBC
Probability and Statistics EBook
Edwin Thompson Jaynes. Probability Theory: The Logic of Science. Preprint: Washington University, (1996). HTML index with links to PostScript files and PDF (first three chapters)
People from the History of Probability and Statistics (Univ. of Southampton)
Probability and Statistics on the Earliest Uses Pages (Univ. of Southampton)
Earliest Uses of Symbols in Probability and Statistics on Earliest Uses of Various Mathematical Symbols
A tutorial on probability and Bayes' theorem devised for first-year Oxford University students
[1] pdf file of An Anthology of Chance Operations (1963) at UbuWeb
Introduction to Probability – eBook, by Charles Grinstead, Laurie Snell Source (GNU Free Documentation License)
(in English and Italian) Bruno de Finetti, Probabilità e induzione, Bologna, CLUEB, 1993. ISBN 88-8091-176-7 (digital version)
Richard P. Feynman's Lecture on probability.
--------------------------------------------------------------------------------
/6-language/questions/questions.py:
--------------------------------------------------------------------------------
import math
import nltk
import os
import string
import sys

FILE_MATCHES = 1
SENTENCE_MATCHES = 1


def main():

    # Check command-line arguments
    if len(sys.argv) != 2:
        sys.exit("Usage: python questions.py corpus")

    # Calculate IDF values across files
    files = load_files(sys.argv[1])
    file_words = {
        filename: tokenize(files[filename])
        for filename in files
    }
    file_idfs = compute_idfs(file_words)

    # Prompt user for query
    query = set(tokenize(input("Query: ")))

    # Determine top file matches according to TF-IDF
    filenames = top_files(query, file_words, file_idfs, n=FILE_MATCHES)

    # Extract sentences from top files
    sentences = dict()
    for filename in filenames:
        for passage in files[filename].split("\n"):
            for sentence in nltk.sent_tokenize(passage):
                tokens = tokenize(sentence)
                if tokens:
                    sentences[sentence] = tokens

    # Compute IDF values across sentences
    idfs = compute_idfs(sentences)

    # Determine top sentence matches
    matches = top_sentences(query, sentences, idfs, n=SENTENCE_MATCHES)
    for match in matches:
        print(match)


def load_files(directory):
    """
    Given a directory name, return a dictionary mapping the filename of each
    `.txt` file inside that directory to the file's contents as a string.
    """
    files_mapping = {}
    for file_name in os.listdir(directory):
        # Only read `.txt` files, as the docstring promises.
        if not file_name.endswith(".txt"):
            continue
        with open(os.path.join(directory, file_name), encoding="utf-8") as f:
            files_mapping[file_name] = f.read()
    return files_mapping


def tokenize(document):
    """
    Given a document (represented as a string), return a list of all of the
    words in that document, in order.

    Process document by converting all words to lowercase, and removing any
    punctuation or English stopwords.
    """
    # Build the stopword set once instead of re-reading the corpus on
    # every iteration of the loop below.
    stopwords = set(nltk.corpus.stopwords.words("english"))
    words = nltk.word_tokenize(document.lower())
    return [
        word for word in words
        if word not in stopwords and word not in string.punctuation
    ]


def compute_idfs(documents):
    """
    Given a dictionary of `documents` that maps names of documents to a list
    of words, return a dictionary that maps words to their IDF values.

    Any word that appears in at least one of the documents should be in the
    resulting dictionary.
    """
    idfs = dict()
    words = set()
    total_docs = len(documents)
    for _file in documents:
        words.update(documents[_file])

    for word in words:
        # Number of documents containing the word (at least 1 by construction).
        f = sum(word in documents[filename] for filename in documents)
        idfs[word] = math.log(total_docs / f)
    return idfs
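
# A quick worked example (hypothetical, not part of the assignment):
# with documents = {"a.txt": ["probability", "theory"], "b.txt": ["probability"]},
# "probability" appears in 2 of 2 documents, so idf("probability") = ln(2/2) = 0,
# while "theory" appears in 1 of 2 documents, giving idf("theory") = ln(2/1) ≈ 0.693.
# Words that appear in every document carry no discriminating weight.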

def top_files(query, files, idfs, n):
    """
    Given a `query` (a set of words), `files` (a dictionary mapping names of
    files to a list of their words), and `idfs` (a dictionary mapping words
    to their IDF values), return a list of the filenames of the `n` top
    files that match the query, ranked according to tf-idf.
    """
    tfidfs = []
    for filename in files:
        tfidf = 0
        for q in query:
            # Term frequency times IDF; query words absent from the corpus
            # contribute nothing (idfs.get avoids a KeyError).
            tfidf += idfs.get(q, 0) * files[filename].count(q)
        tfidfs.append((filename, tfidf))
    tfidfs.sort(key=lambda x: x[1], reverse=True)
    return [x[0] for x in tfidfs[:n]]


def top_sentences(query, sentences, idfs, n):
    """
    Given a `query` (a set of words), `sentences` (a dictionary mapping
    sentences to a list of their words), and `idfs` (a dictionary mapping words
    to their IDF values), return a list of the `n` top sentences that match
    the query, ranked according to IDF. If there are ties, preference should
    be given to sentences that have a higher query term density.
    """
    result = []
    for sentence in sentences:
        idf = 0
        total_words_found = 0
        for word in query:
            if word in sentences[sentence]:
                total_words_found += 1
                idf += idfs[word]
        # Query term density: the proportion of the sentence's words that
        # appear in the query; used only to break ties on matching IDF.
        density = total_words_found / len(sentences[sentence])
        result.append((sentence, idf, density))
    result.sort(key=lambda x: (x[1], x[2]), reverse=True)
    return [x[0] for x in result[:n]]


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/6-language/questions/requirements.txt:
--------------------------------------------------------------------------------
nltk
--------------------------------------------------------------------------------