├── .gitignore ├── README.md ├── requirements.txt └── stats ├── .gitignore ├── models ├── BaseModel.py ├── PlayerBios.py ├── PlayerGameLogs.py ├── PlayerGeneralAdvancedTotals.py ├── PlayerGeneralTraditionalTotals.py ├── TeamGameLogs.py ├── TeamGeneralTraditional.py └── __init__.py ├── player_bios.ipynb ├── player_bios.py ├── player_game_logs.ipynb ├── player_game_logs.py ├── player_general_traditional.ipynb ├── player_general_traditional_totals.py └── settings.py /.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | .vscode/ 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nba-sql 2 | Used for creating and storing NBA data in a MySQL database 3 | 4 | ## Development 5 | 6 | Requirements: 7 | 8 | Python >= 3.6 9 | 10 | ### Local development 11 | 12 | ##### The manual way 13 | Create your virtual environment if you don’t have one already. In this case we use `venv` as the target folder for storing packages. 
14 | 15 | `python -m venv venv` 16 | 17 | Then activate it: 18 | `source venv/bin/activate` 19 | 20 | Install dependencies using: 21 | `pip install -r requirements.txt` 22 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2020.12.5 2 | cffi==1.14.5 3 | chardet==4.0.0 4 | cryptography==3.2.1 5 | idna==2.10 6 | peewee==3.14.1 7 | pycparser==2.20 8 | PyMySQL==1.0.0 9 | python-dotenv==0.15.0 10 | requests==2.25.1 11 | six==1.15.0 12 | urllib3==1.26.3 13 | psycopg2==2.7.7 14 | -------------------------------------------------------------------------------- /stats/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # jupyter notebooks 107 | .debugging_file.ipynb 108 | debugging_file.ipynb 109 | -------------------------------------------------------------------------------- /stats/models/BaseModel.py: -------------------------------------------------------------------------------- 1 | from peewee import * 2 | from settings import Settings 3 | 4 | settings = Settings() 5 | 6 | class BaseModel(Model): 7 | class Meta: 8 | database = settings.db 9 | -------------------------------------------------------------------------------- /stats/models/PlayerBios.py: -------------------------------------------------------------------------------- 1 | from peewee import * 2 | from models import BaseModel 3 | 4 | class PlayerBios(BaseModel): 5 | season_id = CharField(null=True) # added in at the end 6 | player_id = IntegerField(null = True) 7 | player_name = 
CharField(null = True) 8 | team_id = IntegerField(null = True) 9 | team_abbreviation = CharField(null = True) 10 | age = IntegerField(null = True) 11 | player_height = CharField(null = True) 12 | player_height_inches = IntegerField(null = True) 13 | player_weight = CharField(null = True) 14 | college = CharField(null = True) 15 | country = CharField(null = True) 16 | draft_year = CharField(null = True) 17 | draft_round = CharField(null = True) 18 | draft_number = CharField(null = True) 19 | gp = IntegerField(null = True) 20 | pts = FloatField(null = True) 21 | reb = FloatField(null = True) 22 | ast = FloatField(null = True) 23 | net_rating = FloatField(null = True) 24 | oreb_pct = FloatField(null = True) 25 | dreb_pct = FloatField(null = True) 26 | usg_pct = FloatField(null = True) 27 | ts_pct = FloatField(null = True) 28 | ast_pct = FloatField(null = True) 29 | 30 | class Meta: 31 | db_table = 'player_bios' -------------------------------------------------------------------------------- /stats/models/PlayerGameLogs.py: -------------------------------------------------------------------------------- 1 | from peewee import * 2 | from models import BaseModel 3 | 4 | class PlayerGameLogs(BaseModel): 5 | season_id = CharField(null = True) 6 | player_id = IntegerField(null = True) 7 | player_name = CharField(null = True) 8 | team_id = IntegerField(null = True) 9 | team_abbreviation = CharField(null = True) 10 | team_name = CharField(null = True) 11 | game_id = CharField(null = True) 12 | game_date = CharField(null = True) 13 | matchup = CharField(null = True) 14 | wl = CharField(null = True) 15 | min = FloatField(null = True) 16 | fgm = FloatField(null = True) 17 | fga = FloatField(null = True) 18 | fg_pct = FloatField(null = True) 19 | fg3m = FloatField(null = True) 20 | fg3a = FloatField(null = True) 21 | fg3_pct = FloatField(null = True) 22 | ftm = FloatField(null = True) 23 | fta = FloatField(null = True) 24 | ft_pct = FloatField(null = True) 25 | oreb = 
FloatField(null = True) 26 | dreb = FloatField(null = True) 27 | reb = FloatField(null = True) 28 | ast = FloatField(null = True) 29 | stl = FloatField(null = True) 30 | blk = FloatField(null = True) 31 | tov = FloatField(null = True) 32 | pf = FloatField(null = True) 33 | pts = FloatField(null = True) 34 | plus_minus = FloatField(null = True) 35 | video_available = IntegerField(null = True) 36 | 37 | class Meta: 38 | db_table = 'player_game_logs' -------------------------------------------------------------------------------- /stats/models/PlayerGeneralAdvancedTotals.py: -------------------------------------------------------------------------------- 1 | from peewee import * 2 | from models import BaseModel 3 | 4 | class PlayerGeneralAdvancedTotals(BaseModel): 5 | season_id = CharField(null=True) # added in at the end 6 | player_id = IntegerField(null=True) 7 | player_name = CharField(null=True) 8 | team_id = IntegerField(null=True) 9 | team_abbreviation = CharField(null=True) 10 | age = IntegerField(null=True) 11 | gp = IntegerField(null=True) 12 | w = IntegerField(null=True) 13 | l = IntegerField(null=True) 14 | w_pct = FloatField(null=True) 15 | min = FloatField(null=True) 16 | off_rating = FloatField(null=True) 17 | def_rating = FloatField(null=True) 18 | net_rating = FloatField(null=True) 19 | ast_pct = FloatField(null=True) 20 | ast_to = FloatField(null=True) 21 | ast_ratio = FloatField(null=True) 22 | oreb_pct = FloatField(null=True) 23 | dreb_pct = FloatField(null=True) 24 | reb_pct = FloatField(null=True) 25 | tm_tov_pct = FloatField(null=True) 26 | efg_pct = FloatField(null=True) 27 | ts_pct = FloatField(null=True) 28 | usg_pct = FloatField(null=True) 29 | pace = FloatField(null=True) 30 | pie = FloatField(null=True) 31 | fgm = FloatField(null=True) 32 | fga = FloatField(null=True) 33 | fgm_pg = FloatField(null=True) 34 | fga_pg = FloatField(null=True) 35 | fg_pct = FloatField(null=True) 36 | gp_rank = IntegerField(null=True) 37 | w_rank = 
IntegerField(null=True) 38 | l_rank = IntegerField(null=True) 39 | w_pct_rank = IntegerField(null=True) 40 | min_rank = IntegerField(null=True) 41 | off_rating_rank = IntegerField(null=True) 42 | def_rating_rank = IntegerField(null=True) 43 | net_rating_rank = IntegerField(null=True) 44 | ast_pct_rank = IntegerField(null=True) 45 | ast_to_rank = IntegerField(null=True) 46 | ast_ratio_rank = IntegerField(null=True) 47 | oreb_pct_rank = IntegerField(null=True) 48 | dreb_pct_rank = IntegerField(null=True) 49 | reb_pct_rank = IntegerField(null=True) 50 | tm_tov_pct_rank = IntegerField(null=True) 51 | efg_pct_rank = IntegerField(null=True) 52 | ts_pct_rank = IntegerField(null=True) 53 | usg_pct_rank = IntegerField(null=True) 54 | pace_rank = IntegerField(null=True) 55 | pie_rank = IntegerField(null=True) 56 | fgm_rank = IntegerField(null=True) 57 | fga_rank = IntegerField(null=True) 58 | fgm_pg_rank = IntegerField(null=True) 59 | fga_pg_rank = IntegerField(null=True) 60 | fg_pct_rank = IntegerField(null=True) 61 | cfid = IntegerField(null=True) 62 | cfparams = CharField(null=True) 63 | 64 | class Meta: 65 | db_table = 'player_general_advanced_totals' -------------------------------------------------------------------------------- /stats/models/PlayerGeneralTraditionalTotals.py: -------------------------------------------------------------------------------- 1 | from peewee import * 2 | from models import BaseModel 3 | 4 | class PlayerGeneralTraditionalTotals(BaseModel): 5 | season_id = CharField(null=True) 6 | player_id = IntegerField(null=True) 7 | player_name = CharField(null=True) 8 | team_id = IntegerField(null=True) 9 | team_abbreviation = CharField(null=True) 10 | age = IntegerField(null=True) 11 | gp = IntegerField(null=True) 12 | w = IntegerField(null=True) 13 | l = IntegerField(null=True) 14 | w_pct = FloatField(null=True) 15 | min = FloatField(null=True) 16 | fgm = FloatField(null=True) 17 | fga = FloatField(null=True) 18 | fg_pct = FloatField(null=True) 19 | 
fg3m = FloatField(null=True) 20 | fg3a = FloatField(null=True) 21 | fg3_pct = FloatField(null=True) 22 | ftm = FloatField(null=True) 23 | fta = FloatField(null=True) 24 | ft_pct = FloatField(null=True) 25 | oreb = FloatField(null=True) 26 | dreb = FloatField(null=True) 27 | reb = FloatField(null=True) 28 | ast = FloatField(null=True) 29 | tov = FloatField(null=True) 30 | stl = FloatField(null=True) 31 | blk = FloatField(null=True) 32 | blka = FloatField(null=True) 33 | pf = FloatField(null=True) 34 | pfd = FloatField(null=True) 35 | pts = FloatField(null=True) 36 | plus_minus = FloatField(null=True) 37 | nba_fantasy_pts = FloatField(null=True) 38 | dd2 = FloatField(null=True) 39 | td3 = FloatField(null=True) 40 | gp_rank = IntegerField(null=True) 41 | w_rank = IntegerField(null=True) 42 | l_rank = IntegerField(null=True) 43 | w_pct_rank = IntegerField(null=True) 44 | min_rank = IntegerField(null=True) 45 | fgm_rank = IntegerField(null=True) 46 | fga_rank = IntegerField(null=True) 47 | fg_pct_rank = IntegerField(null=True) 48 | fg3m_rank = IntegerField(null=True) 49 | fg3a_rank = IntegerField(null=True) 50 | fg3_pct_rank = IntegerField(null=True) 51 | ftm_rank = IntegerField(null=True) 52 | fta_rank = IntegerField(null=True) 53 | ft_pct_rank = IntegerField(null=True) 54 | oreb_rank = IntegerField(null=True) 55 | dreb_rank = IntegerField(null=True) 56 | reb_rank = IntegerField(null=True) 57 | ast_rank = IntegerField(null=True) 58 | tov_rank = IntegerField(null=True) 59 | stl_rank = IntegerField(null=True) 60 | blk_rank = IntegerField(null=True) 61 | blka_rank = IntegerField(null=True) 62 | pf_rank = IntegerField(null=True) 63 | pfd_rank = IntegerField(null=True) 64 | pts_rank = IntegerField(null=True) 65 | plus_minus_rank = IntegerField(null=True) 66 | nba_fantasy_pts_rank = IntegerField(null=True) 67 | dd2_rank = IntegerField(null=True) 68 | td3_rank = IntegerField(null=True) 69 | cfid = IntegerField(null=True) 70 | cfparams = CharField(null=True) 71 | 72 | class 
Meta: 73 | db_table = 'player_general_traditional_totals' -------------------------------------------------------------------------------- /stats/models/TeamGameLogs.py: -------------------------------------------------------------------------------- 1 | from peewee import * 2 | from models import BaseModel 3 | 4 | class TeamGameLogs(BaseModel): 5 | season_id = CharField(null = True) 6 | team_id = IntegerField(null = True) 7 | team_abbreviation = CharField(null = True) 8 | team_name = CharField(null = True) 9 | game_id = CharField(null = True) 10 | game_date = CharField(null = True) 11 | matchup = CharField(null = True) 12 | wl = CharField(null = True) 13 | min = FloatField(null = True) 14 | fgm = FloatField(null = True) 15 | fga = FloatField(null = True) 16 | fg_pct = FloatField(null = True) 17 | fg3m = FloatField(null = True) 18 | fg3a = FloatField(null = True) 19 | fg3_pct = FloatField(null = True) 20 | ftm = FloatField(null = True) 21 | fta = FloatField(null = True) 22 | ft_pct = FloatField(null = True) 23 | oreb = FloatField(null = True) 24 | dreb = FloatField(null = True) 25 | reb = FloatField(null = True) 26 | ast = FloatField(null = True) 27 | stl = FloatField(null = True) 28 | blk = FloatField(null = True) 29 | tov = FloatField(null = True) 30 | pf = FloatField(null = True) 31 | pts = FloatField(null = True) 32 | plus_minus = FloatField(null = True) 33 | video_available = IntegerField(null = True) 34 | 35 | class Meta: 36 | db_table = 'team_game_logs' -------------------------------------------------------------------------------- /stats/models/TeamGeneralTraditional.py: -------------------------------------------------------------------------------- 1 | from peewee import * 2 | from models import BaseModel 3 | 4 | class TeamGeneralTraditional(BaseModel): 5 | season_id = CharField(null = True) 6 | team_id = IntegerField(null = True) 7 | team_name = CharField(null = True) 8 | gp = IntegerField(null = True) 9 | w = IntegerField(null = True) 10 | l = 
IntegerField(null = True) 11 | w_pct = FloatField(null = True) 12 | min = FloatField(null = True) 13 | fgm = FloatField(null = True) 14 | fga = FloatField(null = True) 15 | fg_pct = FloatField(null = True) 16 | fg3m = FloatField(null = True) 17 | fg3a = FloatField(null = True) 18 | fg3_pct = FloatField(null = True) 19 | ftm = FloatField(null = True) 20 | fta = FloatField(null = True) 21 | ft_pct = FloatField(null = True) 22 | oreb = FloatField(null = True) 23 | dreb = FloatField(null = True) 24 | reb = FloatField(null = True) 25 | ast = FloatField(null = True) 26 | tov = FloatField(null = True) 27 | stl = FloatField(null = True) 28 | blk = FloatField(null = True) 29 | blka = FloatField(null = True) 30 | pf = FloatField(null = True) 31 | pfd = FloatField(null = True) 32 | pts = FloatField(null = True) 33 | plus_minus = FloatField(null = True) 34 | gp_rank = IntegerField(null = True) 35 | w_rank = IntegerField(null = True) 36 | l_rank = IntegerField(null = True) 37 | w_pct_rank = IntegerField(null = True) 38 | min_rank = IntegerField(null = True) 39 | fgm_rank = IntegerField(null = True) 40 | fga_rank = IntegerField(null = True) 41 | fg_pct_rank = IntegerField(null = True) 42 | fg3m_rank = IntegerField(null = True) 43 | fg3a_rank = IntegerField(null = True) 44 | fg3_pct_rank = IntegerField(null = True) 45 | ftm_rank = IntegerField(null = True) 46 | fta_rank = IntegerField(null = True) 47 | ft_pct_rank = IntegerField(null = True) 48 | oreb_rank = IntegerField(null = True) 49 | dreb_rank = IntegerField(null = True) 50 | reb_rank = IntegerField(null = True) 51 | ast_rank = IntegerField(null = True) 52 | tov_rank = IntegerField(null = True) 53 | stl_rank = IntegerField(null = True) 54 | blk_rank = IntegerField(null = True) 55 | blka_rank = IntegerField(null = True) 56 | pf_rank = IntegerField(null = True) 57 | pfd_rank = IntegerField(null = True) 58 | pts_rank = IntegerField(null = True) 59 | plus_minus_rank = IntegerField(null = True) 60 | cfid = IntegerField(null = 
True) 61 | cfparams = CharField(null = True) 62 | 63 | class Meta: 64 | db_table = 'team_general_traditional' -------------------------------------------------------------------------------- /stats/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .BaseModel import BaseModel 2 | 3 | # Season Totals Tables 4 | from .PlayerGeneralTraditionalTotals import PlayerGeneralTraditionalTotals 5 | from .PlayerGeneralAdvancedTotals import PlayerGeneralAdvancedTotals 6 | 7 | # Team Tables 8 | from .TeamGeneralTraditional import TeamGeneralTraditional 9 | from .TeamGameLogs import TeamGameLogs 10 | 11 | # Misc Tables 12 | from .PlayerBios import PlayerBios 13 | from .PlayerGameLogs import PlayerGameLogs -------------------------------------------------------------------------------- /stats/player_bios.py: -------------------------------------------------------------------------------- 1 | # player_bios.py - scrapes data from stats.nba.com and inserts into player_bios table within MySQL nba stats database 2 | import requests 3 | 4 | from settings import Settings 5 | from models import PlayerBios 6 | 7 | settings = Settings() 8 | settings.db.create_tables([PlayerBios], safe=True) 9 | 10 | headers = { 11 | 'Connection': 'keep-alive', 12 | 'Accept': 'application/json, text/plain, */*', 13 | 'x-nba-stats-token': 'true', 14 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36', 15 | 'x-nba-stats-origin': 'stats', 16 | 'Sec-Fetch-Site': 'same-origin', 17 | 'Sec-Fetch-Mode': 'cors', 18 | 'Referer': 'https://stats.nba.com/', 19 | 'Accept-Encoding': 'gzip, deflate, br', 20 | 'Accept-Language': 'en-US,en;q=0.9', 21 | } 22 | 23 | season_list = [ 24 | '1996-97', 25 | '1997-98', 26 | '1998-99', 27 | '1999-00', 28 | '2000-01', 29 | '2001-02', 30 | '2002-03', 31 | '2003-04', 32 | '2004-05', 33 | '2005-06', 34 | '2006-07', 35 | '2007-08', 36 | 
'2008-09', 37 | '2009-10', 38 | '2010-11', 39 | '2011-12', 40 | '2012-13', 41 | '2013-14', 42 | '2014-15', 43 | '2015-16', 44 | '2016-17', 45 | '2017-18', 46 | '2018-19', 47 | '2019-20' 48 | ] 49 | 50 | #per_mode = 'Per100Possessions' 51 | per_mode = 'Totals' 52 | #per_mode = 'Per36' 53 | #per_mode = 'PerGame' 54 | 55 | # for loop to loop over seasons 56 | for season_id in season_list: 57 | #player_info_url = 'http://stats.nba.com/stats/leaguedashplayerbiostats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&Season=' + seasonid + '&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight=' 58 | player_info_url = 'http://stats.nba.com/stats/leaguedashplayerbiostats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode={}&Period=0&PlayerExperience=&PlayerPosition=&Season={}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='.format(per_mode, season_id) 59 | # json response 60 | response = requests.get(url=player_info_url, headers=headers).json() 61 | # pulling just the data we want 62 | player_info = response['resultSets'][0]['rowSet'] 63 | # looping over data to insert into table 64 | for row in player_info: 65 | player = PlayerBios( 66 | season_id=season_id, # this is key, need this to join and sort by seasons 67 | player_id=row[0], 68 | player_name=row[1], 69 | team_id=row[2], 70 | team_abbreviation=row[3], 71 | age=row[4], 72 | player_height=row[5], 73 | player_height_inches=row[6], 74 | player_weight=row[7], 75 | college=row[8], 76 | country=row[9], 77 | draft_year=row[10], 78 | draft_round=row[11], 79 | 
draft_number=row[12], 80 | gp=row[13], 81 | pts=row[14], 82 | reb=row[15], 83 | ast=row[16], 84 | net_rating=row[17], 85 | oreb_pct=row[18], 86 | dreb_pct=row[19], 87 | usg_pct=row[20], 88 | ts_pct=row[21], 89 | ast_pct=row[22] 90 | ) 91 | 92 | player.save() 93 | print("Done with another season.") 94 | 95 | print ("Done inserting player bios data to the database!") -------------------------------------------------------------------------------- /stats/player_game_logs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Player Game Logs" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import requests\n", 17 | "from settings import Settings\n", 18 | "from models import PlayerGameLogs" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "settings = Settings()\n", 28 | "settings.db.create_tables([PlayerGameLogs], safe=True)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "headers = {\n", 38 | " 'Connection': 'keep-alive',\n", 39 | " 'Accept': 'application/json, text/plain, */*',\n", 40 | " 'x-nba-stats-token': 'true',\n", 41 | " 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',\n", 42 | " 'x-nba-stats-origin': 'stats',\n", 43 | " 'Sec-Fetch-Site': 'same-origin',\n", 44 | " 'Sec-Fetch-Mode': 'cors',\n", 45 | " 'Referer': 'https://stats.nba.com/',\n", 46 | " 'Accept-Encoding': 'gzip, deflate, br',\n", 47 | " 'Accept-Language': 'en-US,en;q=0.9',\n", 48 | "}" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 4, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 
| "season_list = [\n", 58 | " '1996-97',\n", 59 | " '1997-98',\n", 60 | " '1998-99',\n", 61 | " '1999-00',\n", 62 | " '2000-01',\n", 63 | " '2001-02',\n", 64 | " '2002-03',\n", 65 | " '2003-04',\n", 66 | " '2004-05',\n", 67 | " '2005-06',\n", 68 | " '2006-07',\n", 69 | " '2007-08',\n", 70 | " '2008-09',\n", 71 | " '2009-10',\n", 72 | " '2010-11',\n", 73 | " '2011-12',\n", 74 | " '2012-13',\n", 75 | " '2013-14',\n", 76 | " '2014-15',\n", 77 | " '2015-16',\n", 78 | " '2016-17',\n", 79 | " '2017-18',\n", 80 | " '2018-19',\n", 81 | " '2019-20'\n", 82 | "]\n", 83 | "\n", 84 | "type_player = 'P'\n", 85 | "type_team = 'T'" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 5, 91 | "metadata": {}, 92 | "outputs": [ 93 | { 94 | "name": "stdout", 95 | "output_type": "stream", 96 | "text": [ 97 | "Now working on 1996-97 season\n", 98 | "Now working on 1997-98 season\n", 99 | "Now working on 1998-99 season\n", 100 | "Now working on 1999-00 season\n", 101 | "Now working on 2000-01 season\n", 102 | "Now working on 2001-02 season\n", 103 | "Now working on 2002-03 season\n", 104 | "Now working on 2003-04 season\n", 105 | "Now working on 2004-05 season\n", 106 | "Now working on 2005-06 season\n", 107 | "Now working on 2006-07 season\n", 108 | "Now working on 2007-08 season\n", 109 | "Now working on 2008-09 season\n", 110 | "Now working on 2009-10 season\n", 111 | "Now working on 2010-11 season\n", 112 | "Now working on 2011-12 season\n", 113 | "Now working on 2012-13 season\n", 114 | "Now working on 2013-14 season\n", 115 | "Now working on 2014-15 season\n", 116 | "Now working on 2015-16 season\n", 117 | "Now working on 2016-17 season\n", 118 | "Now working on 2017-18 season\n", 119 | "Now working on 2018-19 season\n", 120 | "Now working on 2019-20 season\n", 121 | "Done inserting player bios data to the database!\n" 122 | ] 123 | } 124 | ], 125 | "source": [ 126 | "# for loop to loop over seasons\n", 127 | "for season_id in season_list:\n", 128 | " print(\"Now 
working on \"+season_id+ \" season\")\n", 129 | " player_info_url = 'https://stats.nba.com/stats/leaguegamelog?Counter=1000&DateFrom=&DateTo=&Direction=DESC&LeagueID=00&PlayerOrTeam='+type_player+'&Season='+season_id+'&SeasonType=Regular+Season&Sorter=DATE'\n", 130 | " # json response\n", 131 | " response = requests.get(url=player_info_url, headers=headers).json()\n", 132 | " # pulling just the data we want\n", 133 | " player_info = response['resultSets'][0]['rowSet']\n", 134 | " # looping over data to insert into table\n", 135 | " for row in player_info:\n", 136 | " player = PlayerGameLogs(\n", 137 | " season_id=season_id, # this is key, need this to join and sort by seasons\n", 138 | " player_id=row[1],\n", 139 | " player_name=row[2],\n", 140 | " team_id=row[3],\n", 141 | " team_abbreviation=row[4],\n", 142 | " team_name=row[5],\n", 143 | " game_id=row[6],\n", 144 | " game_date=row[7], \n", 145 | " matchup=row[8],\n", 146 | " wl=row[9],\n", 147 | " min=row[10],\n", 148 | " fgm=row[11],\n", 149 | " fga=row[12],\n", 150 | " fg_pct=row[13],\n", 151 | " fg3m=row[14],\n", 152 | " fg3a=row[15],\n", 153 | " fg3_pct=row[16],\n", 154 | " ftm=row[17],\n", 155 | " fta=row[18],\n", 156 | " ft_pct=row[19],\n", 157 | " oreb=row[20],\n", 158 | " dreb=row[21],\n", 159 | " reb=row[22],\n", 160 | " ast=row[23],\n", 161 | " stl=row[24],\n", 162 | " blk=row[25],\n", 163 | " tov=row[26],\n", 164 | " pf=row[27],\n", 165 | " pts=row[28],\n", 166 | " plus_minus=row[29],\n", 167 | " video_available=row[30]\n", 168 | " )\n", 169 | " player.save()\n", 170 | "\n", 171 | "print (\"Done inserting player bios data to the database!\")" 172 | ] 173 | } 174 | ], 175 | "metadata": { 176 | "kernelspec": { 177 | "display_name": "Python 3", 178 | "language": "python", 179 | "name": "python3" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 3 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | 
"nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython3", 191 | "version": "3.6.6" 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 2 196 | } 197 | -------------------------------------------------------------------------------- /stats/player_game_logs.py: -------------------------------------------------------------------------------- 1 | # player_bios.py - scraps data from stats.nba.com and inserts into player_bios table within MySQL nba stats database 2 | import requests 3 | 4 | from settings import Settings 5 | from models import PlayerGameLogs 6 | 7 | settings = Settings() 8 | settings.db.create_tables([PlayerGameLogs], safe=True) 9 | 10 | headers = { 11 | 'Connection': 'keep-alive', 12 | 'Accept': 'application/json, text/plain, */*', 13 | 'x-nba-stats-token': 'true', 14 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36', 15 | 'x-nba-stats-origin': 'stats', 16 | 'Sec-Fetch-Site': 'same-origin', 17 | 'Sec-Fetch-Mode': 'cors', 18 | 'Referer': 'https://stats.nba.com/', 19 | 'Accept-Encoding': 'gzip, deflate, br', 20 | 'Accept-Language': 'en-US,en;q=0.9', 21 | } 22 | 23 | season_list = [ 24 | '1996-97', 25 | '1997-98', 26 | '1998-99', 27 | '1999-00', 28 | '2000-01', 29 | '2001-02', 30 | '2002-03', 31 | '2003-04', 32 | '2004-05', 33 | '2005-06', 34 | '2006-07', 35 | '2007-08', 36 | '2008-09', 37 | '2009-10', 38 | '2010-11', 39 | '2011-12', 40 | '2012-13', 41 | '2013-14', 42 | '2014-15', 43 | '2015-16', 44 | '2016-17', 45 | '2017-18', 46 | '2018-19', 47 | '2019-20' 48 | ] 49 | 50 | #per_mode = 'Per100Possessions' 51 | per_mode = 'Totals' 52 | #per_mode = 'Per36' 53 | #per_mode = 'PerGame' 54 | 55 | # for loop to loop over seasons 56 | for season_id in season_list: 57 | print("Now working on "+season_id+ " season") 58 | player_info_url = 
'https://stats.nba.com/stats/leaguegamelog?Counter=1000&DateFrom=&DateTo=&Direction=DESC&LeagueID=00&PlayerOrTeam='+type_player+'&Season='+season_id+'&SeasonType=Regular+Season&Sorter=DATE' 59 | # json response 60 | response = requests.get(url=player_info_url, headers=headers).json() 61 | # pulling just the data we want 62 | player_info = response['resultSets'][0]['rowSet'] 63 | # looping over data to insert into table 64 | for row in player_info: 65 | player = PlayerGameLogs( 66 | season_id=season_id, # this is key, need this to join and sort by seasons 67 | player_id=row[1], 68 | player_name=row[2], 69 | team_id=row[3], 70 | team_abbreviation=row[4], 71 | team_name=row[5], 72 | game_id=row[6], 73 | game_date=row[7], 74 | matchup=row[8], 75 | wl=row[9], 76 | min=row[10], 77 | fgm=row[11], 78 | fga=row[12], 79 | fg_pct=row[13], 80 | fg3m=row[14], 81 | fg3a=row[15], 82 | fg3_pct=row[16], 83 | ftm=row[17], 84 | fta=row[18], 85 | ft_pct=row[19], 86 | oreb=row[20], 87 | dreb=row[21], 88 | reb=row[22], 89 | ast=row[23], 90 | stl=row[24], 91 | blk=row[25], 92 | tov=row[26], 93 | pf=row[27], 94 | pts=row[28], 95 | plus_minus=row[29], 96 | video_available=row[30] 97 | ) 98 | player.save() 99 | 100 | print ("Done inserting player bios data to the database!") -------------------------------------------------------------------------------- /stats/player_general_traditional.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Player General Traditional Stats Walk Through" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "First things first we need to import the modules we need. This includes the famous requests model and our settings and class model." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import requests\n", 24 | "from settings import Settings\n", 25 | "from models import PlayerGeneralTraditionalTotals" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "Next, let's initialize our database and create a table based on our BaseModel." 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "settings = Settings()\n", 42 | "settings.db.create_tables([PlayerGeneralTraditionalTotals], safe=True)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "To start, let's pick a specific season and a specific per mode, totals since we can always get per game data from the totals" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "season_id = '2018-19'\n", 59 | "per_mode = 'Totals'" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "Let's dynamically insert our season_id and per_mode variables into the url. Remember, these are the parameters we saw earlier. We will get specific data based on the parameters we pass it. If we changed the season we would get different data from the NBA API endpoint." 
67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "player_info_url = 'https://stats.nba.com/stats/leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode='+per_mode+'&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season='+season_id+'&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision=&Weight='" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "This is the magic right here. This is what lets us get around Adam Silver and his cronies. For whatever reason, if you add this to the requests headers, you are smooth sailing as of August 2020." 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "Please note I got this from: https://github.com/rd11490/NBA_Tutorials/tree/master/finding_endpoints" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "headers = {\n", 99 | " 'Connection': 'keep-alive',\n", 100 | " 'Accept': 'application/json, text/plain, */*',\n", 101 | " 'x-nba-stats-token': 'true',\n", 102 | " 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',\n", 103 | " 'x-nba-stats-origin': 'stats',\n", 104 | " 'Sec-Fetch-Site': 'same-origin',\n", 105 | " 'Sec-Fetch-Mode': 'cors',\n", 106 | " 'Referer': 'https://stats.nba.com/',\n", 107 | " 'Accept-Encoding': 'gzip, deflate, br',\n", 108 | " 'Accept-Language': 'en-US,en;q=0.9',\n", 109 | "}" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "This is the 
response we get back. We will save it into a response variable so we can access the specific json data." 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 6, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "response = requests.get(url=player_info_url, headers=headers).json()" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "If you remember the requests we saw in the preview tab, the data we wanted was nested a few layers deep. We need to go into resultSets, take the first result set, and then grab its rowSet." 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 7, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "player_info = response['resultSets'][0]['rowSet']" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "We will save it into a new variable that we can loop over with a for loop. Let's match each column with the appropriate row index. This is time consuming, but just copy and paste this 'ish."
149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "player_info" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 8, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "Done inserting player general traditional season total data to the database!\n" 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "for row in player_info:\n", 175 | " player = PlayerGeneralTraditionalTotals(\n", 176 | " season_id=season_id, # this is key, need this to join and sort by seasons\n", 177 | " player_id=row[0],\n", 178 | " player_name=row[1],\n", 179 | " team_id=row[2],\n", 180 | " team_abbreviation=row[3],\n", 181 | " age=row[4],\n", 182 | " gp=row[5],\n", 183 | " w=row[6],\n", 184 | " l=row[7],\n", 185 | " w_pct=row[8],\n", 186 | " min=row[9],\n", 187 | " fgm=row[10],\n", 188 | " fga=row[11],\n", 189 | " fg_pct=row[12],\n", 190 | " fg3m=row[13],\n", 191 | " fg3a=row[14],\n", 192 | " fg3_pct=row[15],\n", 193 | " ftm=row[16],\n", 194 | " fta=row[17],\n", 195 | " ft_pct=row[18],\n", 196 | " oreb=row[19],\n", 197 | " dreb=row[20],\n", 198 | " reb=row[21],\n", 199 | " ast=row[22],\n", 200 | " tov=row[23],\n", 201 | " stl=row[24],\n", 202 | " blk=row[25],\n", 203 | " blka=row[26],\n", 204 | " pf=row[27],\n", 205 | " pfd=row[28],\n", 206 | " pts=row[29],\n", 207 | " plus_minus=row[30],\n", 208 | " nba_fantasy_pts=row[31],\n", 209 | " dd2=row[32],\n", 210 | " td3=row[33],\n", 211 | " gp_rank=row[34],\n", 212 | " w_rank=row[35],\n", 213 | " l_rank=row[36],\n", 214 | " w_pct_rank=row[37],\n", 215 | " min_rank=row[38],\n", 216 | " fgm_rank=row[39],\n", 217 | " fga_rank=row[40],\n", 218 | " fg_pct_rank=row[41],\n", 219 | " fg3m_rank=row[42],\n", 220 | " fg3a_rank=row[43],\n", 221 | " fg3_pct_rank=row[44],\n", 222 | " ftm_rank=row[45],\n", 223 | " fta_rank=row[46],\n", 224 | " ft_pct_rank=row[47],\n", 
225 | " oreb_rank=row[48],\n", 226 | " dreb_rank=row[49],\n", 227 | " reb_rank=row[50],\n", 228 | " ast_rank=row[51],\n", 229 | " tov_rank=row[52],\n", 230 | " stl_rank=row[53],\n", 231 | " blk_rank=row[54],\n", 232 | " blka_rank=row[55],\n", 233 | " pf_rank=row[56],\n", 234 | " pfd_rank=row[57],\n", 235 | " pts_rank=row[58],\n", 236 | " plus_minus_rank=row[59],\n", 237 | " nba_fantasy_pts_rank=row[60],\n", 238 | " dd2_rank=row[61],\n", 239 | " td3_rank=row[62],\n", 240 | " cfid=row[63],\n", 241 | " cfparams=row[64])\n", 242 | "\n", 243 | " player.save()\n", 244 | " \n", 245 | "print (\"Done inserting player general traditional season total data to the database!\")" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "Let's run it and voila!" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "## Looping Over Each Season" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "We'll next squash it into a script and create a new list of season ids to loop over. After that we indent the script once and now we are cooking for each season." 
267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "season_list = [\n", 276 | " '1996-97',\n", 277 | " '1997-98',\n", 278 | " '1998-99',\n", 279 | " '1999-00',\n", 280 | " '2000-01',\n", 281 | " '2001-02',\n", 282 | " '2002-03',\n", 283 | " '2003-04',\n", 284 | " '2004-05',\n", 285 | " '2005-06',\n", 286 | " '2006-07',\n", 287 | " '2007-08',\n", 288 | " '2008-09',\n", 289 | " '2009-10',\n", 290 | " '2010-11',\n", 291 | " '2011-12',\n", 292 | " '2012-13',\n", 293 | " '2013-14',\n", 294 | " '2014-15',\n", 295 | " '2015-16',\n", 296 | " '2016-17',\n", 297 | " '2017-18',\n", 298 | " '2018-19',\n", 299 | " '2019-20'\n", 300 | "]\n", 301 | "#per_mode = 'Per100Possessions'\n", 302 | "per_mode = 'Totals'\n", 303 | "#per_mode = 'Per36'\n", 304 | "#per_mode = 'PerGame'" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | "Note the print statement to keep us updated throughout the script." 
312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 10, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "# for loop to loop over seasons\n", 321 | "for season_id in season_list:\n", 322 | " print(\"Now working on \"+season_id+ \" season\")\n", 323 | " # nba stats url to scrape\n", 324 | " player_info_url = 'https://stats.nba.com/stats/leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=' + per_mode +'&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season=' + season_id + '&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision=&Weight='\n", 325 | " # json response\n", 326 | " response = requests.get(url=player_info_url, headers=headers).json()\n", 327 | " # pulling just the data we want\n", 328 | " player_info = response['resultSets'][0]['rowSet']\n", 329 | " # looping over data to insert into table\n", 330 | " for row in player_info:\n", 331 | " player = PlayerGeneralTraditionalTotals(\n", 332 | " season_id=season_id, # this is key, need this to join and sort by seasons\n", 333 | " player_id=row[0],\n", 334 | " player_name=row[1],\n", 335 | " team_id=row[2],\n", 336 | " team_abbreviation=row[3],\n", 337 | " age=row[4],\n", 338 | " gp=row[5],\n", 339 | " w=row[6],\n", 340 | " l=row[7],\n", 341 | " w_pct=row[8],\n", 342 | " min=row[9],\n", 343 | " fgm=row[10],\n", 344 | " fga=row[11],\n", 345 | " fg_pct=row[12],\n", 346 | " fg3m=row[13],\n", 347 | " fg3a=row[14],\n", 348 | " fg3_pct=row[15],\n", 349 | " ftm=row[16],\n", 350 | " fta=row[17],\n", 351 | " ft_pct=row[18],\n", 352 | " oreb=row[19],\n", 353 | " dreb=row[20],\n", 354 | " reb=row[21],\n", 355 | " ast=row[22],\n", 356 | " tov=row[23],\n", 357 | " stl=row[24],\n", 358 | " blk=row[25],\n", 359 | " 
blka=row[26],\n", 360 | " pf=row[27],\n", 361 | " pfd=row[28],\n", 362 | " pts=row[29],\n", 363 | " plus_minus=row[30],\n", 364 | " nba_fantasy_pts=row[31],\n", 365 | " dd2=row[32],\n", 366 | " td3=row[33],\n", 367 | " gp_rank=row[34],\n", 368 | " w_rank=row[35],\n", 369 | " l_rank=row[36],\n", 370 | " w_pct_rank=row[37],\n", 371 | " min_rank=row[38],\n", 372 | " fgm_rank=row[39],\n", 373 | " fga_rank=row[40],\n", 374 | " fg_pct_rank=row[41],\n", 375 | " fg3m_rank=row[42],\n", 376 | " fg3a_rank=row[43],\n", 377 | " fg3_pct_rank=row[44],\n", 378 | " ftm_rank=row[45],\n", 379 | " fta_rank=row[46],\n", 380 | " ft_pct_rank=row[47],\n", 381 | " oreb_rank=row[48],\n", 382 | " dreb_rank=row[49],\n", 383 | " reb_rank=row[50],\n", 384 | " ast_rank=row[51],\n", 385 | " tov_rank=row[52],\n", 386 | " stl_rank=row[53],\n", 387 | " blk_rank=row[54],\n", 388 | " blka_rank=row[55],\n", 389 | " pf_rank=row[56],\n", 390 | " pfd_rank=row[57],\n", 391 | " pts_rank=row[58],\n", 392 | " plus_minus_rank=row[59],\n", 393 | " nba_fantasy_pts_rank=row[60],\n", 394 | " dd2_rank=row[61],\n", 395 | " td3_rank=row[62],\n", 396 | " cfid=row[63],\n", 397 | " cfparams=row[64])\n", 398 | " \n", 399 | " player.save()\n", 400 | " \n", 401 | "print (\"Done inserting player general traditional season total data to the database!\")" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "And we are all set!" 
"""Load per-season NBA player "general traditional" totals into the database.

For every season in ``season_list`` the script requests the
``leaguedashplayerstats`` endpoint of stats.nba.com and stores one
``PlayerGeneralTraditionalTotals`` record per row of the first result set.
"""
import requests

from settings import Settings
from models import PlayerGeneralTraditionalTotals

settings = Settings()
settings.db.create_tables([PlayerGeneralTraditionalTotals], safe=True)

# Browser-like headers sent with every request to stats.nba.com.
headers = {
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain, */*',
    'x-nba-stats-token': 'true',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
    'x-nba-stats-origin': 'stats',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'cors',
    'Referer': 'https://stats.nba.com/',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
}

# Season ids '1996-97' through '2019-20' (start year, dash, two-digit end year).
season_list = [
    '{}-{:02d}'.format(year, (year + 1) % 100) for year in range(1996, 2020)
]

# Value for the PerMode query parameter. Alternatives previously listed here:
# 'Per100Possessions', 'Per36', 'PerGame'.
per_mode = 'Totals'

# Seconds to wait for stats.nba.com; without a timeout a stalled connection
# would block the script indefinitely.
REQUEST_TIMEOUT = 30

# Endpoint URL; the two placeholders are PerMode and Season, in that order.
STATS_URL = (
    'https://stats.nba.com/stats/leaguedashplayerstats'
    '?College=&Conference=&Country=&DateFrom=&DateTo=&Division='
    '&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height='
    '&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0'
    '&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode={}'
    '&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N'
    '&Season={}&SeasonSegment=&SeasonType=Regular+Season'
    '&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference='
    '&VsDivision=&Weight='
)

# Model field names in the order their values are read from each row:
# ROW_FIELDS[i] receives row[i].
# NOTE(review): the mapping is purely positional; if the endpoint changes its
# column order, values will be stored under the wrong fields.
ROW_FIELDS = (
    'player_id', 'player_name', 'team_id', 'team_abbreviation', 'age',
    'gp', 'w', 'l', 'w_pct', 'min',
    'fgm', 'fga', 'fg_pct', 'fg3m', 'fg3a', 'fg3_pct',
    'ftm', 'fta', 'ft_pct',
    'oreb', 'dreb', 'reb', 'ast', 'tov', 'stl', 'blk', 'blka',
    'pf', 'pfd', 'pts', 'plus_minus', 'nba_fantasy_pts', 'dd2', 'td3',
    'gp_rank', 'w_rank', 'l_rank', 'w_pct_rank', 'min_rank',
    'fgm_rank', 'fga_rank', 'fg_pct_rank',
    'fg3m_rank', 'fg3a_rank', 'fg3_pct_rank',
    'ftm_rank', 'fta_rank', 'ft_pct_rank',
    'oreb_rank', 'dreb_rank', 'reb_rank', 'ast_rank', 'tov_rank',
    'stl_rank', 'blk_rank', 'blka_rank', 'pf_rank', 'pfd_rank',
    'pts_rank', 'plus_minus_rank', 'nba_fantasy_pts_rank',
    'dd2_rank', 'td3_rank', 'cfid', 'cfparams',
)

for season_id in season_list:
    print("Now working on " + season_id + " season")
    player_info_url = STATS_URL.format(per_mode, season_id)

    response = requests.get(
        url=player_info_url, headers=headers, timeout=REQUEST_TIMEOUT
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    # The player rows live in the first result set of the JSON payload.
    player_info = response.json()['resultSets'][0]['rowSet']

    # One transaction per season: a failure part-way through rolls the whole
    # season back instead of leaving it half inserted.
    with settings.db.atomic():
        for row in player_info:
            stats = {
                field: row[position]
                for position, field in enumerate(ROW_FIELDS)
            }
            # season_id is stored on every record so seasons can be joined
            # and sorted later.
            player = PlayerGeneralTraditionalTotals(season_id=season_id, **stats)
            player.save()

print("Done inserting player general traditional season total data to the database!")
"""Database and HTTP settings shared by the stats loaders.

Connection details are read from environment variables (python-dotenv loads
them from a local ``.env`` file when one exists). Credentials must never be
hard-coded in this module.
"""
import os

from dotenv import load_dotenv
# NOTE(review): star import kept because other modules may rely on peewee
# names re-exported from here -- TODO confirm, then narrow to MySQLDatabase.
from peewee import *

# Populate the environment before the lookups below.
load_dotenv()

DB_NAME = os.getenv('DB_NAME')
DB_HOST = os.getenv('DB_HOST')
DB_USER = os.getenv('DB_USER')
DB_PASSWORD = os.getenv('DB_PASSWORD')


class Settings:
    """Bundle the MySQL database handle and the default User-Agent string."""

    def __init__(self):
        # utf8mb4 is requested as the connection character set.
        self.db = MySQLDatabase(
            DB_NAME,
            host=DB_HOST,
            user=DB_USER,
            password=DB_PASSWORD,
            charset='utf8mb4'
        )
        self.user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.82 Safari/537.36"