├── data_balancer ├── __init__.py └── balancer.py ├── parsing_data ├── __init__.py ├── cell_func.py └── parser.py ├── requirements.txt ├── make_data.py ├── .gitignore ├── preprocess.py ├── model.py └── README.md /data_balancer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parsing_data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.3 2 | beautifulsoup4==4.5.3 3 | bs4==0.0.1 4 | Keras==2.0.3 5 | lxml==3.7.3 6 | numpy==1.12.1 7 | packaging==16.8 8 | pandas==0.19.2 9 | protobuf==3.2.0 10 | pyparsing==2.2.0 11 | python-dateutil==2.6.0 12 | pytz==2017.2 13 | PyYAML==3.12 14 | requests==2.13.0 15 | scikit-learn==0.18.1 16 | scipy==0.19.0 17 | six==1.10.0 18 | tensorflow==1.0.0 19 | Theano==0.9.0 20 | -------------------------------------------------------------------------------- /make_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Before use this script, you should make nickname list file. 3 | also, file name is must be "nickname_list.txt" 4 | 5 | format like 6 | [nickname_1] 7 | [nickname_2] 8 | 9 | Ex) 10 | kcr0ng 11 | lol_zzang 12 | 달달한아침햇살 13 | """ 14 | 15 | from parsing_data.parser import GameInfoParser 16 | 17 | 18 | PARSE_DATASET_FILENAME = 'train_data/human_readable.csv' 19 | AFTER_PROCESS_FILENAME = 'train_data/computer_trainable.csv' 20 | 21 | if __name__ == '__main__': 22 | with open('nickname_list.txt', 'r', encoding='utf8') as f: 23 | nickname_list = f.read().splitlines() 24 | 25 | for nickname in nickname_list: 26 | print(f"{nickname} start!") 27 | g = GameInfoParser(nickname, file=PARSE_DATASET_FILENAME) 28 | g.run() 29 | 30 | # Remove duplicate data 31 | from data_balancer.balancer import set_balance 32 | set_balance(PARSE_DATASET_FILENAME) 33 | 34 | from preprocess import process 35 | process(PARSE_DATASET_FILENAME, AFTER_PROCESS_FILENAME) 36 | -------------------------------------------------------------------------------- /data_balancer/balancer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | 5 | 6 | # Pasta! 7 | 8 | def set_balance(input_file): 9 | csv_obj = pd.read_csv(input_file) 10 | 11 | for i in csv_obj.index: 12 | find_value = list(csv_obj.ix[i][['game_id', 'summoner_name']].values) 13 | 14 | if [list(tmp_data) for tmp_data in csv_obj[['game_id', 'summoner_name']].values].count(find_value) > 1: 15 | csv_obj.ix[i] = None 16 | 17 | csv_obj.to_csv(input_file, encoding='utf8', index=False) 18 | 19 | # Remove None Row 20 | 21 | output_f = open('tmp.csv', 'w', encoding='utf8') 22 | 23 | with open(input_file, encoding='utf8') as f: 24 | output_f.write(f.readline()) # Skip check CSV header 25 | lines = f.readlines() 26 | for line in lines: 27 | if 'game_id' in line or ',,,,' in line: # DUP HEADER or None DATA 28 | continue 29 | else: 30 | output_f.write(line) 31 | 32 | os.remove(input_file) 33 | os.rename('tmp.csv', input_file) 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OS X files 2 | .DS_Store 3 | 4 | # IDE files 5 | .idea/ 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *,cover 52 | .hypothesis/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # IPython Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # dotenv 85 | .env 86 | 87 | # virtualenv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | -------------------------------------------------------------------------------- /parsing_data/cell_func.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from bs4 import BeautifulSoup 4 | 5 | 6 | def ChampionImage_parser(data): 7 | champion = data.find('div', {'class': 'Image'}).text 8 | level = int(data.find('div', {'class': 'Level'}).text) 9 | 10 | return ['champion_name', 'level'], [champion, level] 11 | 12 | 13 | def SummonerSpell_parser(data): 14 | spell_list = [BeautifulSoup(img_tag['title'], 'lxml').find('b').text for img_tag in data.find_all('img')] 15 | 16 | return ['spell_1', 'spell_2'], spell_list 17 | 18 | 19 | def KeystoneMastery_parser(data): 20 | try: 21 | mastery = BeautifulSoup(data.find('img')['title'], 'lxml').find('b').text 22 | except KeyError: 23 | # No mastery 24 | mastery = 'noMastery' 25 | 26 | return ['mastery'], [mastery] 27 | 28 | 29 | def SummonerName_parser(data): 30 | summoner_name = data.find('a').text 31 | 32 | return ['summoner_name'], [summoner_name] 33 | 34 | 35 | def Items_parser(data): 36 | item_list = [] 37 | 38 | for item in data.find_all('div', {'class': 'Item'}): 39 | try: 40 | item_name = item.find('img')['alt'] 41 | except TypeError: 42 | item_name = 'noItem' 43 | item_list.append(item_name) 44 | 45 | return ['item%d' % i for i in range(len(item_list))], item_list 46 | 47 | 48 | def KDA_parser(data): 49 | name_list = [] 50 | value_list = [] 51 | 52 | for info in data.find_all('span'): 53 | name_list.append(info['class'][0]) 54 | try: 55 | data = int(info.text) 56 | except ValueError: 57 | data = info.text 58 | 59 | value_list.append(data) 60 | 61 | return name_list, value_list 62 | 63 | 64 | def Damage_parser(data): 65 | damage = int(data.find('div', {'class': 'ChampionDamage'}).text.replace(',', '')) 66 | 67 | return ['ChampionDamage'], [damage] 68 | 69 | 70 | def Ward_parser(data): 71 | bought_pink_ward = int(data.find('span', {'class': 'SightWard'}).text) 72 | installed_ward, removed_ward = (span_tag.text for span_tag in data.find('div', {'class': 'Stats'}).find_all('span')) 73 | 74 | return ['bought_pink_ward', 'installed_ward', 'removed_ward'], \ 75 | [bought_pink_ward, int(installed_ward), int(removed_ward)] 76 | 77 | 78 | def CS_parser(data): 79 | total_cs = int(data.find('div', {'class': 'CS'}).text) 80 | cs_per_minute = int(re.findall('\d+', data.find('div', {'class': 'CSPerMinute'}).text)[0]) 81 | 82 | return ['total_cs', 'cs_per_minute'], [total_cs, cs_per_minute] 83 | 84 | 85 | def Gold_parser(data): 86 | gold_str = data.text 87 | 88 | assert 'k' in gold_str 89 | gold = int(re.findall('\d+', gold_str)[0]) * 1000 90 | 91 | return ['gold'], [gold] 92 | 93 | 94 | def Tier_parser(data): 95 | return ['tier'], [data.text.strip()] 96 | -------------------------------------------------------------------------------- /preprocess.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from random import choice 3 | 4 | import os 5 | 6 | from collections import defaultdict 7 | 8 | import pandas as pd 9 | 10 | # Pre-processing with features 11 | import re 12 | 13 | TEAM_LIST = ['red_', 'blue_'] 14 | 15 | 16 | def process(csv_name, output_name): 17 | train_data = pd.read_csv(csv_name, delimiter=',') 18 | 19 | changed_data_dict = defaultdict(dict) 20 | 21 | # CKRate (percentage) to integer 22 | train_data['CKRate'] = [int(re.findall('\d+', rate)[0]) for rate in train_data['CKRate']] 23 | 24 | # Make team data using each user's data 25 | 26 | def save_new_features_for_team(team_data): 27 | def transform_tier_to_integer(tier): 28 | if 'Level' in tier: 29 | return int(tier.split()[1]) 30 | elif 'Master' in tier: 31 | return 25 + 30 32 | elif 'Challenger' in tier: 33 | return 30 + 30 34 | elif 'Unranked' in tier: 35 | return 30 36 | else: 37 | tier_dict = { 38 | 'Bronze': 0, 39 | 'Silver': 5, 40 | 'Gold': 10, 41 | 'Platinum': 15, 42 | 'Diamond': 20, 43 | } 44 | name, level = tier.split() 45 | 46 | return (tier_dict[name] + (5 - int(level))) + 30 47 | 48 | game_dict = changed_data_dict[team_data['game_id'].values[0]] 49 | 50 | if not game_dict: 51 | team_type = choice(TEAM_LIST) 52 | else: 53 | team_type = { 54 | 'blue': 'red_', 55 | 'red': 'blue_' 56 | }[next(iter(game_dict.keys())).split('_')[0]] 57 | 58 | game_dict[team_type + 'team_level'] = team_data['level'].sum() 59 | game_dict[team_type + 'team_kill'] = team_data['Kill'].sum() 60 | game_dict[team_type + 'team_death'] = team_data['Death'].sum() 61 | game_dict[team_type + 'team_assist'] = team_data['Assist'].sum() 62 | game_dict[team_type + 'team_ckrate'] = team_data['CKRate'].sum() 63 | game_dict[team_type + 'team_damage'] = team_data['ChampionDamage'].sum() 64 | game_dict[team_type + 'team_bought_pink_ward'] = team_data['bought_pink_ward'].sum() 65 | game_dict[team_type + 'team_installed_ward'] = team_data['installed_ward'].sum() 66 | game_dict[team_type + 'team_removed_ward'] = team_data['removed_ward'].sum() 67 | game_dict[team_type + 'team_cs'] = team_data['total_cs'].sum() 68 | game_dict[team_type + 'team_cs_per_minute'] = team_data['cs_per_minute'].mean() 69 | game_dict[team_type + 'team_gold'] = team_data['gold'].sum() 70 | game_dict[team_type + 'team_tier'] = team_data['tier'].apply(transform_tier_to_integer).sum() 71 | 72 | if (team_data['result'] == 1).all(): # if this team win 73 | game_dict['result'] = team_type 74 | 75 | # Grouping by each games 76 | train_data.groupby(['game_id', 'result']).apply(save_new_features_for_team) 77 | 78 | writer = csv.DictWriter(open(output_name, 'w', encoding='utf8'), 79 | next(iter(changed_data_dict.values())).keys()) 80 | writer.writeheader() 81 | writer.writerows(changed_data_dict.values()) 82 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from keras.layers import Dense, Activation 4 | from keras.models import Sequential 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.preprocessing import LabelEncoder, Imputer, StandardScaler 7 | 8 | train_data = pd.read_csv('train_data/data.csv', delimiter=',') 9 | 10 | 11 | x = train_data[train_data.keys()[:-1]].values 12 | y = train_data['result'].values 13 | 14 | result_encoder = LabelEncoder() 15 | result_encoder.fit(y) 16 | y = result_encoder.transform(y) 17 | 18 | x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2) 19 | 20 | # Data normalization 21 | imputer = Imputer(strategy='mean') 22 | imputer.fit(x_train) 23 | X_train = imputer.transform(x_train) 24 | X_test = imputer.transform(x_test) 25 | scaler = StandardScaler() 26 | scaler.fit(X_train) 27 | X_train = scaler.transform(X_train) 28 | X_test = scaler.transform(X_test) 29 | 30 | model = Sequential([ 31 | Dense(26, input_dim=x.shape[1]), 32 | Activation('relu'), 33 | Dense(1), 34 | Activation('sigmoid') 35 | ]) 36 | 37 | model.compile( 38 | optimizer='RMSprop', 39 | loss='binary_crossentropy', 40 | metrics=['acc'] 41 | ) 42 | 43 | 44 | model.fit(X_train, y_train, batch_size=5, nb_epoch=20, verbose=1) 45 | 46 | """ 47 | ['champion_name', 'level', 'spell_1', 'spell_2', 'mastery', 48 | 'summoner_name', 'item0', 'item1', 'item2', 'item3', 'item4', 'item5', 49 | 'item6', 'KDARatio', 'Kill', 'Death', 'Assist', 'CKRate', 50 | 'ChampionDamage', 'bought_pink_ward', 'installed_ward', 'removed_ward', 51 | 'total_cs', 'cs_per_minute', 'gold', 'tier', 'game_id', 'result'] 52 | 53 | """ 54 | 55 | """ 56 | # Make integer Label for string data 57 | 58 | def get_columns_list(column_list): 59 | total = [] 60 | for column in column_list: 61 | total += list(train_data[column].values) 62 | return total 63 | 64 | 65 | def transform_train_data(encoder, column_data): 66 | if isinstance(column_data, list): 67 | for column_name in column_data: 68 | train_data[column_name] = encoder.transform(train_data[column_name]) 69 | elif isinstance(column_data, str): 70 | train_data[column_data] = encoder.transform(train_data[column_data]) 71 | else: 72 | raise TypeError("Wrong Type instance!!!") 73 | 74 | champion_encoder = LabelEncoder() 75 | champion_encoder.fit(train_data['champion_name']) 76 | # Replace string data to integer 77 | transform_train_data(champion_encoder, 'champion_name') 78 | 79 | spell_encoder = LabelEncoder() 80 | spell_encoder.fit(get_columns_list(['spell_1', 'spell_2'])) 81 | # Replace 82 | transform_train_data(spell_encoder, ['spell_1', 'spell_2']) 83 | 84 | mastery_encoder = LabelEncoder() 85 | mastery_encoder.fit(train_data['mastery']) 86 | # Replace 87 | transform_train_data(mastery_encoder, 'mastery') 88 | 89 | item_encoder = LabelEncoder() 90 | item_encoder.fit(get_columns_list(['item0', 'item1', 'item2', 'item3', 'item4', 'item5'])) 91 | # Replace 92 | transform_train_data(item_encoder, ['item0', 'item1', 'item2', 'item3', 'item4', 'item5']) 93 | 94 | trinket_encoder = LabelEncoder() 95 | trinket_encoder.fit(get_columns_list(['item6'])) 96 | # Replace 97 | transform_train_data(trinket_encoder, 'item6') 98 | 99 | tier_encoder = LabelEncoder() 100 | tier_encoder.fit(get_columns_list(['tier'])) 101 | # Replace 102 | transform_train_data(tier_encoder, 'tier') 103 | """ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # League-of-Fortuneteller 2 | 기계학습을 이용해 게임(리그 오브 레전드) 결과를 예측해보자 3 | 4 | ## Feature list 5 | * blue_team_level 6 | * blue_team_kill 7 | * blue_team_death 8 | * blue_team_assist 9 | * blue_team_ckrate (킬 기여 수) 10 | * blue_team_damage 11 | * blue_team_bought_pink_ward 12 | * blue_team_installed_ward 13 | * blue_team_removed_ward 14 | * blue_team_cs 15 | * blue_team_cs_per_minute 16 | * blue_team_gold 17 | * blue_team_tier (Bronze, Silver 등 단계에 따라 숫자로 환산한 데이터) 18 | * red_team_level 19 | * red_team_kill 20 | * red_team_death 21 | * red_team_assist 22 | * red_team_ckrate 23 | * red_team_damage 24 | * red_team_bought_pink_ward 25 | * red_team_installed_ward 26 | * red_team_removed_ward 27 | * red_team_cs 28 | * red_team_cs_per_minute 29 | * red_team_gold 30 | * red_team_tier 31 | 32 | ## Model 33 | * Dense(same size with input) with Activation ReLu 34 | * Dense_1 with Activation Sigmoid 35 | 36 | ## Result 37 | **Accuracy: 95.37%** 38 | ```text 39 | 5/2289 [..............................] - ETA: 0s - loss: 0.0028 - acc: 1.0000 40 | 150/2289 [>.............................] - ETA: 0s - loss: 0.0845 - acc: 0.9667 41 | 305/2289 [==>...........................] - ETA: 0s - loss: 0.1004 - acc: 0.9639 42 | 425/2289 [====>.........................] - ETA: 0s - loss: 0.0920 - acc: 0.9671 43 | 570/2289 [======>.......................] - ETA: 0s - loss: 0.0971 - acc: 0.9614 44 | 710/2289 [========>.....................] - ETA: 0s - loss: 0.1036 - acc: 0.9577 45 | 845/2289 [==========>...................] - ETA: 0s - loss: 0.1042 - acc: 0.9598 46 | 1005/2289 [============>.................] - ETA: 0s - loss: 0.1038 - acc: 0.9612 47 | 1150/2289 [==============>...............] - ETA: 0s - loss: 0.1063 - acc: 0.9617 48 | 1285/2289 [===============>..............] - ETA: 0s - loss: 0.1059 - acc: 0.9603 49 | 1435/2289 [=================>............] - ETA: 0s - loss: 0.1044 - acc: 0.9610 50 | 1595/2289 [===================>..........] - ETA: 0s - loss: 0.1106 - acc: 0.9574 51 | 1755/2289 [======================>.......] - ETA: 0s - loss: 0.1127 - acc: 0.9561 52 | 1910/2289 [========================>.....] - ETA: 0s - loss: 0.1221 - acc: 0.9534 53 | 2075/2289 [==========================>...] - ETA: 0s - loss: 0.1193 - acc: 0.9533 54 | 2230/2289 [============================>.] - ETA: 0s - loss: 0.1192 - acc: 0.9534 55 | 2289/2289 [==============================] - 0s - loss: 0.1178 - acc: 0.9537 56 | ``` 57 | 58 | 59 | # Usage 60 | 61 | ## Make training dataset 62 | `make_data.py` is only for make training data. 63 | > *FYI, our repo has already good [train_data](https://github.com/Kcrong/League-of-Fortuneteller/blob/master/train_data/data.csv), so you can use it.* 64 | 65 | after make `nickname_list.txt` file with nickname list, run `make_data.py`. 66 | ```bash 67 | (env3) $ python make_data.py 68 | ``` 69 | Then, if you see `train_data` folder, there will be two csv files. `computer_trainable.csv` and `human_readable.csv`. 70 | you need `computer_trainable.csv`. so rename the `computer_trainable.csv` to `data.csv`. 71 | 72 | 73 | ## Train model 74 | `model.py` is our predict model. 75 | Just run `model.py` 76 | 77 | ```bash 78 | (env3) $ python model.py 79 | W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations. 80 | Epoch 1/20 81 | W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations. 82 | W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations. 83 | W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations. 84 | W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations. 85 | ... 86 | ... 87 | ... 88 | ``` 89 | 90 | ### Thanks 91 | -------------------------------------------------------------------------------- /parsing_data/parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | 1. Get summonerId 3 | 2. Add Queue game info 4 | 3. Parsing and add DB 5 | 6 | DB - SQLite3 7 | Back Queue - Celery 8 | """ 9 | import csv 10 | 11 | import re 12 | 13 | import requests 14 | from parsing_data import cell_func 15 | 16 | from urllib.parse import urljoin 17 | from bs4 import BeautifulSoup 18 | 19 | 20 | def run_once(f): 21 | def wrapper(self, *args, **kwargs): 22 | if self.writer is None: 23 | return f(self, *args, **kwargs) 24 | 25 | return wrapper 26 | 27 | 28 | class GameInfoParser: 29 | """ 30 | Parsing Game Info from op.gg. 31 | """ 32 | 33 | host = 'https://www.op.gg/' 34 | summoner_info_url_format = urljoin(host, 'summoner/userName=%s') 35 | ingame_info_url_format = urljoin(host, 'summoner/matches/ajax/detail/gameId=%s&summonerId=%s') 36 | 37 | def __init__(self, nickname, file): 38 | self.nickname = nickname 39 | if isinstance(file, str): 40 | self.file = open(file, 'a', encoding='utf8') 41 | else: 42 | self.file = file 43 | 44 | # will be initialize later 45 | self.writer = None 46 | 47 | self.summonerId, self.games = self.get_summoner_id_and_games() 48 | 49 | @staticmethod 50 | def get_response_with_soup(url): 51 | return BeautifulSoup(requests.get(url).content, 'lxml') 52 | 53 | def get_summoner_id_and_games(self): 54 | soup = self.get_response_with_soup(self.summoner_info_url_format % self.nickname) 55 | container = soup.find('div', {'class': 'GameListContainer'}) 56 | summoner_id = container['data-summoner-id'] 57 | 58 | def get_game_list(cont): 59 | game_htmls = cont.findAll('div', {'class': 'GameItemWrap'}) 60 | 61 | game_ids = [] 62 | 63 | for html in game_htmls: 64 | a_tag = html.find('a', {'class': 'Button MatchDetail'}) 65 | event_string = a_tag['onclick'] 66 | 67 | game_id, game_summoner_id = re.findall('\d+', event_string) 68 | 69 | try: 70 | assert game_summoner_id == summoner_id 71 | except AssertionError: 72 | continue 73 | else: 74 | game_ids.append(game_id) 75 | 76 | return game_ids 77 | 78 | return summoner_id, get_game_list(container) 79 | 80 | @staticmethod 81 | def parse_team_info(team_html_table): 82 | """ 83 | 'ChampionImage': None, 84 | 'SummonerSpell': None, 85 | 'KeystoneMastery': None, 86 | 'SummonerName': None, 87 | 'Items': None, 88 | 'KDA': None, 89 | 'Damage': None, 90 | 'Ward': None, 91 | 'CS': None, 92 | 'Gold': None, 93 | 'Tier': None 94 | """ 95 | 96 | content_tbody = team_html_table.find('tbody', {'class': 'Content'}) 97 | rows = content_tbody.find_all('tr', {'class': 'Row'}) 98 | 99 | team_member_info_list = [] 100 | 101 | for row in rows: 102 | cells = row.find_all('td', {'class': 'Cell'}) 103 | 104 | data_dict = {} 105 | 106 | for cell in cells: 107 | cell_type = cell['class'][0] 108 | parse_func = getattr(cell_func, cell_type + '_parser') 109 | 110 | # name, value = parse_func(cell) 111 | # data_dict[name] = value 112 | 113 | for name, value in zip(*parse_func(cell)): 114 | data_dict[name] = value 115 | 116 | team_member_info_list.append(data_dict) 117 | 118 | return team_member_info_list 119 | 120 | @run_once 121 | def init_csv_writer(self, key_list): 122 | self.writer = csv.DictWriter(self.file, key_list) 123 | self.writer.writeheader() 124 | 125 | def save(self, data): 126 | self.writer.writerows(data) 127 | 128 | def run(self): 129 | def add_meta_info(info_dict_list, game_id, result): 130 | 131 | for info_dict in info_dict_list: 132 | info_dict['game_id'] = game_id 133 | info_dict['result'] = result 134 | 135 | return info_dict_list 136 | 137 | for game_id in self.games: 138 | game_info_url = self.ingame_info_url_format % (game_id, self.summonerId) 139 | soup = self.get_response_with_soup(game_info_url) 140 | table_wrapper = soup.find('div', {'class': 'GameDetailTableWrap'}) 141 | 142 | # Be careful with draw 143 | winner_table = table_wrapper.find('table', {'class': 'Result-WIN'}) 144 | looser_table = table_wrapper.find('table', {'class': 'Result-LOSE'}) 145 | try: 146 | winner_info = add_meta_info(self.parse_team_info(winner_table), int(game_id), 1) 147 | looser_info = add_meta_info(self.parse_team_info(looser_table), int(game_id), 0) 148 | except AttributeError: 149 | # 무승부나 탈주일경우, 이 게임은 넘어감 150 | continue 151 | 152 | self.init_csv_writer(winner_info[0].keys()) 153 | 154 | self.save(winner_info) 155 | self.save(looser_info) 156 | 157 | 158 | if __name__ == '__main__': 159 | g = GameInfoParser('달달한아침햇살', file='output.csv') 160 | g.run() 161 | --------------------------------------------------------------------------------