├── MANIFEST.in ├── playground.py ├── LICENSE ├── .gitignore ├── setup.py ├── hsdata ├── __init__.py ├── career_names.json ├── hsbox.py ├── hearthstats.py ├── utils.py └── core.py ├── README.md └── tests.py /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md LICENSE hsdata/career_names.json playground.py tests.py 2 | -------------------------------------------------------------------------------- /playground.py: -------------------------------------------------------------------------------- 1 | import hsdata 2 | 3 | # 获取卡组数据 4 | decks = hsdata.HSBoxDecks() 5 | # 若未找到本地数据,会自动从网络获取 6 | print('从炉石盒子获取到', len(decks), '个卡组数据!') 7 | 8 | # 更新卡组数据 9 | # decks.update() 10 | 11 | # 搜索卡组 12 | found = decks.search( 13 | career='萨满', 14 | mode=hsdata.MODE_STANDARD, 15 | min_games=10000, 16 | win_rate_top_n=5) 17 | print('其中5个胜率最高的萨满卡组:') 18 | for deck in found: 19 | print('{}: {} 场, {:.2%} 胜'.format( 20 | deck.name, deck.games, deck.win_rate)) 21 | 22 | # 查看卡组中的卡牌 23 | print('其中第一个卡组用了这些卡牌') 24 | print(found[0].cards) 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2016 Youfou 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # PyCharm 92 | .idea 93 | 94 | # Private 95 | data/ 96 | p_* 97 | 98 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from setuptools import setup, find_packages 4 | 5 | readme_file = 'README.md' 6 | 7 | try: 8 | import pypandoc 9 | long_description = pypandoc.convert(readme_file, to='rst') 10 | except ImportError: 11 | logging.warning('pypandoc module not found, long_description will be the raw text instead.') 12 | with open(readme_file, encoding='utf-8') as fp: 13 | long_description = fp.read() 14 | 15 | setup( 16 | name='hsdata', 17 | version='0.2.16', 18 | packages=find_packages(), 19 | package_data={ 20 | '': ['*.md'], 21 | 'hsdata': ['career_names.json'] 22 | }, 23 | include_package_data=True, 24 | install_requires=[ 25 | 'requests>=2.0', 26 | 'scrapy>=1.0' 27 | ], 28 | url='https://github.com/youfou/hsdata', 29 | license='Apache 2.0', 30 | author='Youfou', 31 | author_email='youfou@qq.com', 32 | description='用数据玩炉石!快速收集和分析炉石传说的卡牌及卡组数据。', 33 | long_description=long_description, 34 | keywords=[ 35 | '炉石', 36 | 'Hearthstone', 37 | '数据' 38 | ], 39 | classifiers=[ 40 | 'Development Status :: 4 - Beta', 41 | 'License :: OSI Approved :: Apache Software License', 42 | 'Programming Language :: Python :: 3', 43 | 'Operating System :: OS Independent', 44 | 'Natural Language :: Chinese (Simplified)', 45 | 'Topic :: Games/Entertainment :: Simulation', 46 | 'Topic :: Scientific/Engineering :: Information Analysis' 47 | ] 48 | ) 49 | -------------------------------------------------------------------------------- /hsdata/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding: utf-8 3 | 4 | 5 | """ 6 | 用数据玩炉石! 7 | ~~~~~~~~~~~~ 8 | 9 | 快速收集和分析炉石传说的卡牌及卡组数据 10 | 11 | 12 | 快速上手: 13 | 14 | >>> import hsdata 15 | >>> 16 | >>> # 获取卡组数据 17 | >>> decks = hsdata.HSBoxDecks() 18 | >>> # 若未找到本地数据,会自动从网络获取 19 | >>> print('从炉石盒子获取到', len(decks), '个卡组数据!') 20 | >>> 21 | >>> # 更新卡组数据 22 | >>> # decks.update() 23 | >>> 24 | >>> # 搜索卡组 25 | >>> found = decks.search( 26 | >>> career='萨满', 27 | >>> mode=hsdata.MODE_STANDARD, 28 | >>> min_games=10000, 29 | >>> win_rate_top_n=5) 30 | >>> print('其中5个胜率最高的萨满卡组:') 31 | >>> for deck in found: 32 | >>> print('{}: {} 场, {:.2%} 胜'.format( 33 | >>> deck.name, deck.games, deck.win_rate)) 34 | >>> 35 | >>> # 查看卡组中的卡牌 36 | >>> print('其中第一个卡组用了这些卡牌') 37 | >>> print(found[0].cards) 38 | 39 | ---- 40 | 41 | GitHub: https://github.com/youfou/hsdata 42 | 43 | ---- 44 | 45 | :copyright: (c) 2016 by Youfou. 46 | :license: Apache 2.0, see LICENSE for more details. 47 | 48 | """ 49 | 50 | import logging 51 | 52 | from .core import ( 53 | Career, Careers, Card, Cards, Deck, Decks, 54 | MODE_STANDARD, MODE_WILD, CAREERS, CARDS, 55 | set_data_dir, set_main_language, get_career, can_have, days_ago 56 | ) 57 | from .hearthstats import HearthStatsDeck, HearthStatsDecks 58 | from .hsbox import HSBoxDeck, HSBoxDecks 59 | from .utils import ( 60 | DeckGenerator, 61 | diff_decks, decks_expired, get_all_decks, 62 | cards_value, print_cards, cards_to_csv 63 | ) 64 | 65 | logging.getLogger('scrapy').propagate = False 66 | logging.getLogger('requests').propagate = False 67 | logging.basicConfig(level=logging.INFO) 68 | 69 | __title__ = 'hsdata' 70 | __version__ = '0.2.16' 71 | __author__ = 'Youfou' 72 | __license__ = 'Apache 2.0' 73 | __copyright__ = 'Copyright 2016 Youfou' 74 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hsdata 2 | 3 | **用数据玩炉石** 4 | 5 | 快速收集和分析炉石传说的卡牌及卡组数据 6 | 7 | ## 运行环境 8 | 9 | hsdata 使用 Python 3 编写,引用了 requests 和 scrapy 两个模块,理论上可以在所有支持这两个模块的系统环境中运行。 10 | 11 | ## 如何安装 12 | 13 | 推荐使用 pip 安装 14 | 15 | pip3 install -U hsdata 16 | 17 | ## 快速上手 18 | 19 | ```python 20 | 21 | import hsdata 22 | 23 | # 获取卡组数据 24 | decks = hsdata.HSBoxDecks() 25 | # 若未找到本地数据,会自动从网络获取 26 | print('从炉石盒子获取到', len(decks), '个卡组数据!') 27 | 28 | # 更新卡组数据 29 | # decks.update() 30 | 31 | # 搜索卡组 32 | found = decks.search( 33 | career='萨满', 34 | mode=hsdata.MODE_STANDARD, 35 | min_games=10000, 36 | win_rate_top_n=5) 37 | print('其中5个胜率最高的萨满卡组:') 38 | for deck in found: 39 | print('{}: {} 场, {:.2%} 胜'.format( 40 | deck.name, deck.games, deck.win_rate)) 41 | 42 | # 查看卡组中的卡牌 43 | print('其中第一个卡组用了这些卡牌') 44 | print(found[0].cards) 45 | 46 | ``` 47 | 48 | 运行结果类似这样 49 | 50 | > 从炉石盒子获取到 1574 个卡组数据! 51 | > 52 | > 其中5个胜率最高的萨满卡组: 53 | > 【黄金赛冠军】OmegaZero中速萨: 124830 场, 63.47% 胜 54 | > 【欧服登顶】Janos 中速萨: 172444 场, 63.02% 胜 55 | > 【EULC冠军】Pavel中速萨: 61187 场, 62.73% 胜 56 | > 【欧服前50】Toymachine中速萨: 41754 场, 60.95% 胜 57 | > 【外服登顶】Ownerism 中速萨: 152966 场, 60.94% 胜 58 | > 59 | > 其中第一个卡组用了这些卡牌 60 | > Counter({\: 2, 61 | > \: 2, 62 | > \: 2, 63 | > \: 2, 64 | > \: 2, 65 | > \: 1, 66 | > \: 1, 67 | > \: 1, 68 | > \: 2, 69 | > \: 2, 70 | > \: 2, 71 | > \: 2, 72 | > \: 2, 73 | > \: 2, 74 | > \: 2, 75 | > \: 1, 76 | > \: 2}) 77 | 78 | 以上只是个帮助入门的例子,发挥想象力,用它来探索更多吧! 79 | 80 | ## 数据来源 81 | 82 | 目前 hsdata 采用了以下数据来源,这些数据的版权为各数据源所有。 83 | 84 | * 卡牌数据 85 | * [HearthstoneJSON](https://hearthstonejson.com/) 86 | * 卡组数据 87 | * [网易炉石盒子](http://lushi.163.com/) 88 | * [HearthStats](http://hearthstats.net/) 89 | 90 | ---- 91 | 92 | 许可协议: Apache License, Version 2.0 93 | -------------------------------------------------------------------------------- /hsdata/career_names.json: -------------------------------------------------------------------------------- 1 | { 2 | "deDE": { 3 | "DRUID": "Druide", 4 | "HUNTER": "Jäger", 5 | "MAGE": "Magier", 6 | "NEUTRAL": "Neutral", 7 | "PALADIN": "Paladin", 8 | "PRIEST": "Priester", 9 | "ROGUE": "Schurke", 10 | "SHAMAN": "Schamane", 11 | "WARLOCK": "Hexenmeister", 12 | "WARRIOR": "Krieger" 13 | }, 14 | "enUS": { 15 | "DRUID": "Druid", 16 | "HUNTER": "Hunter", 17 | "MAGE": "Mage", 18 | "NEUTRAL": "Neutral", 19 | "PALADIN": "Paladin", 20 | "PRIEST": "Priest", 21 | "ROGUE": "Rogue", 22 | "SHAMAN": "Shaman", 23 | "WARLOCK": "Warlock", 24 | "WARRIOR": "Warrior" 25 | }, 26 | "esES": { 27 | "DRUID": "Druida", 28 | "HUNTER": "Cazador", 29 | "MAGE": "Mago", 30 | "NEUTRAL": "Neutral", 31 | "PALADIN": "Paladín", 32 | "PRIEST": "Sacerdote", 33 | "ROGUE": "Pícaro", 34 | "SHAMAN": "Chamán", 35 | "WARLOCK": "Brujo", 36 | "WARRIOR": "Guerrero" 37 | }, 38 | "esMX": { 39 | "DRUID": "Druida", 40 | "HUNTER": "Cazador", 41 | "MAGE": "Mago", 42 | "NEUTRAL": "Neutral", 43 | "PALADIN": "Paladín", 44 | "PRIEST": "Sacerdote", 45 | "ROGUE": "Pícaro", 46 | "SHAMAN": "Chamán", 47 | "WARLOCK": "Brujo", 48 | "WARRIOR": "Guerrero" 49 | }, 50 | "frFR": { 51 | "DRUID": "Druide", 52 | "HUNTER": "Chasseur", 53 | "MAGE": "Mage", 54 | "NEUTRAL": "Neutre", 55 | "PALADIN": "Paladin", 56 | "PRIEST": "Prêtre", 57 | "ROGUE": "Voleur", 58 | "SHAMAN": "Chaman", 59 | "WARLOCK": "Démoniste", 60 | "WARRIOR": "Guerrier" 61 | }, 62 | "itIT": { 63 | "DRUID": "Druido", 64 | "HUNTER": "Cacciatore", 65 | "MAGE": "Mago", 66 | "NEUTRAL": "Generiche", 67 | "PALADIN": "Paladino", 68 | "PRIEST": "Sacerdote", 69 | "ROGUE": "Ladro", 70 | "SHAMAN": "Sciamano", 71 | "WARLOCK": "Stregone", 72 | "WARRIOR": "Guerriero" 73 | }, 74 | "jaJP": { 75 | "DRUID": "ドルイド", 76 | "HUNTER": "ハンター", 77 | "MAGE": "メイジ", 78 | "NEUTRAL": "中立", 79 | "PALADIN": "パラディン", 80 | "PRIEST": "プリースト", 81 | "ROGUE": "ローグ", 82 | "SHAMAN": "シャーマン", 83 | "WARLOCK": "ウォーロック", 84 | "WARRIOR": "ウォリアー" 85 | }, 86 | "koKR": { 87 | "DRUID": "드루이드", 88 | "HUNTER": "사냥꾼", 89 | "MAGE": "마법사", 90 | "NEUTRAL": "중립", 91 | "PALADIN": "성기사", 92 | "PRIEST": "사제", 93 | "ROGUE": "도적", 94 | "SHAMAN": "주술사", 95 | "WARLOCK": "흑마법사", 96 | "WARRIOR": "전사" 97 | }, 98 | "plPL": { 99 | "DRUID": "Druid", 100 | "HUNTER": "Łowca", 101 | "MAGE": "Mag", 102 | "NEUTRAL": "Neutralne", 103 | "PALADIN": "Paladyn", 104 | "PRIEST": "Kapłan", 105 | "ROGUE": "Łotr", 106 | "SHAMAN": "Szaman", 107 | "WARLOCK": "Czarnoksiężnik", 108 | "WARRIOR": "Wojownik" 109 | }, 110 | "ptBR": { 111 | "DRUID": "Druida", 112 | "HUNTER": "Caçador", 113 | "MAGE": "Mago", 114 | "NEUTRAL": "Neutro", 115 | "PALADIN": "Paladino", 116 | "PRIEST": "Sacerdote", 117 | "ROGUE": "Ladino", 118 | "SHAMAN": "Xamã", 119 | "WARLOCK": "Bruxo", 120 | "WARRIOR": "Guerreiro" 121 | }, 122 | "ruRU": { 123 | "DRUID": "Друид", 124 | "HUNTER": "Охотник", 125 | "MAGE": "Маг", 126 | "NEUTRAL": "Общие", 127 | "PALADIN": "Паладин", 128 | "PRIEST": "Жрец", 129 | "ROGUE": "Разбойник", 130 | "SHAMAN": "Шаман", 131 | "WARLOCK": "Чернокнижник", 132 | "WARRIOR": "Воин" 133 | }, 134 | "thTH": { 135 | "DRUID": "ดรูอิด", 136 | "HUNTER": "ฮันเตอร์", 137 | "MAGE": "เมจ", 138 | "NEUTRAL": "เป็นกลาง", 139 | "PALADIN": "พาลาดิน", 140 | "PRIEST": "พรีสต์", 141 | "ROGUE": "โร้ก", 142 | "SHAMAN": "ชาแมน", 143 | "WARLOCK": "วอร์ล็อค", 144 | "WARRIOR": "วอริเออร์" 145 | }, 146 | "zhCN": { 147 | "DRUID": "德鲁伊", 148 | "HUNTER": "猎人", 149 | "MAGE": "法师", 150 | "NEUTRAL": "中立", 151 | "PALADIN": "圣骑士", 152 | "PRIEST": "牧师", 153 | "ROGUE": "潜行者", 154 | "SHAMAN": "萨满祭司", 155 | "WARLOCK": "术士", 156 | "WARRIOR": "战士" 157 | }, 158 | "zhTW": { 159 | "DRUID": "德魯伊", 160 | "HUNTER": "獵人", 161 | "MAGE": "法師", 162 | "NEUTRAL": "中立", 163 | "PALADIN": "聖騎士", 164 | "PRIEST": "牧師", 165 | "ROGUE": "盜賊", 166 | "SHAMAN": "薩滿", 167 | "WARLOCK": "術士", 168 | "WARRIOR": "戰士" 169 | } 170 | } -------------------------------------------------------------------------------- /tests.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import unittest 4 | 5 | import hsdata 6 | 7 | logging.getLogger('scrapy').propagate = True 8 | logging.getLogger('requests').propagate = True 9 | 10 | 11 | class Tests(unittest.TestCase): 12 | def setUp(self): 13 | if hsdata.core.MAIN_LANGUAGE != 'zhCN': 14 | hsdata.set_main_language('zhCN') 15 | 16 | @staticmethod 17 | def remove_if_exists(path): 18 | if os.path.exists(path): 19 | os.remove(path) 20 | 21 | def test_career(self): 22 | career = hsdata.Career('MAGE') 23 | self.assertEqual(career.name, '法师') 24 | self.assertEqual(career.__repr__(), '') 25 | 26 | hsdata.set_main_language('enUS') 27 | career = hsdata.Career('MAGE') 28 | self.assertEqual(career.name, 'Mage') 29 | 30 | def test_careers(self): 31 | self.assertEqual(len(hsdata.CAREERS), 11) 32 | self.assertEqual(hsdata.CAREERS.get('HUNTER').name, '猎人') 33 | 34 | hsdata.Cards() 35 | self.assertEqual(hsdata.CAREERS.search('雷 萨').name, '猎人') 36 | self.assertEqual(hsdata.CAREERS.search('迪,麦 文').name, '法师') 37 | 38 | hsdata.set_main_language('enUS') 39 | hsdata.Cards() 40 | self.assertEqual(hsdata.CAREERS.search('Rexxar').name, 'Hunter') 41 | 42 | def test_card(self): 43 | card = hsdata.Cards().get('OG_134') 44 | self.assertEqual(card.name, '尤格-萨隆') 45 | self.assertEqual(card.career.name, '中立') 46 | 47 | def test_cards(self): 48 | cards = hsdata.Cards() 49 | found = cards.search('萨隆', '每 施放', return_first=False) 50 | self.assertEqual(len(found), 1) 51 | card = cards.get(found[0].id) 52 | self.assertEqual(found[0], card) 53 | self.assertEqual(cards.search(in_text='在你召唤一个随从后 随机 敌方 伤害').name, '飞刀杂耍者') 54 | self.assertIsNone(cards.search('关门放狗', career='mage')) 55 | self.assertIsInstance(cards.search('海盗', return_first=False), list) 56 | 57 | def test_cards_update(self): 58 | test_path = 'p_cards_update_test.json' 59 | 60 | self.remove_if_exists(test_path) 61 | 62 | try: 63 | cards = hsdata.Cards(test_path) 64 | cards.update(hs_version_code=14366) 65 | finally: 66 | self.remove_if_exists(test_path) 67 | 68 | self.assertEqual(cards.search('兽群 呼唤', '三种').cost, 8) 69 | 70 | def test_deck(self): 71 | decks = hsdata.HSBoxDecks() 72 | deck = decks[10] 73 | self.assertIsInstance(deck.career, hsdata.Career) 74 | self.assertIsInstance(list(deck.cards.keys())[0], hsdata.Card) 75 | self.assertEqual(len(list(deck.cards.elements())), 30) 76 | 77 | def test_hsbox_decks(self): 78 | 79 | test_path = 'p_hsbox_decks_test.json' 80 | self.remove_if_exists(test_path) 81 | 82 | try: 83 | updated_decks = hsdata.HSBoxDecks(json_path=test_path) 84 | updated_deck = updated_decks[100] 85 | loaded_decks = hsdata.HSBoxDecks(json_path=test_path) 86 | loaded_deck = loaded_decks.get(updated_deck.id) 87 | finally: 88 | self.remove_if_exists(test_path) 89 | 90 | self.assertEqual(len(updated_decks), len(loaded_decks)) 91 | self.assertEqual(updated_deck.cards, loaded_deck.cards) 92 | 93 | self.assertIsNotNone(loaded_decks.source) 94 | self.assertIsNotNone(loaded_deck.source) 95 | 96 | self.assertTrue( 97 | updated_decks.source == 98 | updated_deck.source == 99 | loaded_decks.source == 100 | loaded_deck.source 101 | ) 102 | 103 | self.assertIs(loaded_decks.get(loaded_deck.id), loaded_deck) 104 | 105 | found = loaded_decks.search('萨满', hsdata.MODE_STANDARD, 0.5, 10000, 5) 106 | self.assertLessEqual(len(found), 5) 107 | last_win_rate = 1 108 | for deck in found: 109 | self.assertEqual(deck.career, hsdata.CAREERS.get('SHAMAN')) 110 | self.assertEqual(deck.mode, hsdata.MODE_STANDARD) 111 | self.assertGreaterEqual(deck.win_rate, 0.5) 112 | self.assertGreaterEqual(deck.games, 10000) 113 | self.assertLessEqual(deck.win_rate, last_win_rate) 114 | last_win_rate = deck.win_rate 115 | 116 | def test_can_have(self): 117 | cards = hsdata.Cards() 118 | 119 | self.assertTrue(hsdata.can_have('萨满', cards.search('叫嚣的中士'))) 120 | self.assertTrue(hsdata.CAREERS.search('萨满').can_have(cards.search('叫嚣的中士'))) 121 | self.assertTrue(hsdata.can_have('猎人', cards.search('关门放狗'))) 122 | self.assertTrue(hsdata.CAREERS.search('猎人').can_have(cards.search('关门放狗'))) 123 | 124 | self.assertFalse(hsdata.can_have('萨满', cards.search('关门放狗'))) 125 | self.assertFalse(hsdata.CAREERS.search('萨满').can_have(cards.search('关门放狗'))) 126 | self.assertFalse(hsdata.can_have('猎人', cards.search('玉莲帮密探'))) 127 | self.assertFalse(hsdata.CAREERS.search('猎人').can_have(cards.search('玉莲帮密探'))) 128 | 129 | 130 | if __name__ == '__main__': 131 | unittest.main() 132 | -------------------------------------------------------------------------------- /hsdata/hsbox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding: utf-8 3 | 4 | 5 | """ 6 | 炉石盒子的卡组和卡组合集类 7 | """ 8 | 9 | import json 10 | import logging 11 | import multiprocessing 12 | import re 13 | from datetime import datetime 14 | 15 | import requests 16 | import scrapy 17 | from scrapy.crawler import CrawlerProcess 18 | 19 | from .core import ( 20 | DATE_TIME_FORMAT, 21 | Deck, Decks, CAREERS 22 | ) 23 | 24 | # 该来源的标识 25 | SOURCE_NAME = 'HSBOX' 26 | 27 | # 默认的载入和保存文件名,将与 DATA_DIR 拼接 28 | JSON_FILE_NAME = 'Decks_{}.json'.format(SOURCE_NAME) 29 | 30 | CAREER_MAP = { 31 | 1: CAREERS.get('WARRIOR'), 32 | 2: CAREERS.get('SHAMAN'), 33 | 3: CAREERS.get('ROGUE'), 34 | 4: CAREERS.get('PALADIN'), 35 | 5: CAREERS.get('HUNTER'), 36 | 6: CAREERS.get('DRUID'), 37 | 7: CAREERS.get('WARLOCK'), 38 | 8: CAREERS.get('MAGE'), 39 | 9: CAREERS.get('PRIEST'), 40 | } 41 | 42 | 43 | class HSBoxDeck(Deck): 44 | # from: http://hs.gameyw.netease.com/box_groups.html 45 | # 该类卡组的 source 属性 46 | source = SOURCE_NAME 47 | DECK_URL_TEMPLATE = 'http://hs.gameyw.netease.com/box_group_details.html?code={}' 48 | 49 | def __init__(self): 50 | super(HSBoxDeck, self).__init__() 51 | self.ranked_games = 0 52 | self.ranked_wins = 0 53 | self.users = 0 54 | self.created_at = None 55 | self.duration = None 56 | 57 | @property 58 | def ranked_win_rate(self): 59 | if self.ranked_games: 60 | return self.ranked_wins / self.ranked_games 61 | 62 | @property 63 | def ranked_losses(self): 64 | if self.ranked_games: 65 | return self.ranked_games - (self.ranked_wins or 0) 66 | 67 | def to_dict(self): 68 | dct = super(HSBoxDeck, self).to_dict() 69 | dct['created_at'] = self.created_at.strftime(DATE_TIME_FORMAT) 70 | return dct 71 | 72 | def from_dict(self, dct, cards=None): 73 | created_at = dct.pop('created_at') 74 | if created_at: 75 | self.created_at = datetime.strptime(created_at, DATE_TIME_FORMAT) 76 | super(HSBoxDeck, self).from_dict(dct, cards) 77 | 78 | 79 | class HSBoxDecks(Decks): 80 | # 当从本地JSON载入卡组时,将把每个卡组转化为该类 81 | deck_class = HSBoxDeck 82 | 83 | def __init__(self, json_path=None, auto_load=True): 84 | logging.info('初始化卡组合集 (网易炉石盒子)') 85 | super(HSBoxDecks, self).__init__(json_path=json_path, auto_load=auto_load) 86 | 87 | def update(self, json_path=None): 88 | """ 89 | 从"炉石传说盒子"获取最新的卡组数据,并保存为JSON 90 | :param json_path: JSON的保存路径 91 | """ 92 | 93 | if not json_path: 94 | json_path = self.json_path 95 | 96 | logging.info('开始更新炉石盒子卡组数据,将保存到 {}'.format(json_path)) 97 | 98 | # 卡组的主要信息 99 | url_data = 'http://hs.gameyw.netease.com/json/pm20835.js' 100 | url_duration = 'http://hsimg.gameyw.netease.com/pm19022.js' 101 | 102 | rp_json_in_js = re.compile(r'var\s+(\w+)\s*=\s*(.+);') 103 | session = requests.Session() 104 | 105 | def get_num(parent, key_name, to_float=False): 106 | num = parent.get(key_name) 107 | if num == '': 108 | num = None 109 | if num is not None: 110 | if to_float: 111 | num = float(num) 112 | else: 113 | num = int(num) 114 | return num 115 | 116 | def get_json(url): 117 | resp = session.get(url) 118 | resp.raise_for_status() 119 | m = rp_json_in_js.search(resp.text) 120 | return json.loads(m.group(2)) 121 | 122 | decks_data = get_json(url_data) 123 | decks_duration = get_json(url_duration) 124 | 125 | # 清除原有的数据 126 | self.clear() 127 | 128 | for data in decks_data: 129 | deck = HSBoxDeck() 130 | 131 | deck.name = data.get('title') 132 | deck.id = data.get('md5key') 133 | 134 | deck.career = CAREER_MAP.get(get_num(data, 'job')) 135 | 136 | skip_this_deck = False 137 | 138 | for card_count in data['deckString']['toPage'].split(','): 139 | card_id, count = card_count.split(':') 140 | card = self.cards.get(card_id) 141 | 142 | # 炉石盒子的BUG,一些卡组会引用不存在,不可收集,或职业错误的卡牌 143 | if not card or not card.collectible or deck.career not in card.careers: 144 | skip_this_deck = True 145 | break 146 | 147 | if not card: 148 | raise ValueError('缺少卡牌: {}'.format(card_id)) 149 | count = int(count) 150 | deck.cards[card] = count 151 | else: 152 | num_of_cards = sum(deck.cards.values()) 153 | if num_of_cards != 30: 154 | raise ValueError('{} 的卡牌数量为 {},应为 30'.format( 155 | deck, num_of_cards)) 156 | 157 | if skip_this_deck: 158 | logging.debug('跳过错误卡组: {}'.format(deck.name)) 159 | continue 160 | 161 | deck.created_at = datetime.strptime(data.get('time'), '%Y-%m-%d %H:%M:%S') 162 | 163 | duration = decks_duration.get(deck.id) 164 | if duration: 165 | deck.duration = duration.get('ctime') 166 | 167 | # 加入到卡组合集 168 | self.append(deck) 169 | 170 | logging.info('获取到 {} 个卡组'.format(len(self))) 171 | deck_ids = list(self._index.keys()) 172 | 173 | # 使用单独进程来运行爬虫,绕过 twisted reactor 无法重用的问题 174 | with multiprocessing.Pool() as p: 175 | results = p.apply(self._crawl, (deck_ids,)) 176 | 177 | for result in results: 178 | deck = self.get(result['deck_id']) 179 | deck.games = result['games'] 180 | deck.wins = result['wins'] 181 | deck.ranked_games = result['ranked_games'] 182 | deck.ranked_wins = result['ranked_wins'] 183 | deck.users = result['users'] 184 | 185 | # 保存卡组合集 186 | self.save(json_path) 187 | 188 | logging.info('炉石盒子卡组数据更新完成') 189 | 190 | @staticmethod 191 | def _crawl(deck_ids): 192 | logging.info('正在获取游戏结果数据') 193 | results = list() 194 | cp = CrawlerProcess({'ITEM_PIPELINES': {'hsdata.hsbox.HSBoxScrapyPipeline': 1}}) 195 | cp.crawl(HSBoxScrapySpider, deck_ids=deck_ids, results=results) 196 | cp.start() 197 | logging.info('获取到 {} 个卡组的游戏结果数据'.format(len(results))) 198 | return results 199 | 200 | 201 | class HSBoxScrapyItem(scrapy.Item): 202 | games = scrapy.Field() 203 | wins = scrapy.Field() 204 | ranked_games = scrapy.Field() 205 | ranked_wins = scrapy.Field() 206 | users = scrapy.Field() 207 | deck_id = scrapy.Field() 208 | 209 | 210 | class HSBoxScrapySpider(scrapy.Spider): 211 | name = 'hsbox_results' 212 | url_base = 'http://hs.gameyw.netease.com/hs/c/get-cg-info?&cgcode={}' 213 | 214 | def __init__(self, deck_ids, results): 215 | super(HSBoxScrapySpider, self).__init__() 216 | self.deck_ids = deck_ids 217 | self.results = results 218 | 219 | def start_requests(self): 220 | request_list = list() 221 | 222 | for deck_id in self.deck_ids: 223 | request_list.append(scrapy.http.Request( 224 | url=self.url_base.format(deck_id), 225 | meta=dict(deck_id=deck_id) 226 | )) 227 | return request_list 228 | 229 | def parse(self, response): 230 | data = json.loads(response.text) 231 | if data['status']: 232 | item = HSBoxScrapyItem() 233 | r = data['data'] 234 | 235 | item['games'] = r['offensive_count'] + r['subsequent_count'] 236 | item['wins'] = r['offensive_win'] + r['subsequent_win'] 237 | item['ranked_games'] = r['rank_count'] 238 | item['ranked_wins'] = r['rank_win'] 239 | item['users'] = r['users'] 240 | 241 | item['deck_id'] = response.meta['deck_id'] 242 | 243 | yield item 244 | 245 | 246 | class HSBoxScrapyPipeline: 247 | @staticmethod 248 | def process_item(item, spider): 249 | spider.results.append(dict(item)) 250 | -------------------------------------------------------------------------------- /hsdata/hearthstats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding: utf-8 3 | 4 | """ 5 | HearthStats 的卡组和卡组合集类 6 | """ 7 | 8 | import json 9 | import logging 10 | import multiprocessing 11 | import re 12 | from collections import Counter 13 | from datetime import datetime 14 | from urllib.parse import urlencode 15 | 16 | import requests 17 | import scrapy 18 | from scrapy.crawler import CrawlerProcess 19 | 20 | from .core import ( 21 | DATE_TIME_FORMAT, 22 | Deck, Decks, CAREERS, CARDS, 23 | get_career, days_ago 24 | ) 25 | 26 | # 该来源的标识 27 | SOURCE_NAME = 'HEARTHSTATS' 28 | 29 | # 默认的载入和保存文件名,将与 DATA_DIR 拼接 30 | JSON_FILE_NAME = 'Decks_{}.json'.format(SOURCE_NAME) 31 | 32 | ORDER_BY_DESC = 'desc' 33 | ORDER_BY_ASC = 'asc' 34 | 35 | SORT_BY_CREATED_AT = 'created_at' 36 | SORT_BY_LOSSES = 'num_losses' 37 | SORT_BY_GAMES = 'num_matches' 38 | SORT_BY_MINIONS = 'num_minions' 39 | SORT_BY_NAME = 'name' 40 | SORT_BY_SPELLS = 'num_spells' 41 | SORT_BY_USERS = 'num_users' 42 | SORT_BY_WEAPONS = 'num_weapons' 43 | SORT_BY_WIN_RATE = 'winrate' 44 | SORT_BY_WINS = 'num_wins' 45 | 46 | CAREER_MAP = { 47 | 'DRUID': 1, 48 | 'HUNTER': 2, 49 | 'MAGE': 3, 50 | 'PALADIN': 4, 51 | 'PRIEST': 5, 52 | 'ROGUE': 6, 53 | 'SHAMAN': 7, 54 | 'WARLOCK': 8, 55 | 'WARRIOR': 9, 56 | } 57 | 58 | 59 | class HearthStatsDeck(Deck): 60 | # from: http://hearthstats.net/decks/search 61 | # 该类卡组的 source 属性 62 | source = SOURCE_NAME 63 | DECK_URL_TEMPLATE = 'http://hearthstats.net/decks/{}/public_show' 64 | 65 | def __init__(self): 66 | super(HearthStatsDeck, self).__init__() 67 | self.creator_id = None 68 | self.win_rate_by_rank = dict() 69 | 70 | def from_dict(self, dct, cards=None): 71 | win_rate_by_rank = dct.pop('win_rate_by_rank') 72 | for rank, win_rate in win_rate_by_rank.items(): 73 | self.win_rate_by_rank[int(rank)] = win_rate 74 | super(HearthStatsDeck, self).from_dict(dct, cards) 75 | 76 | 77 | class HearthStatsDecks(Decks): 78 | # 当从本地JSON载入卡组时,将把每个卡组转化为该类 79 | deck_class = HearthStatsDeck 80 | 81 | def __init__(self, email=None, password=None, json_path=None, auto_load=True): 82 | """ 83 | 使用 HearthStats 数据源,必须先注册其网站账号后,并在登录后使用 84 | 该数据源没有无需使用 update 方法,请通过 search_online 方法获取卡组数据 85 | 注册页面: http://hearthstats.net/users/sign_up 86 | :param email: 登录邮箱 87 | :param password: 登录密码 88 | """ 89 | logging.info('初始化卡组合集 (HearthStats)') 90 | super(HearthStatsDecks, self).__init__( 91 | json_path=json_path, 92 | auto_load=auto_load, 93 | update_if_not_found=False) 94 | 95 | self.session = requests.Session() 96 | self._logged_in = False 97 | self.search_url = None 98 | 99 | self.login(email, password) 100 | 101 | @property 102 | def logged_in(self): 103 | return self._logged_in 104 | 105 | def update(self, json_path=None): 106 | logging.warning('该数据来源不支持 update 方法,请直接使用 search_online 方法') 107 | 108 | def login(self, email, password): 109 | if not email or not password: 110 | self._logged_in = False 111 | return 112 | logging.info('正在登录 HearthStats') 113 | r = self.session.post( 114 | url='http://hearthstats.net/api/v3/users/sign_in', 115 | json=dict(user_login=dict(email=email, password=password)) 116 | ) 117 | r.raise_for_status() 118 | if r.json().get('success'): 119 | self._logged_in = True 120 | logging.info('登录成功') 121 | else: 122 | raise Exception('登陆失败: {}'.format(r.json().get('message'))) 123 | 124 | def search_online( 125 | self, 126 | career=None, 127 | created_after=days_ago(30), 128 | min_games=300, 129 | name='', 130 | sort_by=SORT_BY_WIN_RATE, 131 | order_by=ORDER_BY_DESC, 132 | ): 133 | """ 134 | 在 hearthstats 网站中搜索卡组 135 | :param career: 职业 136 | :param created_after: 在 XXXX-XX-XX 后创建 137 | :param min_games: 最少游戏次数 138 | :param name: 卡组名称 139 | :param sort_by: 排列方式 140 | :param order_by: 正序或倒序 141 | """ 142 | 143 | if not self._logged_in: 144 | logging.warning('尚未登录账号') 145 | return 146 | 147 | if not career: 148 | career = '' 149 | else: 150 | career = get_career(career) 151 | career = CAREER_MAP[career.class_name] 152 | 153 | if not created_after: 154 | created_after = '' 155 | elif isinstance(created_after, datetime): 156 | created_after = created_after.strftime(DATE_TIME_FORMAT) 157 | 158 | qs = urlencode({ 159 | 'utf8': '✓', 160 | 'q[klass_id_eq]': career, 161 | 'q[unique_deck_created_at_gteq]': created_after, 162 | 'q[unique_deck_num_matches_gteq]': min_games, 163 | 'q[name_cont]': name, 164 | 'items': 1000000, 165 | 'sort': sort_by, 166 | 'order': order_by, 167 | 'commit': 'Apply', 168 | }) 169 | 170 | self.search_url = 'http://hearthstats.net/decks/search?{}'.format(qs) 171 | 172 | logging.info('正在搜索卡组') 173 | 174 | r = self.session.get(self.search_url) 175 | r.raise_for_status() 176 | 177 | deck_ids = re.findall(r'(?<=href="/decks/)[^/]+(?=/public_show)', r.text) 178 | 179 | if not deck_ids: 180 | logging.info('未找到符合条件的卡组,试试放宽条件吧') 181 | return 182 | 183 | logging.info('找到 {} 个符合条件的卡组'.format(len(deck_ids))) 184 | 185 | # 清除原有的数据 186 | self.clear() 187 | 188 | # 使用单独进程来运行爬虫,绕过 twisted reactor 无法重用的问题 189 | with multiprocessing.Pool() as p: 190 | decks = p.apply(self._crawl, (deck_ids,)) 191 | 192 | if decks: 193 | # 爬完后的内容是乱序的,需恢复为原结果列表的顺序 194 | decks.sort(key=lambda x: deck_ids.index(x.id)) 195 | # 加入到卡组合集中 196 | self.extend(decks) 197 | 198 | logging.info('卡组数据获取完成 ({}/{})'.format( 199 | len(self), len(deck_ids) 200 | )) 201 | 202 | self.save() 203 | 204 | @staticmethod 205 | def _crawl(deck_ids): 206 | logging.info('正在获取卡组数据') 207 | decks = list() 208 | cp = CrawlerProcess({'ITEM_PIPELINES': {'hsdata.hearthstats.HearthStatsScrapyPipeline': 1}}) 209 | cp.crawl(HearthStatsScrapySpider, deck_ids=deck_ids, decks=decks) 210 | cp.start() 211 | return decks 212 | 213 | 214 | class HearthStatsScrapyItem(scrapy.Item): 215 | name = scrapy.Field() 216 | id = scrapy.Field() 217 | career = scrapy.Field() 218 | cards = scrapy.Field() 219 | games = scrapy.Field() 220 | wins = scrapy.Field() 221 | draws = scrapy.Field() 222 | creator_id = scrapy.Field() 223 | win_rate_by_rank = scrapy.Field() 224 | 225 | 226 | class HearthStatsScrapySpider(scrapy.Spider): 227 | name = 'hearthstats_decks' 228 | 229 | def __init__(self, deck_ids, decks): 230 | super(HearthStatsScrapySpider, self).__init__() 231 | self.deck_ids = deck_ids 232 | self.decks = decks 233 | 234 | def start_requests(self): 235 | request_list = list() 236 | 237 | for deck_id in self.deck_ids: 238 | request_list.append(scrapy.http.Request( 239 | url=HearthStatsDeck.DECK_URL_TEMPLATE.format(deck_id), 240 | meta=dict(deck_id=deck_id) 241 | )) 242 | return request_list 243 | 244 | def parse(self, response): 245 | 246 | item = HearthStatsScrapyItem() 247 | 248 | item['name'] = response.xpath('//meta[@name="description"]/@content').extract()[0] 249 | item['id'] = response.meta['deck_id'] 250 | 251 | block_1 = response.css('div.col-md-4.col-sm-4.col-xs-4 div.win-count') 252 | 253 | item['creator_id'] = block_1[0].xpath('.//a/@href').extract()[0].rsplit('/', 1)[1] 254 | item['career'] = CAREERS.get(block_1[1].xpath('.//img/@alt').extract()[0].upper()) 255 | 256 | block_2 = response.css('div.col-md-2.col-sm-2.col-xs-4 div.win-count span') 257 | item['wins'] = int(block_2[0].xpath('text()')[0].extract()) 258 | losses = int(block_2[1].xpath('text()')[0].extract()) 259 | item['draws'] = int(block_2[2].xpath('text()')[0].extract()) 260 | item['games'] = item['wins'] + losses + item['draws'] 261 | 262 | cards = Counter() 263 | 264 | for card_div in response.css('div.card.cardWrapper'): 265 | img_src = card_div.xpath('img[@class="image"]/@src').extract()[0] 266 | card_id = img_src.rsplit('/', 1)[1].split('.', 1)[0] 267 | card = CARDS.get(card_id) 268 | count = int(card_div.xpath('div[@class="qty"]/text()')[0].extract()) 269 | cards[card] = count 270 | 271 | item['cards'] = cards 272 | 273 | try: 274 | m = re.search(r'(?<=gon\.rank_wr=)[\[\],.\d\s]+?(?=;)', response.text) 275 | win_rate_by_rank = dict(json.loads(m.group())) 276 | if win_rate_by_rank: 277 | for rank in win_rate_by_rank: 278 | win_rate_by_rank[rank] /= 100 279 | item['win_rate_by_rank'] = win_rate_by_rank 280 | except (ValueError, TypeError, json.JSONDecodeError): 281 | item['win_rate_by_rank'] = dict() 282 | 283 | yield item 284 | 285 | 286 | class HearthStatsScrapyPipeline: 287 | @staticmethod 288 | def process_item(item, spider): 289 | deck = HearthStatsDeck() 290 | 291 | deck.name = item['name'] 292 | deck.id = item['id'] 293 | deck.career = item['career'] 294 | deck.cards = item['cards'] 295 | deck.games = item['games'] 296 | deck.wins = item['wins'] 297 | deck.draws = item['draws'] 298 | deck.creator_id = item['creator_id'] 299 | deck.win_rate_by_rank = item['win_rate_by_rank'] 300 | 301 | spider.decks.append(deck) 302 | -------------------------------------------------------------------------------- /hsdata/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding: utf-8 3 | 4 | 5 | """ 6 | 一些实用的小功能 7 | """ 8 | import csv 9 | import logging 10 | import os 11 | from collections import Counter 12 | from datetime import datetime, timedelta 13 | 14 | from .core import ( 15 | MODE_STANDARD, 16 | Decks, 17 | days_ago, 18 | Career, CAREERS, Cards) 19 | from .hearthstats import HearthStatsDecks 20 | from .hsbox import HSBoxDecks 21 | 22 | 23 | def diff_decks(*decks): 24 | """ 25 | 卡组对比 26 | :param decks: 两个或以上的卡组 27 | :return: 返回每个卡组特有的部分 28 | """ 29 | 30 | intersection = decks[0].cards & decks[1].cards 31 | for deck in decks[2:]: 32 | intersection &= deck.cards 33 | 34 | differs = dict(intersection=intersection) 35 | 36 | for deck in decks: 37 | differs[deck] = deck.cards - intersection 38 | 39 | return differs 40 | 41 | 42 | def decks_expired(decks, expired=timedelta(days=1)): 43 | """ 44 | 检查 Decks 是否已过期 45 | :param decks: Decks 对象 46 | :param expired: 有效期长度 47 | :return: 若已过期则返回True 48 | """ 49 | if os.path.isfile(decks.json_path): 50 | m_time = os.path.getmtime(decks.json_path) 51 | if datetime.fromtimestamp(m_time) < (datetime.today() - expired): 52 | return True 53 | else: 54 | return False 55 | return True 56 | 57 | 58 | def cards_value(decks, mode=MODE_STANDARD): 59 | """ 60 | 区分职业的单卡价值排名,可在纠结是否合成或拆解时作为参考 61 | 62 | decks: 所在卡组数量 63 | games: 所在卡组游戏次数总和 64 | wins: 所在卡组获胜次数总和 65 | win_rate: 所在卡组平均胜率 (wins/games) 66 | *_rank: 在当前职业所有卡牌中的 * 排名 67 | *_rank%: 在当前职业所有卡牌中的 * 排名百分比 (排名/卡牌数) 68 | 69 | :param decks: 卡组合集,作为分析数据源 70 | :param mode: 模式 71 | :return: 单卡价值排名数据 72 | """ 73 | 74 | if not isinstance(decks, Decks): 75 | raise TypeError('from_decks 须为 Decks 对象') 76 | 77 | total = 'total' 78 | ranked_keys = 'decks', 'games', 'wins', 'win_rate' 79 | rpf = '_rank' 80 | ppf = '%' 81 | 82 | stats = dict() 83 | stats[total] = dict() 84 | 85 | for deck in decks.search(mode=mode): 86 | career = deck.career 87 | if career not in stats: 88 | stats[career] = dict() 89 | for card, count in deck.cards.items(): 90 | for k in total, career: 91 | if card not in stats[k]: 92 | stats[k][card] = dict( 93 | decks=0, games=0, wins=0, count=0) 94 | stats[k][card]['decks'] += 1 95 | stats[k][card]['games'] += deck.games or 0 96 | stats[k][card]['wins'] += deck.wins or 0 97 | stats[k][card]['count'] += count 98 | 99 | for k in stats: 100 | for c in stats[k]: 101 | try: 102 | stats[k][c]['win_rate'] = stats[k][c]['wins'] / stats[k][c]['games'] 103 | except ZeroDivisionError: 104 | stats[k][c]['win_rate'] = None 105 | stats[k][c]['avg_count'] = stats[k][c]['count'] / stats[k][c]['decks'] 106 | 107 | rkvl = dict() 108 | 109 | for k in stats: 110 | if k not in rkvl: 111 | rkvl[k] = dict() 112 | for rk in ranked_keys: 113 | vl = [s[rk] for c, s in stats[k].items()] 114 | vl = list(filter(lambda x: x, vl)) 115 | vl.sort(reverse=True) 116 | rkvl[k][rk] = vl 117 | 118 | for k in stats: 119 | for c in stats[k]: 120 | for rk in ranked_keys: 121 | if stats[k][c][rk]: 122 | rank = rkvl[k][rk].index(stats[k][c][rk]) + 1 123 | stats[k][c][rk + rpf] = rank 124 | stats[k][c][rk + rpf + ppf] = rank / len(stats[k]) 125 | else: 126 | stats[k][c][rk + rpf] = None 127 | stats[k][c][rk + ppf] = None 128 | 129 | return stats 130 | 131 | 132 | def get_all_decks( 133 | hsn_email=None, hsn_password=None, 134 | hsn_min_games=300, hsn_created_after=days_ago(30), 135 | expired=timedelta(days=1) 136 | ): 137 | """ 138 | 获得获取所有卡组数据 139 | :param hsn_email: Hearthstats 的登陆邮箱 140 | :param hsn_password: Hearthstats 的登陆密码 141 | :param hsn_min_games: Hearthstats 的搜索参数 最少游戏次数 142 | :param hsn_created_after: Hearthstats 最早更新时间 143 | :param expired: 过期时间,若载入的数据是次时间前获得的,则重新获取新数据 144 | :return: 返回 Decks 对象,包含所有数据源的卡组 145 | """ 146 | 147 | decks = Decks() 148 | 149 | hsb = HSBoxDecks() 150 | if decks_expired(hsb, expired): 151 | hsb.update() 152 | decks.extend(hsb) 153 | 154 | if hsn_email and hsn_password: 155 | hsn = HearthStatsDecks() 156 | if decks_expired(hsn, expired): 157 | hsn.login(hsn_email, hsn_password) 158 | hsn.search_online(min_games=hsn_min_games, created_after=hsn_created_after) 159 | decks.extend(hsn) 160 | 161 | return decks 162 | 163 | 164 | class DeckGenerator: 165 | def __init__( 166 | self, 167 | career, decks, 168 | include=None, exclude=None, 169 | mode=MODE_STANDARD): 170 | 171 | """ 172 | 通过若干包含游戏次数和胜率的卡组合集,找出其中高价值的卡牌,生成新的卡组(.cards) 173 | 174 | :param career: 指定职业 175 | :param decks: 来源卡组合集 176 | :param include: 生成的新卡组中将包含这些卡,应为 dict 对象,key为卡牌,value为数量 177 | :param exclude: 生成的新卡组中将排除这些卡,应为 dict 对象,key为卡牌,value为数量 178 | :param mode: 指定模式 179 | """ 180 | 181 | self._career = None 182 | self.cards_stats = None 183 | self.top_decks = None 184 | 185 | self.career = career 186 | 187 | if decks and not isinstance(decks, list): 188 | raise TypeError('decks 应为 list') 189 | self.decks = decks or list() 190 | 191 | if include and not isinstance(include, dict): 192 | raise TypeError('include 应为 dict') 193 | self.include = include or Counter() 194 | 195 | if exclude and not isinstance(exclude, dict): 196 | raise TypeError('exclude 应为 dict') 197 | self.exclude = exclude or Counter() 198 | 199 | self.mode = mode 200 | 201 | self.top_decks_total_games = None 202 | self._gen_cards_stats() 203 | 204 | @property 205 | def cards(self): 206 | 207 | cards = Counter(self.include) 208 | 209 | exclude = Counter(self.exclude) 210 | 211 | for card, stats in self.cards_stats: 212 | count = 2 if stats['avg_count'] > 1.5 else 1 213 | 214 | if cards.get(card, 0) > count: 215 | count = 1 if card.rarity == 'LEGENDARY' else 2 216 | 217 | if card in exclude: 218 | count -= exclude.get(card) 219 | if count < 1: 220 | logging.info('排除卡牌: {}'.format(card.name)) 221 | continue 222 | 223 | games_percentage = stats['total_games'] / self.top_decks_total_games 224 | if card not in self.include and games_percentage < 0.1: 225 | logging.info('排除冷门卡牌: {} (使用率 {:.2%})'.format( 226 | card.name, games_percentage)) 227 | continue 228 | 229 | cards[card] = count 230 | 231 | cards_count = sum(list(cards.values())) 232 | if cards_count == 30: 233 | break 234 | elif cards_count > 30: 235 | cards.subtract([card]) 236 | break 237 | 238 | total_count = sum(cards.values()) 239 | if total_count < 30: 240 | logging.warning('推荐卡牌数量不足,仅为 {} 张!'.format(total_count)) 241 | 242 | return Counter(dict(filter(lambda x: x[1] > 0, cards.items()))) 243 | 244 | @property 245 | def career(self): 246 | return self._career 247 | 248 | @career.setter 249 | # TODO: 考虑做成公共的 250 | def career(self, value): 251 | if not value: 252 | raise ValueError('career 不可为空') 253 | if isinstance(value, Career): 254 | career = value 255 | elif isinstance(value, str): 256 | career = CAREERS.search(value) 257 | else: 258 | raise TypeError('career 不支持 {} 类型的数值'.format(type(value).__name__)) 259 | 260 | if career in (CAREERS.get('NEUTRAL'), CAREERS.get('DREAM')): 261 | raise ValueError('不能为该职业: {}'.format(career.name)) 262 | 263 | if not career: 264 | raise ValueError('未找到该职业: {}'.format(value)) 265 | 266 | self._career = career 267 | logging.info('设置职业为: {}'.format(career.name)) 268 | 269 | def __setattr__(self, key, value): 270 | super(DeckGenerator, self).__setattr__(key, value) 271 | if key in ('career', 'decks', 'mode') and self.cards_stats: 272 | self._gen_cards_stats() 273 | 274 | def _gen_cards_stats(self): 275 | decks = list(filter(lambda x: x.games, self.decks)) 276 | self.decks = Decks(decks) 277 | 278 | cards_stats, self.top_decks = self.decks.career_cards_stats( 279 | career=self.career, mode=self.mode, top_win_rate_percentage=0.1) 280 | 281 | self.top_decks_total_games = sum(map(lambda x: x.games, self.top_decks)) 282 | 283 | self.cards_stats = list(cards_stats.items()) 284 | self.cards_stats.sort(key=lambda x: x[1]['avg_win_rate'], reverse=True) 285 | 286 | def add_include(self, card, count=1): 287 | self.include.update({card: count}) 288 | 289 | def add_exclude(self, card, count=1): 290 | self.exclude.update({card: count}) 291 | 292 | def remove_include(self, card, count=1): 293 | self.include.subtract({card: count}) 294 | 295 | def remove_exclude(self, card, count=1): 296 | self.exclude.subtract({card: count}) 297 | 298 | 299 | def print_cards(cards, return_text_only=False, sep=' ', rarity=True): 300 | """ 301 | 但法力值从小到大打印卡牌列表 302 | :param cards: 卡牌 list 或 Counter 303 | :param return_text_only: 选项,仅返回文本 304 | :param sep: 卡牌名称和数量之间的分隔符 305 | """ 306 | 307 | if isinstance(cards, list): 308 | cards = Counter(cards) 309 | elif not isinstance(cards, Counter): 310 | raise TypeError('cards 参数应为 list 或 Counter 类型') 311 | 312 | cards = list(cards.items()) 313 | cards.sort(key=lambda x: x[0].name) 314 | cards.sort(key=lambda x: x[0].cost or 0) 315 | 316 | text = list() 317 | for card, count in cards: 318 | line = '{}{}{}'.format(card.name, sep, count) 319 | if rarity and card.rarity not in ('FREE', 'COMMON'): 320 | line = '({}) {}'.format(card.rarity[0], line) 321 | text.append(line) 322 | text = '\n'.join(text) 323 | 324 | if return_text_only: 325 | return text 326 | else: 327 | print(text) 328 | 329 | 330 | def cards_to_csv(save_path, cards=None): 331 | 332 | """ 333 | 将卡牌保存为 CSV 文件,方便使用 Excel 等工具进行分析 334 | :param cards: cards 对象 335 | :param save_path: 保存路径,例如 cards.csv 336 | """ 337 | 338 | if cards is None: 339 | cards = Cards() 340 | 341 | # 仅列出相对常用的字段 342 | fields = [ 343 | 'id', 'name', 'text', 'cost', 'overload', 'type', 'race', 344 | 'careers', 'multiClassGroup', 'set', 'collectible', 345 | 'rarity', 'dust', 'howToEarn', 'howToEarnGolden', 346 | 'health', 'attack', 'durability', 'spellDamage', 347 | ] 348 | 349 | with open(save_path, 'w') as f: 350 | writer = csv.writer(f) 351 | writer.writerow(fields) 352 | for card in cards: 353 | row = list() 354 | for field in fields: 355 | field = getattr(card, field) 356 | if isinstance(field, (list, tuple, set)): 357 | field = ', '.join(list(map(str, field))) 358 | elif isinstance(field, type(None)): 359 | field = '' 360 | elif not isinstance(field, (str, int, float)): 361 | field = str(field) 362 | row.append(field) 363 | writer.writerow(row) 364 | -------------------------------------------------------------------------------- /hsdata/core.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding: utf-8 3 | 4 | """ 5 | 核心模块,包含所有基本类 6 | ~~~~~~~~~~~~~~~~~~~~~ 7 | 8 | 包括: 9 | 10 | * Career: 单个职业 11 | * Careers: 职业合集,附带一些实用的方法 12 | * Card: 单张卡牌 13 | * Cards: 卡牌合集,附带一些实用的方法 14 | * Deck: 单个卡组 15 | * Decks: 卡组合集,附带一些实用的方法 16 | 17 | """ 18 | 19 | import json 20 | import logging 21 | import os 22 | import re 23 | import webbrowser 24 | from collections import Counter 25 | from copy import deepcopy 26 | from datetime import datetime, timedelta 27 | 28 | import requests 29 | 30 | DATA_DIR = 'data' 31 | 32 | MODE_STANDARD = 'STANDARD' 33 | MODE_WILD = 'WILD' 34 | 35 | DATE_TIME_FORMAT = '%Y-%m-%d %H:%M:%S' 36 | 37 | CARDS_SOURCE_URL = 'https://api.hearthstonejson.com/v1/' 38 | 39 | PACKAGE_DIR = os.path.dirname(os.path.realpath(__file__)) 40 | 41 | 42 | class Career: 43 | def __init__(self, class_name): 44 | self.class_name = class_name 45 | 46 | @property 47 | def name(self): 48 | """ 49 | 获取当前职业在当前主语言中的名称 50 | :return: 职业名称 51 | """ 52 | 53 | try: 54 | return CAREER_NAMES[self.class_name] 55 | except (TypeError, KeyError): 56 | return self.class_name 57 | 58 | @property 59 | def heroes(self): 60 | return Careers.CAREER_HEROES.get(self.class_name, list()) 61 | 62 | def __repr__(self): 63 | return '<{}: {} ({})>'.format( 64 | self.__class__.__name__, 65 | self.name, 66 | self.class_name) 67 | 68 | def __str__(self): 69 | return self.name 70 | 71 | def __eq__(self, other): 72 | if isinstance(other, Career): 73 | return self.class_name == other.class_name 74 | 75 | def __hash__(self): 76 | return hash('<__hsdata.Career__: class_name="{}">'.format(self.class_name)) 77 | 78 | def can_have(self, card): 79 | return self in card.careers 80 | 81 | 82 | class Careers(list): 83 | """ 84 | 职业合集,附带一些实用的方法 85 | """ 86 | 87 | CLASS_NAMES = ( 88 | 'HUNTER', 'PRIEST', 'SHAMAN', 89 | 'ROGUE', 'DRUID', 'PALADIN', 90 | 'MAGE', 'WARRIOR', 'WARLOCK', 91 | 'NEUTRAL', 92 | 'DREAM', 93 | ) 94 | 95 | # 各职业的英雄名称,将在 Cards.load() 时被添加 96 | CAREER_HEROES = dict() 97 | 98 | def __init__(self): 99 | super(Careers, self).__init__() 100 | 101 | self._index = dict() 102 | for class_name in self.CLASS_NAMES: 103 | career = Career(class_name) 104 | self.append(career) 105 | 106 | def append(self, career): 107 | self._index[career.class_name] = career 108 | return super(Careers, self).append(career) 109 | 110 | def clear(self): 111 | self._index.clear() 112 | return super(Careers, self).clear() 113 | 114 | def get(self, class_name): 115 | """ 116 | 根据 class_name 获取职业 117 | :param class_name: 可以理解为职业的 ID 118 | :return: 单个职业 119 | """ 120 | return self._index.get(class_name) 121 | 122 | def search(self, keywords): 123 | """ 124 | 根据关键词搜索职业,将在 class_name,职业名称,英雄名称 中进行搜索 125 | :param keywords: 关键词,可以是列表或字串 126 | :return: 单个职业 127 | """ 128 | 129 | if not keywords: 130 | return self.get('NEUTRAL') 131 | 132 | # 需要载入卡牌来填充各职业的英雄关键词 133 | CARDS.load_if_empty() 134 | 135 | if isinstance(keywords, str): 136 | keywords = _split_keywords(keywords) 137 | 138 | for career in self: 139 | if _all_keywords_in_text(keywords, career.class_name): 140 | return career 141 | 142 | for career in self: 143 | if _all_keywords_in_text(keywords, career.name): 144 | return career 145 | 146 | for career in self: 147 | for hero in career.heroes: 148 | if _all_keywords_in_text(keywords, hero): 149 | return career 150 | 151 | @property 152 | def basic(self): 153 | return self[:9] 154 | 155 | 156 | class Card: 157 | """单张卡牌""" 158 | 159 | def __init__(self): 160 | self.id = None 161 | self.type = None 162 | self.set = None 163 | self.name = None 164 | self.playerClass = None 165 | self.text = None 166 | self.cost = None 167 | self.rarity = None 168 | self.health = None 169 | self.attack = None 170 | self.artist = None 171 | self.collectible = None 172 | self.flavor = None 173 | self.mechanics = None 174 | self.dust = None 175 | self.playRequirements = None 176 | self.race = None 177 | self.howToEarnGolden = None 178 | self.howToEarn = None 179 | self.faction = None 180 | self.durability = None 181 | self.entourage = None 182 | self.targetingArrowText = None 183 | self.overload = None 184 | self.spellDamage = None 185 | # 201612: 加基森版本新增了3个字段 186 | self.classes = None 187 | self.multiClassGroup = None 188 | self.collectionText = None 189 | 190 | @property 191 | def career(self): 192 | return CAREERS.get(self.playerClass) 193 | 194 | @property 195 | def careers(self): 196 | if self.classes: 197 | return list(map(lambda x: CAREERS.get(x), self.classes)) 198 | elif self.career == CAREERS.get('NEUTRAL'): 199 | return CAREERS.basic 200 | elif self.career: 201 | return [self.career] 202 | else: 203 | return list() 204 | 205 | @property 206 | def mode(self): 207 | if self.set in EXPIRED_SETS: 208 | return MODE_WILD 209 | else: 210 | return MODE_STANDARD 211 | 212 | def __repr__(self): 213 | return '<{}: {} ({})>'.format(self.__class__.__name__, self.name, self.id) 214 | 215 | def __eq__(self, other): 216 | return self.id.lower() == other.id.lower() 217 | 218 | def __hash__(self): 219 | return hash('<__hs.Card__: name="{}", id="{}">'.format(self.name, self.id)) 220 | 221 | 222 | class Cards(list): 223 | """ 224 | 卡牌合集,附带一些实用的方法 225 | """ 226 | 227 | def __init__(self, json_path=None, update_if_not_found=True, lazy_load=False): 228 | """ 229 | :param json_path: 读取或保存的JSON路径 230 | :param update_if_not_found: 选项,若上述文件不存在,则自动更新 231 | :param lazy_load: 选项,若为True,则在初始化时不载入实际数据,直到调用 get 或 search 方法 232 | """ 233 | super(Cards, self).__init__() 234 | 235 | if not json_path: 236 | json_path = os.path.join(DATA_DIR, CARDS_JSON_FILE_NAME) 237 | self.json_path = json_path 238 | 239 | self._index = dict() 240 | 241 | self.update_if_not_found = update_if_not_found 242 | 243 | if not lazy_load: 244 | self.load() 245 | 246 | def append(self, card): 247 | self._index[card.id] = card 248 | return super(Cards, self).append(card) 249 | 250 | def clear(self): 251 | self._index.clear() 252 | return super(Cards, self).clear() 253 | 254 | def load_if_empty(self, json_path=None): 255 | """ 256 | 避免在模块初始化时执行载入(会产生文件) 257 | """ 258 | if not self: 259 | self.load(json_path) 260 | 261 | def load(self, json_path=None): 262 | """ 263 | 载入本地的卡牌数据 264 | :param json_path: 文件路径 265 | """ 266 | 267 | if not json_path: 268 | json_path = self.json_path 269 | 270 | if not os.path.isfile(json_path): 271 | if self.update_if_not_found: 272 | logging.info('未找到卡牌数据,将自动获取最新的数据') 273 | self.update(json_path) 274 | else: 275 | logging.warning('未找到卡牌数据,请使用 Cards().update() 获取最新的数据') 276 | return 277 | 278 | with open(json_path) as f: 279 | json_data = json.load(f) 280 | 281 | self.clear() 282 | 283 | logging.info('载入卡牌数据 {}'.format(json_path)) 284 | 285 | for data in json_data: 286 | card = Card() 287 | 288 | for k, v in data.items(): 289 | setattr(card, k, v) 290 | 291 | # HearthstoneJSON 中不可收集的卡牌没有设置 collectible 属性,添加该属性 292 | if card.collectible is None: 293 | card.collectible = False 294 | 295 | if card.type == 'HERO': 296 | # 将发现的英雄添加到 Careers.CAREER_HEROES 中 297 | if card.playerClass not in Careers.CAREER_HEROES: 298 | Careers.CAREER_HEROES[card.playerClass] = list() 299 | if card.name not in Careers.CAREER_HEROES[card.playerClass]: 300 | Careers.CAREER_HEROES[card.playerClass].append(card.name) 301 | 302 | self.append(card) 303 | 304 | def update(self, json_path=None, hs_version_code=None): 305 | """ 306 | 获取卡牌数据,保存为JSON,并返回一个新的 Cards 对象 307 | :param json_path: 保存路径 308 | :param hs_version_code: 炉石版本号,不填写则自动获取最新的 309 | :return: 新的 Cards 对象 310 | """ 311 | 312 | if not json_path: 313 | json_path = self.json_path 314 | 315 | logging.info('开始更新卡牌数据,将保存到 {}'.format(json_path)) 316 | s = requests.Session() 317 | 318 | if not hs_version_code: 319 | r = s.get(CARDS_SOURCE_URL) 320 | r.raise_for_status() 321 | hs_version_codes = re.findall(r'href="/v1/(\d+)/all/"', r.text) 322 | hs_version_code = max(list(map(int, hs_version_codes))) 323 | logging.info('找到最新的对应炉石版本号: {}'.format(hs_version_code)) 324 | 325 | json_url = '{}{}/{}/cards.json'.format( 326 | CARDS_SOURCE_URL, hs_version_code, MAIN_LANGUAGE) 327 | 328 | logging.info('正在下载卡牌数据') 329 | r = s.get(json_url) 330 | r.raise_for_status() 331 | 332 | # 校验JSON 333 | r.json() 334 | 335 | _prepare_dir(json_path) 336 | 337 | with open(json_path, 'wb') as f: 338 | f.write(r.content) 339 | 340 | self.load(json_path) 341 | 342 | logging.info('卡牌数据更新完成') 343 | 344 | def get(self, card_id): 345 | """ 346 | 根据 ID 获取卡牌 347 | :param card_id: 卡牌 ID 348 | :return: 单张卡牌 349 | """ 350 | self.load_if_empty() 351 | return self._index.get(card_id) 352 | 353 | def search( 354 | self, 355 | in_name=None, in_text=None, career=None, 356 | cost=None, collectible=None, return_first=True 357 | ): 358 | """ 359 | 根据指定条件搜索卡牌 360 | :param in_name: 名称关键词 361 | :param in_text: 卡牌描述关键词 362 | :param career: 对应职业 363 | :param cost: 卡牌的法力消耗值 364 | :param collectible: 是否可收集 365 | :param return_first: 选项,只返回首个匹配的卡牌 366 | :return: 根据 return_first 参数返回 单个职业/None 或 列表 367 | """ 368 | 369 | self.load_if_empty() 370 | 371 | if in_name: 372 | name_keywords = _split_keywords(in_name) 373 | else: 374 | name_keywords = None 375 | 376 | if in_text: 377 | text_keywords = _split_keywords(in_text) 378 | else: 379 | text_keywords = None 380 | 381 | if career: 382 | career = get_career(career) 383 | 384 | found = None if return_first else list() 385 | 386 | for card in self: 387 | if name_keywords and not _all_keywords_in_text(name_keywords, card.name or ''): 388 | continue 389 | elif text_keywords and not _all_keywords_in_text(text_keywords, card.text or ''): 390 | continue 391 | elif career and not card.career == career: 392 | continue 393 | elif cost is not None and not card.cost == cost: 394 | continue 395 | elif collectible is not None and not card.collectible == collectible: 396 | continue 397 | else: 398 | if return_first: 399 | return card 400 | else: 401 | found.append(card) 402 | 403 | return found 404 | 405 | 406 | class Deck: 407 | source = None 408 | DECK_URL_TEMPLATE = None 409 | 410 | def __init__(self): 411 | self.name = '' 412 | self.id = '' 413 | 414 | self.career = None 415 | self.cards = Counter() 416 | 417 | self.games = 0 418 | self.wins = 0 419 | self.draws = 0 420 | 421 | @property 422 | def win_rate(self): 423 | if self.games: 424 | return self.wins / self.games 425 | 426 | @property 427 | def losses(self): 428 | if self.games: 429 | return self.games - (self.wins or 0) - (self.draws or 0) 430 | 431 | @property 432 | def mode(self): 433 | for card in self.cards: 434 | if card.set in EXPIRED_SETS: 435 | return MODE_WILD 436 | else: 437 | return MODE_STANDARD 438 | 439 | @property 440 | def url(self): 441 | if self.id: 442 | return self.DECK_URL_TEMPLATE.format(self.id) 443 | 444 | @property 445 | def crafting_cost(self): 446 | dust = 0 447 | for card, count in self.cards.items(): 448 | if card.dust: 449 | dust += card.dust[0] * count 450 | return dust 451 | 452 | def to_dict(self): 453 | """ 454 | 用于保存为JSON 455 | :return 字典对象 456 | """ 457 | dct = deepcopy(self.__dict__) 458 | dct['career'] = self.career.class_name 459 | 460 | cards_dict = dict() 461 | for card, count in self.cards.items(): 462 | cards_dict[card.id] = count 463 | dct['cards'] = cards_dict 464 | 465 | return dct 466 | 467 | def from_dict(self, dct, cards=None): 468 | """ 469 | 用于从JSON读取 470 | :param dct: 读取到的字典对象 471 | :param cards: 用于将卡牌ID转化为卡牌对象 472 | """ 473 | 474 | class_name = dct.pop('career') 475 | cards_dict = dct.pop('cards', dict()) 476 | self.career = CAREERS.get(class_name) 477 | 478 | if not cards: 479 | cards = CARDS 480 | 481 | for card_id, count in cards_dict.items(): 482 | self.cards[cards.get(card_id)] = count 483 | 484 | for k, v in dct.items(): 485 | setattr(self, k, v) 486 | 487 | def open(self): 488 | if self.url: 489 | webbrowser.open(self.url) 490 | else: 491 | logging.warning('无法在浏览器中打开{},缺少URL'.format(self)) 492 | 493 | def __repr__(self): 494 | return '<{}: {}>'.format(self.__class__.__name__, self.name) 495 | 496 | 497 | class Decks(list): 498 | deck_class = Deck 499 | 500 | def __init__( 501 | self, deck_list=None, json_path=None, auto_load=False, update_if_not_found=True, cards=None): 502 | """ 503 | :param deck_list: 一个 Deck 列表,用于直接转换为 Decks 对象 504 | :param cards: Cards 对象,用于将卡组内的卡牌ID转化为Card对象 505 | """ 506 | super(Decks, self).__init__() 507 | 508 | self.source = self.deck_class.source 509 | 510 | self._index = dict() 511 | 512 | if deck_list: 513 | self.extend(deck_list) 514 | 515 | if not json_path: 516 | json_path = os.path.join(DATA_DIR, 'DECKS_{}.json'.format(self.source)) 517 | 518 | self.json_path = json_path 519 | self.update_if_not_found = update_if_not_found 520 | 521 | if not cards: 522 | cards = CARDS 523 | self.cards = cards 524 | self.cards.load_if_empty() 525 | 526 | if auto_load: 527 | self.load(self.json_path) 528 | 529 | def append(self, deck): 530 | if not isinstance(deck, Deck): 531 | raise TypeError('{} 只能追加 Deck 对象'.format(self.__class__.__name__)) 532 | self._index[deck.id] = deck 533 | return super(Decks, self).append(deck) 534 | 535 | def extend(self, decks): 536 | for deck in decks: 537 | if not isinstance(deck, Deck): 538 | raise TypeError('应为 Deck 对象,得到了 {}'.format(type(deck).__name__)) 539 | self._index[deck.id] = deck 540 | return super(Decks, self).extend(decks) 541 | 542 | def remove(self, deck): 543 | del self._index[deck.id] 544 | return super(Decks, self).remove(deck) 545 | 546 | def clear(self): 547 | self._index.clear() 548 | return super(Decks, self).clear() 549 | 550 | def update(self, json_path=None): 551 | """ 552 | 从数据源获取卡组数据 553 | :param json_path: 保存路径 554 | """ 555 | 556 | # 具体的获取过程由子类实现 557 | pass 558 | 559 | def save(self, json_path=None): 560 | """ 561 | 将卡组合集保存为JSON文件 562 | :param json_path: 保存路径 563 | """ 564 | 565 | if not json_path: 566 | json_path = self.json_path 567 | 568 | save_list = list() 569 | 570 | for deck in self: 571 | save_list.append(deck.to_dict()) 572 | 573 | _prepare_dir(json_path) 574 | with open(json_path, 'w') as f: 575 | json.dump(save_list, f, ensure_ascii=False) 576 | 577 | logging.info('已保存到 {}'.format(json_path)) 578 | 579 | def load(self, json_path=None): 580 | """ 581 | 从JSON文件中载入卡组合集 582 | :param json_path: JSON文件路径 583 | """ 584 | 585 | if not json_path: 586 | json_path = self.json_path 587 | 588 | if not os.path.isfile(json_path): 589 | if self.update_if_not_found: 590 | logging.info('未找到卡组数据,将自动获取最新的数据') 591 | self.update(json_path) 592 | return 593 | 594 | logging.info('载入卡组数据 {}'.format(json_path)) 595 | 596 | with open(json_path) as f: 597 | data_list = json.load(f) 598 | 599 | self.clear() 600 | for deck_dict in data_list: 601 | deck = self.deck_class() 602 | deck.from_dict(deck_dict, self.cards) 603 | self.append(deck) 604 | 605 | def get(self, deck_id): 606 | return self._index.get(deck_id) 607 | 608 | def search( 609 | self, 610 | career=None, 611 | mode=MODE_STANDARD, 612 | min_win_rate=0.0, 613 | min_games=0, 614 | win_rate_top_n=None, 615 | ): 616 | """ 617 | 在当前卡组合集中搜索符合条件的卡组 618 | :param career: 职业 619 | :param mode: 模式,可以是 MODE_STANDARD 或 MODE_WILD 620 | :param min_win_rate: 最低胜率 621 | :param min_games: 最少游戏次数 622 | :param win_rate_top_n: 将结果按胜率倒排,并截取其中的前 n 个,若为负数则返回所有卡组 623 | :return: 符合条件的卡组列表 624 | """ 625 | 626 | if career: 627 | career = get_career(career) 628 | 629 | def match(deck): 630 | if (not career or deck.career == career) \ 631 | and (not mode or deck.mode == mode) \ 632 | and ((deck.win_rate or 0) >= min_win_rate) \ 633 | and ((deck.games or 0) >= min_games): 634 | return True 635 | 636 | found = list(filter(match, self)) 637 | 638 | if win_rate_top_n: 639 | found.sort(key=lambda x: x.win_rate or 0, reverse=True) 640 | if win_rate_top_n > 0: 641 | found = found[:win_rate_top_n] 642 | 643 | return Decks(found) 644 | 645 | @property 646 | def total_games(self): 647 | return sum(map(lambda x: x.games or 0, self)) 648 | 649 | @property 650 | def total_wins(self): 651 | return sum(map(lambda x: x.wins or 0, self)) 652 | 653 | @property 654 | def avg_win_rate(self): 655 | try: 656 | return self.total_wins / self.total_games 657 | except ZeroDivisionError: 658 | pass 659 | 660 | def career_cards_stats( 661 | self, career, mode=MODE_STANDARD, 662 | min_games=1000, top_win_rate_percentage=0.1 663 | ): 664 | """ 665 | 统计指定职业和模式的卡牌数据,可在组建卡组时作为参考 666 | 1. 选取当前职业和模式中符合 top_win_rate_percentage, min_games 条件的所有卡组 667 | 2. 选取上述卡组中所用到的卡牌 668 | 3. 统计这些卡牌在上述卡组中的表现数据 669 | 670 | 表现数据中包括 671 | avg_count: (在top_decks中的)平均使用数量 672 | avg_win_rate: 平均胜率(总胜率次数/总游戏次数) 673 | total_games: 总游戏次数 674 | used_in_decks: 用到该卡牌的卡组数 675 | 676 | :param career: 职业 677 | :param mode: 模式,可以是 MODE_STANDARD 或 MODE_WILD 678 | :param min_games: 最少游戏次数 679 | :param top_win_rate_percentage: 选取胜率最高的 n% 卡组,0.1 表示 10% 680 | """ 681 | 682 | career = get_career(career) 683 | 684 | top_decks = self.search( 685 | career=career, mode=mode, 686 | min_games=min_games, win_rate_top_n=-1) 687 | top_decks = top_decks[:round(len(top_decks) * top_win_rate_percentage)] 688 | 689 | "total_count, total_games, total_wins, used_in_decks, avg_count, avg_win_rate" 690 | 691 | cards_stats = dict() 692 | for deck in top_decks: 693 | for card, count in deck.cards.items(): 694 | if card not in cards_stats: 695 | cards_stats[card] = dict( 696 | total_count=0, 697 | total_games=0, 698 | total_wins=0, 699 | used_in_decks=0, 700 | ) 701 | cards_stats[card]['used_in_decks'] += 1 702 | cards_stats[card]['total_count'] += count 703 | cards_stats[card]['total_games'] += deck.games or 0 704 | cards_stats[card]['total_wins'] += deck.wins or 0 705 | 706 | for card, stats in cards_stats.items(): 707 | stats['avg_count'] = stats['total_count'] / stats['used_in_decks'] 708 | if stats['total_games']: 709 | stats['avg_win_rate'] = stats['total_wins'] / stats['total_games'] 710 | else: 711 | stats['avg_win_rate'] = None 712 | 713 | return cards_stats, top_decks 714 | 715 | def __getitem__(self, item): 716 | ret = super(Decks, self).__getitem__(item) 717 | if isinstance(item, slice): 718 | decks = Decks() 719 | decks.extend(ret) 720 | ret = decks 721 | return ret 722 | 723 | 724 | with open(os.path.join(PACKAGE_DIR, 'career_names.json')) as fp: 725 | CAREER_NAMES_ALL_LANGUAGES = json.load(fp) 726 | 727 | 728 | def set_data_dir(path): 729 | global DATA_DIR, CARDS 730 | DATA_DIR = path 731 | CARDS = Cards(lazy_load=True) 732 | 733 | 734 | def set_main_language(language): 735 | """ 736 | 设置主要语言,包括职业和卡牌的描述文本 737 | Set main language, including description texts for Career and Card objects 738 | :param language: deDE, enUS, esES, esMX, frFR, itIT, jaJP, koKR, plPL, ptBR, ruRU, thTH, zhCN, zhTW 739 | """ 740 | 741 | global MAIN_LANGUAGE, CARDS_JSON_FILE_NAME, CAREER_NAMES, CAREERS, CARDS 742 | 743 | CAREER_NAMES = CAREER_NAMES_ALL_LANGUAGES.get(language) 744 | if not CAREER_NAMES: 745 | raise ValueError('language: should in {}'.format( 746 | ', '.join(CAREER_NAMES_ALL_LANGUAGES.keys()))) 747 | 748 | MAIN_LANGUAGE = language 749 | CARDS_JSON_FILE_NAME = 'CARDS_{}.json'.format(language) 750 | CAREERS = Careers() 751 | CARDS = Cards(lazy_load=True) 752 | 753 | 754 | def _split_keywords(keywords): 755 | if isinstance(keywords, str): 756 | keywords = re.findall(r'\w+', keywords) 757 | return keywords 758 | 759 | 760 | def _all_keywords_in_text(keywords, text): 761 | if isinstance(keywords, str): 762 | keywords = _split_keywords(keywords) 763 | for keyword in keywords: 764 | if keyword.lower() not in text.lower(): 765 | return False 766 | else: 767 | return True 768 | 769 | 770 | def _prepare_dir(path): 771 | file_dir = os.path.dirname(path) 772 | if file_dir: 773 | os.makedirs(file_dir, exist_ok=True) 774 | 775 | 776 | def get_career(keywords_or_career=None): 777 | """ 778 | 获取指定职业(Career)对象 779 | :param keywords_or_career: 指定职业的关键词或Career对象 780 | :return: 职业(Career)对象 781 | """ 782 | if isinstance(keywords_or_career, Career): 783 | career = keywords_or_career 784 | elif isinstance(keywords_or_career, (str, list, type(None))): 785 | career = CAREERS.search(keywords_or_career) 786 | else: 787 | raise TypeError('不支持使用 {} 作为参数'.format( 788 | type(keywords_or_career).__name__)) 789 | 790 | return career 791 | 792 | 793 | def can_have(career, card): 794 | """ 795 | 判断一个职业是否可拥有一张卡牌 796 | :param career: 职业 797 | :param card: 卡牌 798 | :return: True 表示可拥有;False 反之 799 | """ 800 | career = get_career(career) 801 | return career in card.careers 802 | 803 | 804 | def days_ago(n): 805 | return datetime.today() - timedelta(days=n) 806 | 807 | 808 | MAIN_LANGUAGE = 'zhCN' 809 | CARDS_JSON_FILE_NAME = 'CARDS_{}.json'.format(MAIN_LANGUAGE) 810 | CAREER_NAMES = CAREER_NAMES_ALL_LANGUAGES.get(MAIN_LANGUAGE) 811 | 812 | CAREERS = Careers() 813 | CARDS = Cards(lazy_load=True) 814 | 815 | # 用于判断卡组模式:若卡组中包含已过期卡包的卡牌,则认为是狂野模式 816 | # 这个列表需要跟随游戏不断更新! 817 | EXPIRED_SETS = ('REWARD', 'NAXX', 'GVG') 818 | --------------------------------------------------------------------------------