├── .gitignore ├── LICENSE ├── Pipfile ├── Pipfile.lock ├── README.md └── scraper.py /.gitignore: -------------------------------------------------------------------------------- 1 | ### Python ### 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Scrapy stuff: 59 | .scrapy 60 | 61 | # Sphinx documentation 62 | docs/_build/ 63 | 64 | # PyBuilder 65 | target/ 66 | 67 | # pyenv 68 | .python-version 69 | 70 | # pipenv 71 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 72 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 73 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 74 | # install all needed dependencies. 
75 | #Pipfile.lock 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # Spyder project settings 84 | .spyderproject 85 | .spyproject 86 | 87 | # Rope project settings 88 | .ropeproject 89 | 90 | # Mr Developer 91 | .mr.developer.cfg 92 | .project 93 | .pydevproject 94 | 95 | # mkdocs documentation 96 | /site 97 | 98 | # mypy 99 | .mypy_cache/ 100 | .dmypy.json 101 | dmypy.json 102 | 103 | # Pyre type checker 104 | .pyre/ 105 | 106 | ### VisualStudioCode ### 107 | .vscode/* 108 | !.vscode/settings.json 109 | !.vscode/tasks.json 110 | !.vscode/launch.json 111 | !.vscode/extensions.json 112 | 113 | ### VisualStudioCode Patch ### 114 | # Ignore all local history of files 115 | .history 116 | 117 | ### Windows ### 118 | # Windows thumbnail cache files 119 | Thumbs.db 120 | Thumbs.db:encryptable 121 | ehthumbs.db 122 | ehthumbs_vista.db 123 | 124 | # Dump file 125 | *.stackdump 126 | 127 | # Folder config file 128 | [Dd]esktop.ini 129 | 130 | # Recycle Bin used on file shares 131 | $RECYCLE.BIN/ 132 | 133 | # Windows Installer files 134 | *.cab 135 | *.msi 136 | *.msix 137 | *.msm 138 | *.msp 139 | 140 | # Windows shortcuts 141 | *.lnk 142 | 143 | #debug 144 | debug.log -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 mikeku1116 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission 
notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | autopep8 = "*" 8 | 9 | [packages] 10 | beautifulsoup4 = "*" 11 | requests = "*" 12 | pylint = "*" 13 | lxml = "*" 14 | pymysql = "*" 15 | openpyxl = "*" 16 | gspread = "*" 17 | oauth2client = "*" 18 | selenium = "*" 19 | webdriver-manager = "*" 20 | 21 | [requires] 22 | python_version = "3.8" 23 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "2f6e4df259e3c2bad7be930f474d05cda6f858cd3f7087e6b6778f7ce5a435ec" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.8" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "astroid": { 20 | "hashes": [ 21 | "sha256:2f4078c2a41bf377eea06d71c9d2ba4eb8f6b1af2135bec27bbbb7d8f12bb703", 22 | "sha256:bc58d83eb610252fd8de6363e39d4f1d0619c894b0ed24603b881c02e64c7386" 23 | ], 24 | "version": "==2.4.2" 25 | }, 26 | "beautifulsoup4": { 27 | "hashes": [ 28 | 
"sha256:73cc4d115b96f79c7d77c1c7f7a0a8d4c57860d1041df407dd1aae7f07a77fd7", 29 | "sha256:a6237df3c32ccfaee4fd201c8f5f9d9df619b93121d01353a64a73ce8c6ef9a8", 30 | "sha256:e718f2342e2e099b640a34ab782407b7b676f47ee272d6739e60b8ea23829f2c" 31 | ], 32 | "index": "pypi", 33 | "version": "==4.9.1" 34 | }, 35 | "cachetools": { 36 | "hashes": [ 37 | "sha256:513d4ff98dd27f85743a8dc0e92f55ddb1b49e060c2d5961512855cda2c01a98", 38 | "sha256:bbaa39c3dede00175df2dc2b03d0cf18dd2d32a7de7beb68072d13043c9edb20" 39 | ], 40 | "version": "==4.1.1" 41 | }, 42 | "certifi": { 43 | "hashes": [ 44 | "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3", 45 | "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41" 46 | ], 47 | "version": "==2020.6.20" 48 | }, 49 | "chardet": { 50 | "hashes": [ 51 | "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", 52 | "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" 53 | ], 54 | "version": "==3.0.4" 55 | }, 56 | "colorama": { 57 | "hashes": [ 58 | "sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff", 59 | "sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1" 60 | ], 61 | "markers": "sys_platform == 'win32'", 62 | "version": "==0.4.3" 63 | }, 64 | "configparser": { 65 | "hashes": [ 66 | "sha256:2ca44140ee259b5e3d8aaf47c79c36a7ab0d5e94d70bd4105c03ede7a20ea5a1", 67 | "sha256:cffc044844040c7ce04e9acd1838b5f2e5fa3170182f6fda4d2ea8b0099dbadd" 68 | ], 69 | "version": "==5.0.0" 70 | }, 71 | "crayons": { 72 | "hashes": [ 73 | "sha256:bd33b7547800f2cfbd26b38431f9e64b487a7de74a947b0fafc89b45a601813f", 74 | "sha256:e73ad105c78935d71fe454dd4b85c5c437ba199294e7ffd3341842bc683654b1" 75 | ], 76 | "version": "==0.4.0" 77 | }, 78 | "et-xmlfile": { 79 | "hashes": [ 80 | "sha256:614d9722d572f6246302c4491846d2c393c199cfa4edc9af593437691683335b" 81 | ], 82 | "version": "==1.0.1" 83 | }, 84 | "google-auth": { 85 | "hashes": [ 86 | 
"sha256:982e1f82cace752134660b4c0ff660761b32146a55abb3ad6d225529012af87c", 87 | "sha256:f2498ad9cac3d2942d6c509ba18c4639656b366681881a1805f44f2a0c2d46f1" 88 | ], 89 | "version": "==1.21.0" 90 | }, 91 | "google-auth-oauthlib": { 92 | "hashes": [ 93 | "sha256:88d2cd115e3391eb85e1243ac6902e76e77c5fe438b7276b297fbe68015458dd", 94 | "sha256:a92a0f6f41a0fb6138454fbc02674e64f89d82a244ea32f98471733c8ef0e0e1" 95 | ], 96 | "version": "==0.4.1" 97 | }, 98 | "gspread": { 99 | "hashes": [ 100 | "sha256:273da28275eb8dc664b1ca944e59255949d75ac3cac62d65797003dbb419a2cd", 101 | "sha256:e04f1a6267b3929fc1600424c5ec83906d439672cafdd61a9d5b916a139f841c" 102 | ], 103 | "index": "pypi", 104 | "version": "==3.6.0" 105 | }, 106 | "httplib2": { 107 | "hashes": [ 108 | "sha256:8af66c1c52c7ffe1aa5dc4bcd7c769885254b0756e6e69f953c7f0ab49a70ba3", 109 | "sha256:ca2914b015b6247791c4866782fa6042f495b94401a0f0bd3e1d6e0ba2236782" 110 | ], 111 | "version": "==0.18.1" 112 | }, 113 | "idna": { 114 | "hashes": [ 115 | "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", 116 | "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" 117 | ], 118 | "version": "==2.10" 119 | }, 120 | "isort": { 121 | "hashes": [ 122 | "sha256:60a1b97e33f61243d12647aaaa3e6cc6778f5eb9f42997650f1cc975b6008750", 123 | "sha256:d488ba1c5a2db721669cc180180d5acf84ebdc5af7827f7aaeaa75f73cf0e2b8" 124 | ], 125 | "version": "==5.4.2" 126 | }, 127 | "jdcal": { 128 | "hashes": [ 129 | "sha256:1abf1305fce18b4e8aa248cf8fe0c56ce2032392bc64bbd61b5dff2a19ec8bba", 130 | "sha256:472872e096eb8df219c23f2689fc336668bdb43d194094b5cc1707e1640acfc8" 131 | ], 132 | "version": "==1.4.1" 133 | }, 134 | "lazy-object-proxy": { 135 | "hashes": [ 136 | "sha256:0c4b206227a8097f05c4dbdd323c50edf81f15db3b8dc064d08c62d37e1a504d", 137 | "sha256:194d092e6f246b906e8f70884e620e459fc54db3259e60cf69a4d66c3fda3449", 138 | "sha256:1be7e4c9f96948003609aa6c974ae59830a6baecc5376c25c92d7d697e684c08", 139 | 
"sha256:4677f594e474c91da97f489fea5b7daa17b5517190899cf213697e48d3902f5a", 140 | "sha256:48dab84ebd4831077b150572aec802f303117c8cc5c871e182447281ebf3ac50", 141 | "sha256:5541cada25cd173702dbd99f8e22434105456314462326f06dba3e180f203dfd", 142 | "sha256:59f79fef100b09564bc2df42ea2d8d21a64fdcda64979c0fa3db7bdaabaf6239", 143 | "sha256:8d859b89baf8ef7f8bc6b00aa20316483d67f0b1cbf422f5b4dc56701c8f2ffb", 144 | "sha256:9254f4358b9b541e3441b007a0ea0764b9d056afdeafc1a5569eee1cc6c1b9ea", 145 | "sha256:9651375199045a358eb6741df3e02a651e0330be090b3bc79f6d0de31a80ec3e", 146 | "sha256:97bb5884f6f1cdce0099f86b907aa41c970c3c672ac8b9c8352789e103cf3156", 147 | "sha256:9b15f3f4c0f35727d3a0fba4b770b3c4ebbb1fa907dbcc046a1d2799f3edd142", 148 | "sha256:a2238e9d1bb71a56cd710611a1614d1194dc10a175c1e08d75e1a7bcc250d442", 149 | "sha256:a6ae12d08c0bf9909ce12385803a543bfe99b95fe01e752536a60af2b7797c62", 150 | "sha256:ca0a928a3ddbc5725be2dd1cf895ec0a254798915fb3a36af0964a0a4149e3db", 151 | "sha256:cb2c7c57005a6804ab66f106ceb8482da55f5314b7fcb06551db1edae4ad1531", 152 | "sha256:d74bb8693bf9cf75ac3b47a54d716bbb1a92648d5f781fc799347cfc95952383", 153 | "sha256:d945239a5639b3ff35b70a88c5f2f491913eb94871780ebfabb2568bd58afc5a", 154 | "sha256:eba7011090323c1dadf18b3b689845fd96a61ba0a1dfbd7f24b921398affc357", 155 | "sha256:efa1909120ce98bbb3777e8b6f92237f5d5c8ea6758efea36a473e1d38f7d3e4", 156 | "sha256:f3900e8a5de27447acbf900b4750b0ddfd7ec1ea7fbaf11dfa911141bc522af0" 157 | ], 158 | "version": "==1.4.3" 159 | }, 160 | "lxml": { 161 | "hashes": [ 162 | "sha256:05a444b207901a68a6526948c7cc8f9fe6d6f24c70781488e32fd74ff5996e3f", 163 | "sha256:08fc93257dcfe9542c0a6883a25ba4971d78297f63d7a5a26ffa34861ca78730", 164 | "sha256:107781b213cf7201ec3806555657ccda67b1fccc4261fb889ef7fc56976db81f", 165 | "sha256:121b665b04083a1e85ff1f5243d4a93aa1aaba281bc12ea334d5a187278ceaf1", 166 | "sha256:1fa21263c3aba2b76fd7c45713d4428dbcc7644d73dcf0650e9d344e433741b3", 167 | 
"sha256:2b30aa2bcff8e958cd85d907d5109820b01ac511eae5b460803430a7404e34d7", 168 | "sha256:4b4a111bcf4b9c948e020fd207f915c24a6de3f1adc7682a2d92660eb4e84f1a", 169 | "sha256:5591c4164755778e29e69b86e425880f852464a21c7bb53c7ea453bbe2633bbe", 170 | "sha256:59daa84aef650b11bccd18f99f64bfe44b9f14a08a28259959d33676554065a1", 171 | "sha256:5a9c8d11aa2c8f8b6043d845927a51eb9102eb558e3f936df494e96393f5fd3e", 172 | "sha256:5dd20538a60c4cc9a077d3b715bb42307239fcd25ef1ca7286775f95e9e9a46d", 173 | "sha256:74f48ec98430e06c1fa8949b49ebdd8d27ceb9df8d3d1c92e1fdc2773f003f20", 174 | "sha256:786aad2aa20de3dbff21aab86b2fb6a7be68064cbbc0219bde414d3a30aa47ae", 175 | "sha256:7ad7906e098ccd30d8f7068030a0b16668ab8aa5cda6fcd5146d8d20cbaa71b5", 176 | "sha256:80a38b188d20c0524fe8959c8ce770a8fdf0e617c6912d23fc97c68301bb9aba", 177 | "sha256:8f0ec6b9b3832e0bd1d57af41f9238ea7709bbd7271f639024f2fc9d3bb01293", 178 | "sha256:92282c83547a9add85ad658143c76a64a8d339028926d7dc1998ca029c88ea6a", 179 | "sha256:94150231f1e90c9595ccc80d7d2006c61f90a5995db82bccbca7944fd457f0f6", 180 | "sha256:9dc9006dcc47e00a8a6a029eb035c8f696ad38e40a27d073a003d7d1443f5d88", 181 | "sha256:a76979f728dd845655026ab991df25d26379a1a8fc1e9e68e25c7eda43004bed", 182 | "sha256:aa8eba3db3d8761db161003e2d0586608092e217151d7458206e243be5a43843", 183 | "sha256:bea760a63ce9bba566c23f726d72b3c0250e2fa2569909e2d83cda1534c79443", 184 | "sha256:c3f511a3c58676147c277eff0224c061dd5a6a8e1373572ac817ac6324f1b1e0", 185 | "sha256:c9d317efde4bafbc1561509bfa8a23c5cab66c44d49ab5b63ff690f5159b2304", 186 | "sha256:cc411ad324a4486b142c41d9b2b6a722c534096963688d879ea6fa8a35028258", 187 | "sha256:cdc13a1682b2a6241080745b1953719e7fe0850b40a5c71ca574f090a1391df6", 188 | "sha256:cfd7c5dd3c35c19cec59c63df9571c67c6d6e5c92e0fe63517920e97f61106d1", 189 | "sha256:e1cacf4796b20865789083252186ce9dc6cc59eca0c2e79cca332bdff24ac481", 190 | "sha256:e70d4e467e243455492f5de463b72151cc400710ac03a0678206a5f27e79ddef", 191 | 
"sha256:ecc930ae559ea8a43377e8b60ca6f8d61ac532fc57efb915d899de4a67928efd", 192 | "sha256:f161af26f596131b63b236372e4ce40f3167c1b5b5d459b29d2514bd8c9dc9ee" 193 | ], 194 | "index": "pypi", 195 | "version": "==4.5.2" 196 | }, 197 | "mccabe": { 198 | "hashes": [ 199 | "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", 200 | "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" 201 | ], 202 | "version": "==0.6.1" 203 | }, 204 | "oauth2client": { 205 | "hashes": [ 206 | "sha256:b8a81cc5d60e2d364f0b1b98f958dbd472887acaf1a5b05e21c28c31a2d6d3ac", 207 | "sha256:d486741e451287f69568a4d26d70d9acd73a2bbfa275746c535b4209891cccc6" 208 | ], 209 | "index": "pypi", 210 | "version": "==4.1.3" 211 | }, 212 | "oauthlib": { 213 | "hashes": [ 214 | "sha256:bee41cc35fcca6e988463cacc3bcb8a96224f470ca547e697b604cc697b2f889", 215 | "sha256:df884cd6cbe20e32633f1db1072e9356f53638e4361bef4e8b03c9127c9328ea" 216 | ], 217 | "version": "==3.1.0" 218 | }, 219 | "openpyxl": { 220 | "hashes": [ 221 | "sha256:18e11f9a650128a12580a58e3daba14e00a11d9e907c554a17ea016bf1a2c71b", 222 | "sha256:f7d666b569f729257082cf7ddc56262431878f602dcc2bc3980775c59439cdab" 223 | ], 224 | "index": "pypi", 225 | "version": "==3.0.5" 226 | }, 227 | "pyasn1": { 228 | "hashes": [ 229 | "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d", 230 | "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba" 231 | ], 232 | "version": "==0.4.8" 233 | }, 234 | "pyasn1-modules": { 235 | "hashes": [ 236 | "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e", 237 | "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74" 238 | ], 239 | "version": "==0.2.8" 240 | }, 241 | "pylint": { 242 | "hashes": [ 243 | "sha256:bb4a908c9dadbc3aac18860550e870f58e1a02c9f2c204fdf5693d73be061210", 244 | "sha256:bfe68f020f8a0fece830a22dd4d5dddb4ecc6137db04face4c3420a46a52239f" 245 | ], 246 | "index": "pypi", 247 | "version": "==2.6.0" 248 
| }, 249 | "pymysql": { 250 | "hashes": [ 251 | "sha256:adef15ceccf1ff544a23a6f46609f65187261dc8b0cf94c9644189c173b0a451", 252 | "sha256:e14070bc84e050e0f80bf6063e31d276f03a0bb4d46b9eca2854566c4ae19837" 253 | ], 254 | "index": "pypi", 255 | "version": "==0.10.0" 256 | }, 257 | "requests": { 258 | "hashes": [ 259 | "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b", 260 | "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898" 261 | ], 262 | "index": "pypi", 263 | "version": "==2.24.0" 264 | }, 265 | "requests-oauthlib": { 266 | "hashes": [ 267 | "sha256:7f71572defaecd16372f9006f33c2ec8c077c3cfa6f5911a9a90202beb513f3d", 268 | "sha256:b4261601a71fd721a8bd6d7aa1cc1d6a8a93b4a9f5e96626f8e4d91e8beeaa6a" 269 | ], 270 | "version": "==1.3.0" 271 | }, 272 | "rsa": { 273 | "hashes": [ 274 | "sha256:109ea5a66744dd859bf16fe904b8d8b627adafb9408753161e766a92e7d681fa", 275 | "sha256:6166864e23d6b5195a5cfed6cd9fed0fe774e226d8f854fcb23b7bbef0350233" 276 | ], 277 | "markers": "python_version >= '3.5'", 278 | "version": "==4.6" 279 | }, 280 | "selenium": { 281 | "hashes": [ 282 | "sha256:2d7131d7bc5a5b99a2d9b04aaf2612c411b03b8ca1b1ee8d3de5845a9be2cb3c", 283 | "sha256:deaf32b60ad91a4611b98d8002757f29e6f2c2d5fcaf202e1c9ad06d6772300d" 284 | ], 285 | "index": "pypi", 286 | "version": "==3.141.0" 287 | }, 288 | "six": { 289 | "hashes": [ 290 | "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", 291 | "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" 292 | ], 293 | "version": "==1.15.0" 294 | }, 295 | "soupsieve": { 296 | "hashes": [ 297 | "sha256:1634eea42ab371d3d346309b93df7870a88610f0725d47528be902a0d95ecc55", 298 | "sha256:a59dc181727e95d25f781f0eb4fd1825ff45590ec8ff49eadfd7f1a537cc0232" 299 | ], 300 | "version": "==2.0.1" 301 | }, 302 | "toml": { 303 | "hashes": [ 304 | "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f", 305 | 
"sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88" 306 | ], 307 | "version": "==0.10.1" 308 | }, 309 | "urllib3": { 310 | "hashes": [ 311 | "sha256:91056c15fa70756691db97756772bb1eb9678fa585d9184f24534b100dc60f4a", 312 | "sha256:e7983572181f5e1522d9c98453462384ee92a0be7fac5f1413a1e35c56cc0461" 313 | ], 314 | "version": "==1.25.10" 315 | }, 316 | "webdriver-manager": { 317 | "hashes": [ 318 | "sha256:18a665c6400bb7cf1a9ec9e1030ac5539cd5c892c97075f58940c62971470ce3", 319 | "sha256:c2d4ee0a78226c355f3657dd0337e515187585a1497229af2ce5f4705234da9c" 320 | ], 321 | "index": "pypi", 322 | "version": "==3.2.2" 323 | }, 324 | "wrapt": { 325 | "hashes": [ 326 | "sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7" 327 | ], 328 | "version": "==1.12.1" 329 | } 330 | }, 331 | "develop": { 332 | "autopep8": { 333 | "hashes": [ 334 | "sha256:d21d3901cb0da6ebd1e83fc9b0dfbde8b46afc2ede4fe32fbda0c7c6118ca094" 335 | ], 336 | "index": "pypi", 337 | "version": "==1.5.4" 338 | }, 339 | "pycodestyle": { 340 | "hashes": [ 341 | "sha256:2295e7b2f6b5bd100585ebcb1f616591b652db8a741695b3d8f5d28bdc934367", 342 | "sha256:c58a7d2815e0e8d7972bf1803331fb0152f867bd89adf8a01dfd55085434192e" 343 | ], 344 | "version": "==2.6.0" 345 | }, 346 | "toml": { 347 | "hashes": [ 348 | "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f", 349 | "sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88" 350 | ], 351 | "version": "==0.10.1" 352 | } 353 | } 354 | } 355 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # python-stock-scraper # 2 | 3 | ## 專案介紹 ## 4 | 5 | 本專案以Yahoo奇摩股市為例,開發Python網頁爬蟲取得關注的股票當日行情,並且提供了四個方法(Method), 6 | 包含: 7 | * scrape() 8 | * save() 9 | * export() 10 | * gsheet() 11 | * daily() 12 | 13 | 
"""Scrape Taiwan stock quotes and store, export or publish them.

``Stock.scrape`` reads the current quote of every tracked stock from the
Yahoo! Taiwan quote page.  ``Stock.save``, ``Stock.export`` and
``Stock.gsheet`` write those rows to MySQL, an Excel workbook and a Google
Sheet respectively.  ``Stock.daily`` drives the TWSE "daily closing price
and monthly average" query page with Selenium.

Third-party packages are imported at the point of use, so importing this
module (or using a single back end) does not require every other back end
to be installed.

Typical use::

    stock = Stock('2451', '2454', '2369')
    stock.daily("2019", "7")        # daily closing prices for 2019-07
    stock.gsheet(stock.scrape())    # today's quotes -> Google Sheet
    stock.export(stock.scrape())    # today's quotes -> Excel file
    stock.save(stock.scrape())      # today's quotes -> MySQL
"""
import time

YAHOO_QUOTE_URL = "https://tw.stock.yahoo.com/q/q?s="
TWSE_DAILY_AVG_URL = (
    "https://www.twse.com.tw/zh/page/trading/exchange/STOCK_DAY_AVG.html")

# Seconds to wait for Yahoo before giving up; without a timeout
# ``requests`` would block forever on a stalled connection.
REQUEST_TIMEOUT = 10

QUOTE_TABLE_INDEX = 2   # the quote lives in the third <table> of the page
QUOTE_COLUMNS = 11      # number of leading cells/headers that are kept
CHANGE_INDEX = 6        # position of the "ups and downs" value in a row
HEADER_STOCK_NUMBER = "2451"  # page used only to read the column titles

RISE_COLOR = "FF0000"   # font colour for a change containing "△"
FALL_COLOR = "00A600"   # font colour for a change containing "▽"

# Connection settings used by ``Stock.save`` when none are supplied.
# The password is a placeholder and must be replaced before use.
DEFAULT_DB_SETTINGS = {
    "host": "127.0.0.1",
    "port": 3306,
    "user": "root",
    "password": "******",
    "db": "stock",
    "charset": "utf8"
}

INSERT_MARKET_SQL = """INSERT INTO market(
                            market_date,
                            stock_name,
                            market_time,
                            final_price,
                            buy_price,
                            sell_price,
                            ups_and_downs,
                            lot,
                            yesterday_price,
                            opening_price,
                            highest_price,
                            lowest_price)
                     VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""


def _fetch_quote_page(stock_number, remove_text=""):
    """Download and parse the Yahoo quote page of *stock_number*.

    *remove_text*, when non-empty, is deleted from the HTML before parsing.
    Raises ``requests.HTTPError`` on a non-2xx response instead of letting
    an error page fail later with an unrelated parsing error.
    """
    import requests
    from bs4 import BeautifulSoup

    response = requests.get(f"{YAHOO_QUOTE_URL}{stock_number}",
                            timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    html = response.text
    if remove_text:
        html = html.replace(remove_text, "")
    return BeautifulSoup(html, "lxml")


def _scrape_quote(stock_number):
    """Return one quote row: the data date followed by 11 table cells."""
    soup = _fetch_quote_page(stock_number, remove_text="加到投資組合")

    # Data date: the last nine characters of the "tt" <font> element
    # (presumably a date such as 109/08/25 -- confirm against the page).
    stock_date = soup.find(
        "font", {"class": "tt"}).getText().strip()[-9:]

    table = soup.find_all("table")[QUOTE_TABLE_INDEX]
    cells = table.find_all("td")[:QUOTE_COLUMNS]  # first 11 cells

    return (stock_date,) + tuple(td.getText().strip() for td in cells)


def _fetch_titles(stock_number=HEADER_STOCK_NUMBER):
    """Return the column titles: "資料日期" plus the 11 table headers."""
    soup = _fetch_quote_page(stock_number)
    table = soup.find_all("table")[QUOTE_TABLE_INDEX]
    headers = table.find_all("th")[:QUOTE_COLUMNS]
    return ("資料日期",) + tuple(th.getText() for th in headers)


def _change_color(change):
    """Return the font colour for a change value, or None for no colour."""
    if "△" in change:
        return RISE_COLOR
    if "▽" in change:
        return FALL_COLOR
    return None


class Stock:
    """A set of stock numbers to scrape and the sinks for the results."""

    def __init__(self, *stock_numbers):
        """Remember the stock numbers (strings such as ``'2451'``)."""
        self.stock_numbers = stock_numbers

    def scrape(self):
        """Return today's quote of every tracked stock.

        Returns:
            A list with one 12-item tuple per stock number: the data date
            followed by the first 11 cells of the quote table.  The list
            is empty when no stock numbers are tracked.

        Raises:
            requests.RequestException: the page could not be downloaded.
        """
        return [_scrape_quote(number) for number in self.stock_numbers]

    def save(self, stocks, db_settings=None):
        """Insert quote rows into the MySQL ``market`` table.

        Args:
            stocks: iterable of 12-item rows as returned by ``scrape``.
            db_settings: keyword arguments for ``pymysql.connect``;
                defaults to ``DEFAULT_DB_SETTINGS``.

        Database errors are reported on stdout and never raised, so a
        failed save does not abort the caller.
        """
        rows = list(stocks)
        if not rows:
            return  # nothing to insert; do not open a connection

        import pymysql

        settings = DEFAULT_DB_SETTINGS if db_settings is None else db_settings

        conn = None
        try:
            conn = pymysql.connect(**settings)
            with conn.cursor() as cursor:
                cursor.executemany(INSERT_MARKET_SQL, rows)
            conn.commit()
        except Exception as ex:  # best effort by design: report, don't raise
            print("Exception:", ex)
        finally:
            # Closing also discards any uncommitted inserts after a failure.
            if conn is not None:
                conn.close()

    def export(self, stocks, filename="yahoostock.xlsx"):
        """Write quote rows to an Excel workbook.

        The first worksheet, "Yahoo股市", gets a title row followed by one
        row per stock.  The change column is coloured red when it contains
        "△" and green when it contains "▽".

        Args:
            stocks: iterable of rows as returned by ``scrape``.
            filename: path of the workbook to write.
        """
        import openpyxl
        from openpyxl.styles import Font

        wb = openpyxl.Workbook()
        sheet = wb.create_sheet("Yahoo股市", 0)
        sheet.append(_fetch_titles())

        # Row 1 holds the titles, so data rows start at 2; openpyxl columns
        # are 1-based, hence CHANGE_INDEX + 1.
        for row_number, stock in enumerate(stocks, start=2):
            sheet.append(stock)

            color = _change_color(stock[CHANGE_INDEX])
            if color is not None:
                cell = sheet.cell(row=row_number, column=CHANGE_INDEX + 1)
                cell.font = Font(color=color)

        wb.save(filename)

    def gsheet(self, stocks, sheet_key="YOUR GOOGLE SHEET KEY",
               credentials_file="credentials.json"):
        """Append a title row and the quote rows to a Google Sheet.

        Args:
            stocks: iterable of rows as returned by ``scrape``.
            sheet_key: key of the target spreadsheet; its first worksheet
                is written to.
            credentials_file: path of the service-account JSON key file.
        """
        import gspread
        from oauth2client.service_account import ServiceAccountCredentials

        scopes = ["https://spreadsheets.google.com/feeds"]

        credentials = ServiceAccountCredentials.from_json_keyfile_name(
            credentials_file, scopes)

        client = gspread.authorize(credentials)

        sheet = client.open_by_key(sheet_key).sheet1

        # NOTE(review): the second positional argument of append_row is
        # value_input_option in gspread 3.x; ``1`` is kept as in the
        # original -- confirm it is intended (insert_row(titles, 1)?).
        sheet.append_row(_fetch_titles(), 1)

        for stock in stocks:
            sheet.append_row(stock)

    def daily(self, year, month):
        """Scrape the TWSE daily closing prices of every tracked stock.

        Args:
            year: value of the year ``<select>`` option, e.g. ``"2019"``.
            month: value of the month ``<select>`` option, e.g. ``"7"``.

        Returns:
            A list with one tuple per stock number: the stock number
            followed by the text of every matching table cell.  The list
            is also printed.  No browser is started when no stock numbers
            are tracked.
        """
        result = []
        if self.stock_numbers:
            result = self._collect_daily(year, month)

        print(result)
        return result

    def _collect_daily(self, year, month):
        """Run the TWSE query in Chrome and return the scraped rows."""
        from bs4 import BeautifulSoup
        from selenium import webdriver
        from selenium.webdriver.support.ui import Select
        from webdriver_manager.chrome import ChromeDriverManager

        result = []
        browser = webdriver.Chrome(ChromeDriverManager().install())
        try:
            browser.get(TWSE_DAILY_AVG_URL)

            # Select the requested year and month.
            Select(browser.find_element_by_name("yy")).select_by_value(year)
            Select(browser.find_element_by_name("mm")).select_by_value(month)

            # Locate the stock-number input box.
            stockno = browser.find_element_by_name("stockNo")

            for stock_number in self.stock_numbers:
                stockno.clear()  # empty the input box before typing
                stockno.send_keys(stock_number)
                stockno.submit()

                time.sleep(2)  # give the page time to render the result

                soup = BeautifulSoup(browser.page_source, "lxml")
                table = soup.find("table", {"id": "report-table"})
                elements = table.find_all(
                    "td", {"class": "dt-head-center dt-body-center"})

                result.append(
                    (stock_number,)
                    + tuple(element.getText() for element in elements))
        finally:
            # Always shut the browser down, even when scraping fails;
            # otherwise every call leaks a Chrome process.
            browser.quit()

        return result


def main():
    """Scrape the July 2019 daily closing prices of three sample stocks."""
    stock = Stock('2451', '2454', '2369')
    stock.daily("2019", "7")


if __name__ == "__main__":
    main()