├── .coveragerc ├── .github └── workflows │ ├── python-package.yml │ └── python-publish.yml ├── .gitignore ├── LICENSE ├── README.md ├── poetry.lock ├── pyproject.toml ├── stweet ├── __init__.py ├── auth │ ├── __init__.py │ ├── auth_token_provider.py │ ├── fail_strategy │ │ ├── __init__.py │ │ ├── auth_fail_strategy.py │ │ ├── tor_ip_change_auth_fail_strategy.py │ │ └── wait_auth_fail_strategy.py │ └── simple_auth_token_provider.py ├── exceptions │ ├── __init__.py │ ├── refresh_token_exception.py │ ├── scrap_batch_bad_response.py │ ├── too_many_requests_exception.py │ └── user_suspended_exception.py ├── get_user_runner │ ├── __init__.py │ ├── get_users_context.py │ ├── get_users_result.py │ ├── get_users_runner.py │ ├── get_users_task.py │ └── user_parser.py ├── http_request │ ├── __init__.py │ ├── http_method.py │ ├── interceptor │ │ ├── __init__.py │ │ ├── logging_requests_web_client_interceptor.py │ │ └── params_response_log_web_client_interceptor.py │ ├── request_details.py │ ├── request_response.py │ ├── requests │ │ ├── __init__.py │ │ ├── requests_web_client.py │ │ └── requests_web_client_proxy_config.py │ └── web_client.py ├── model │ ├── __init__.py │ ├── cursor.py │ ├── language.py │ ├── raw_data.py │ ├── tweet_raw.py │ ├── user_raw.py │ └── user_tweet_raw.py ├── raw_output │ ├── __init__.py │ ├── collector_raw_output.py │ ├── json_line_file_raw_output.py │ ├── print_every_n_raw_output.py │ ├── print_first_in_batch_raw_output.py │ ├── print_raw_output.py │ └── raw_data_output.py ├── search_runner │ ├── __init__.py │ ├── replies_filter.py │ ├── search_run_context.py │ ├── search_runner.py │ ├── search_tweets_result.py │ ├── search_tweets_task.py │ └── tweet_raw_parser.py ├── tweets_by_ids_runner │ ├── __init__.py │ ├── tweet_raw_parser.py │ ├── tweets_by_id_context.py │ ├── tweets_by_id_result.py │ ├── tweets_by_id_runner.py │ └── tweets_by_id_task.py └── twitter_api │ ├── __init__.py │ ├── default_twitter_web_client_provider.py │ ├── twitter_api_requests.py │ └── twitter_auth_web_client_interceptor.py ├── test-services-docker-compose.yml ├── tests ├── __init__.py ├── integration │ ├── all_languages_test.py │ ├── exception_test.py │ ├── export_import_test.py │ ├── get_tweet_by_id_test.py │ ├── get_user_test.py │ ├── import_older_version_test.py │ ├── interceptor_test.py │ ├── large_iterator_test.py │ ├── parse_media_test.py │ ├── print_test.py │ ├── proxy_client_requests_test.py │ ├── reply_filter_test.py │ ├── search_in_language_test.py │ ├── search_return_objest_test.py │ ├── serialization_test.py │ ├── time_period_test.py │ ├── tweets_count_test.py │ ├── username_search_test.py │ └── word_search_test.py ├── mock_web_client.py ├── resources │ ├── tweets_v1.1.2.csv │ ├── tweets_v1.1.2.jl │ ├── users_v1.3.0.csv │ └── users_v1.3.0.jl ├── test_file_manager.py ├── test_util.py ├── tweet_output_export_call_counter.py ├── tweet_output_tweets_counter.py └── unit │ └── language_test.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = stweet/search_runner/parse/base_tweet_parser.py -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 
|
4 | name: Python package
5 |
6 | on:
7 |   pull_request:
8 |     branches: [ master, develop ]
9 |     paths-ignore:
10 |       - 'README.md'
11 |       - 'docs/**'
12 |   schedule:
13 |     - cron: '0 0 * * *'
14 |
15 | jobs:
16 |   build:
17 |
18 |     runs-on: ubuntu-22.04
19 |     strategy:
20 |       matrix:
21 |         python-version: [ 3.7, 3.8, 3.9 ]
22 |
23 |     steps:
24 |       - uses: actions/checkout@v2
25 |
26 |       - name: Set up Python ${{ matrix.python-version }}
27 |         uses: actions/setup-python@v2
28 |         with:
29 |           python-version: ${{ matrix.python-version }}
30 |
31 |       - name: Install poetry
32 |         uses: abatilo/actions-poetry@v2.0.0
33 |         with:
34 |           poetry-version: 1.3.1
35 |
36 |       - name: Install deps
37 |         run: poetry install -vv
38 |
39 | # - name: Run tox
40 | #   run: tox -v
41 | #
42 | # - name: Upload coverage to Codecov
43 | #   uses: codecov/codecov-action@v1
44 | #   with:
45 | #     token: ${{ secrets.CODECOV_TOKEN }}
46 | #     file: ./coverage.xml
47 | #     flags: unittests
48 |
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow builds the package with Poetry and publishes it to PyPI on pushes to master
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Python package & deploy
5 |
6 | on:
7 |   push:
8 |     branches: [ master ]
9 |
10 | jobs:
11 |   deploy:
12 |
13 |     runs-on: ubuntu-latest
14 |     strategy:
15 |       matrix:
16 |         python-version: [ 3.8 ]
17 |         poetry-version: [ 1.3.1 ]
18 |
19 |     steps:
20 |       - uses: actions/checkout@v2
21 |
22 |       - name: Set up Python ${{ matrix.python-version }}
23 |         uses: actions/setup-python@v2
24 |         with:
25 |           python-version: ${{ matrix.python-version }}
26 |
27 |       - name: Install poetry
28 |         uses: abatilo/actions-poetry@v2.0.0
29 |         with:
30 |           poetry-version: ${{ matrix.poetry-version }}
31 |
32 |       - name: Install deps
33 |         run: poetry install -vv
34 |
35 | # - name: Run tox
36 | #   run: tox -v
37 | #
38 | # - name: Upload coverage to Codecov
39 | #   uses: codecov/codecov-action@v1
40 | #   with:
41 | #     token: ${{ secrets.CODECOV_TOKEN }}
42 | #     file: ./coverage.xml
43 | #     flags: unittests
44 |
45 |       - name: Build and publish
46 |         run: |
47 |           poetry build
48 |           poetry publish --username ${{ secrets.PYPI_USERNAME }} --password ${{ secrets.PYPI_PASSWORD }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 |
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | share/python-wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | cover/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | .pybuilder/ 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | # For a library or package, you might want to ignore these files since the code is 89 | # intended to run in multiple environments; otherwise, check them in: 90 | # .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 100 | __pypackages__/ 101 | 102 | # Celery stuff 103 | celerybeat-schedule 104 | celerybeat.pid 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # Environments 110 | .env 111 | .venv 112 | env/ 113 | venv/ 114 | ENV/ 115 | env.bak/ 116 | venv.bak/ 117 | 118 | # Spyder project settings 119 | .spyderproject 120 | .spyproject 121 | 122 | # Rope project settings 123 | .ropeproject 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | .dmypy.json 131 | dmypy.json 132 | 133 | # Pyre type checker 134 | .pyre/ 135 | 136 | # pytype static type analyzer 137 | .pytype/ 138 | 139 | # Cython debug symbols 140 | cython_debug/ 141 | 142 | tmp/test_run.py 143 | clean.sh 144 | tmp 145 | sample_tweets.json 146 | .pypirc 147 | 148 | 149 | .DS_Store 150 | stweet_run.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Marcin Wątroba 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # stweet
2 |
3 | [![Open Source Love](https://badges.frapsoft.com/os/v2/open-source.svg?v=103)](https://github.com/ellerbrock/open-source-badges/)
4 | ![Python package](https://github.com/markowanga/stweet/workflows/Python%20package/badge.svg?branch=master)
5 | [![PyPI version](https://badge.fury.io/py/stweet.svg)](https://badge.fury.io/py/stweet)
6 | [![MIT Licence](https://badges.frapsoft.com/os/mit/mit.svg?v=103)](https://opensource.org/licenses/mit-license.php)
7 |
8 | A modern, fast Python library for scraping tweets and users from the unofficial Twitter API.
9 |
10 | This tool helps you scrape tweets by search phrase, tweets by id, and users by username. It uses
11 | the same unofficial Twitter API that the website uses.
12 |
13 | ## Inspiration for the creation of the library
14 |
15 | I have used twint to scrape tweets, but it has many errors and does not work reliably. Its code
16 | was not simple to understand. All tasks share one config, so the user has to know the exact
17 | parameters. The last important thing is that the API can change – Twitter owns the API, and
18 | changes depend on it alone. It is annoying when something does not work and users must report bugs
19 | as issues.
20 |
21 | ## Main advantages of the library
22 |
23 | - **Simple code** – the code is not only mine; every user can contribute to the library
24 | - **Domain objects and interfaces** – the main parts of the functionality can be replaced (e.g. the
25 |   web request layer); the library ships a simple default implementation, so you can extend it
26 |   without any problems or forks
27 | - ~~**100% coverage with integration tests** – this can catch API changes; the tests run
28 |   every week, and when a task fails we can easily find the source of the change~~ – not in
29 |   version 2.0
30 | - **Custom tweet and user outputs** – output is part of the interface; if you want to save tweets and
31 |   users in a custom format, it takes only a brief moment
32 |
33 | ## Installation
34 |
35 | ```shell script
36 | pip install -U stweet
37 | ```
38 |
39 | ## Donate
40 |
41 | If you want to thank me for the project, you can sponsor me by sending some crypto 😁:
42 |
43 | | Coin     | Wallet address                             |
44 | |----------|--------------------------------------------|
45 | | Bitcoin  | 3EajE9DbLvEmBHLRzjDfG86LyZB4jzsZyg         |
46 | | Ethereum | 0xE43d8C2c7a9af286bc2fc0568e2812151AF9b1FD |
47 |
48 | ## Basic usage
49 |
50 | To make a simple request, a scraping **task** must be prepared. The task is then processed by a
51 | **runner**.
52 |
53 | ```python
54 | import stweet as st
55 |
56 |
57 | def try_search():
58 |     search_tweets_task = st.SearchTweetsTask(all_words='#covid19')
59 |     output_jl_tweets = st.JsonLineFileRawOutput('output_raw_search_tweets.jl')
60 |     output_jl_users = st.JsonLineFileRawOutput('output_raw_search_users.jl')
61 |     output_print = st.PrintRawOutput()
62 |     st.TweetSearchRunner(search_tweets_task=search_tweets_task,
63 |                          tweet_raw_data_outputs=[output_print, output_jl_tweets],
64 |                          user_raw_data_outputs=[output_print, output_jl_users]).run()
65 |
66 |
67 | def try_user_scrap():
68 |     user_task = st.GetUsersTask(['iga_swiatek'])
69 |     output_json = st.JsonLineFileRawOutput('output_raw_user.jl')
70 |     output_print = st.PrintRawOutput()
71 |     st.GetUsersRunner(get_user_task=user_task, raw_data_outputs=[output_print, output_json]).run()
72 |
73 |
74 | def try_tweet_by_id_scrap():
75 |     id_task = st.TweetsByIdTask('1447348840164564994')
76 |     output_json = st.JsonLineFileRawOutput('output_raw_id.jl')
77 |     output_print = st.PrintRawOutput()
78 |     st.TweetsByIdRunner(tweets_by_id_task=id_task,
79 |                         raw_data_outputs=[output_print, output_json]).run()
80 |
81 |
82 | if __name__ == '__main__':
83 |     try_search()
84 |     try_user_scrap()
85 |     try_tweet_by_id_scrap()
86 | ```
87 |
88 | The example above shows that only a few lines of code are required to scrape tweets.
89 |
90 | ## Export format
91 |
92 | Stweet uses the same API as the website, so there is no documentation of the response format. Responses
93 | are saved raw, and the end user must parse them on their own. A parser may be added in the future.
94 |
95 | Scraped data can be exported in different ways using the `RawDataOutput` abstract class. A list of
96 | these outputs can be passed to every runner, so it is possible to export in several ways at once.
97 |
98 | Currently, stweet has implemented:
99 |
100 | - **CollectorRawOutput** – saves data in memory and returns it as a list of objects
101 | - **JsonLineFileRawOutput** – exports data as JSON lines
102 | - **PrintEveryNRawOutput** – prints every N-th item
103 | - **PrintFirstInBatchRawOutput** – prints the first item in each batch
104 | - **PrintRawOutput** – prints all items (not recommended for large scraping jobs)
105 |
106 | ## Using tor proxy
107 |
108 | The library is integrated with [tor-python-easy](https://github.com/markowanga/tor-python-easy).
109 | It allows using a Tor proxy with an exposed control port, so the IP can be changed whenever needed.
110 |
111 | If you want to use the Tor proxy client, you need to prepare a custom web client and use it in the runner.
112 |
113 | You also need a running Tor proxy – you can run it on your local OS, or you can use this
114 | [docker-compose](https://github.com/markowanga/tor-python-easy/blob/main/docker-compose.yml).
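
If you choose the docker-compose option, starting the proxy can look like this (a minimal sketch, assuming Docker is
installed and the linked `docker-compose.yml` has been downloaded to the working directory):

```shell script
# Sketch: start the Tor proxy in the background using the downloaded compose file.
# That setup exposes the SOCKS port (9050) and control port (9051) used in the snippet below.
docker-compose up -d
```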
115 |
116 | The code snippet below shows how to use the proxy:
117 |
118 | ```python
119 | import stweet as st
120 |
121 | if __name__ == '__main__':
122 |     web_client = st.DefaultTwitterWebClientProvider.get_web_client_preconfigured_for_tor_proxy(
123 |         socks_proxy_url='socks5://localhost:9050',
124 |         control_host='localhost',
125 |         control_port=9051,
126 |         control_password='test1234'
127 |     )
128 |
129 |     search_tweets_task = st.SearchTweetsTask(all_words='#covid19')
130 |     output_jl_tweets = st.JsonLineFileRawOutput('output_raw_search_tweets.jl')
131 |     output_jl_users = st.JsonLineFileRawOutput('output_raw_search_users.jl')
132 |     output_print = st.PrintRawOutput()
133 |     st.TweetSearchRunner(search_tweets_task=search_tweets_task,
134 |                          tweet_raw_data_outputs=[output_print, output_jl_tweets],
135 |                          user_raw_data_outputs=[output_print, output_jl_users],
136 |                          web_client=web_client).run()
137 | ```
138 |
139 | ## Dividing the scraping period is recommended
140 |
141 | For guest clients Twitter blocks extensive pagination – sometimes only about 3 pages can be fetched
142 | for a single query. To avoid this limitation, divide the scraping period into smaller parts.
143 |
144 | In 2023 Twitter also blocked API time ranges given as timestamps – only the YYYY-MM-DD format is
145 | accepted, so in arrow you can only pass dates without hours.
146 |
147 | ## Twint inspiration
148 |
149 | A small part of the library uses code from [twint](https://github.com/twintproject/twint). Twint was
150 | also the main inspiration for creating stweet.
151 |
--------------------------------------------------------------------------------
/poetry.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Poetry and should not be changed by hand.
2 |
3 | [[package]]
4 | name = "arrow"
5 | version = "1.2.3"
6 | description = "Better dates & times for Python"
7 | category = "main"
8 | optional = false
9 | python-versions = ">=3.6"
10 | files = [
11 |     {file = "arrow-1.2.3-py3-none-any.whl", hash = "sha256:5a49ab92e3b7b71d96cd6bfcc4df14efefc9dfa96ea19045815914a6ab6b1fe2"},
12 |     {file = "arrow-1.2.3.tar.gz", hash = "sha256:3934b30ca1b9f292376d9db15b19446088d12ec58629bc3f0da28fd55fb633a1"},
13 | ]
14 |
15 | [package.dependencies]
16 | python-dateutil = ">=2.7.0"
17 |
18 | [[package]]
19 | name = "atomicwrites"
20 | version = "1.4.1"
21 | description = "Atomic file writes."
22 | category = "dev" 23 | optional = false 24 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 25 | files = [ 26 | {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, 27 | ] 28 | 29 | [[package]] 30 | name = "attrs" 31 | version = "22.2.0" 32 | description = "Classes Without Boilerplate" 33 | category = "dev" 34 | optional = false 35 | python-versions = ">=3.6" 36 | files = [ 37 | {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"}, 38 | {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"}, 39 | ] 40 | 41 | [package.extras] 42 | cov = ["attrs[tests]", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] 43 | dev = ["attrs[docs,tests]"] 44 | docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope.interface"] 45 | tests = ["attrs[tests-no-zope]", "zope.interface"] 46 | tests-no-zope = ["cloudpickle", "cloudpickle", "hypothesis", "hypothesis", "mypy (>=0.971,<0.990)", "mypy (>=0.971,<0.990)", "pympler", "pympler", "pytest (>=4.3.0)", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-mypy-plugins", "pytest-xdist[psutil]", "pytest-xdist[psutil]"] 47 | 48 | [[package]] 49 | name = "certifi" 50 | version = "2022.12.7" 51 | description = "Python package for providing Mozilla's CA Bundle." 52 | category = "main" 53 | optional = false 54 | python-versions = ">=3.6" 55 | files = [ 56 | {file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"}, 57 | {file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"}, 58 | ] 59 | 60 | [[package]] 61 | name = "charset-normalizer" 62 | version = "3.0.1" 63 | description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
64 | category = "main" 65 | optional = false 66 | python-versions = "*" 67 | files = [ 68 | {file = "charset-normalizer-3.0.1.tar.gz", hash = "sha256:ebea339af930f8ca5d7a699b921106c6e29c617fe9606fa7baa043c1cdae326f"}, 69 | {file = "charset_normalizer-3.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88600c72ef7587fe1708fd242b385b6ed4b8904976d5da0893e31df8b3480cb6"}, 70 | {file = "charset_normalizer-3.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c75ffc45f25324e68ab238cb4b5c0a38cd1c3d7f1fb1f72b5541de469e2247db"}, 71 | {file = "charset_normalizer-3.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db72b07027db150f468fbada4d85b3b2729a3db39178abf5c543b784c1254539"}, 72 | {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62595ab75873d50d57323a91dd03e6966eb79c41fa834b7a1661ed043b2d404d"}, 73 | {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ff6f3db31555657f3163b15a6b7c6938d08df7adbfc9dd13d9d19edad678f1e8"}, 74 | {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:772b87914ff1152b92a197ef4ea40efe27a378606c39446ded52c8f80f79702e"}, 75 | {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70990b9c51340e4044cfc394a81f614f3f90d41397104d226f21e66de668730d"}, 76 | {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:292d5e8ba896bbfd6334b096e34bffb56161c81408d6d036a7dfa6929cff8783"}, 77 | {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:2edb64ee7bf1ed524a1da60cdcd2e1f6e2b4f66ef7c077680739f1641f62f555"}, 78 | {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:31a9ddf4718d10ae04d9b18801bd776693487cbb57d74cc3458a7673f6f34639"}, 79 | {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:44ba614de5361b3e5278e1241fda3dc1838deed864b50a10d7ce92983797fa76"}, 80 | {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:12db3b2c533c23ab812c2b25934f60383361f8a376ae272665f8e48b88e8e1c6"}, 81 | {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c512accbd6ff0270939b9ac214b84fb5ada5f0409c44298361b2f5e13f9aed9e"}, 82 | {file = "charset_normalizer-3.0.1-cp310-cp310-win32.whl", hash = "sha256:502218f52498a36d6bf5ea77081844017bf7982cdbe521ad85e64cabee1b608b"}, 83 | {file = "charset_normalizer-3.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:601f36512f9e28f029d9481bdaf8e89e5148ac5d89cffd3b05cd533eeb423b59"}, 84 | {file = "charset_normalizer-3.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0298eafff88c99982a4cf66ba2efa1128e4ddaca0b05eec4c456bbc7db691d8d"}, 85 | {file = "charset_normalizer-3.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a8d0fc946c784ff7f7c3742310cc8a57c5c6dc31631269876a88b809dbeff3d3"}, 86 | {file = "charset_normalizer-3.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:87701167f2a5c930b403e9756fab1d31d4d4da52856143b609e30a1ce7160f3c"}, 87 | {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e76c0f23218b8f46c4d87018ca2e441535aed3632ca134b10239dfb6dadd6b"}, 88 | {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:0c0a590235ccd933d9892c627dec5bc7511ce6ad6c1011fdf5b11363022746c1"}, 89 | {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c7fe7afa480e3e82eed58e0ca89f751cd14d767638e2550c77a92a9e749c317"}, 90 | {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79909e27e8e4fcc9db4addea88aa63f6423ebb171db091fb4373e3312cb6d603"}, 91 | {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ac7b6a045b814cf0c47f3623d21ebd88b3e8cf216a14790b455ea7ff0135d18"}, 92 | {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:72966d1b297c741541ca8cf1223ff262a6febe52481af742036a0b296e35fa5a"}, 93 | {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f9d0c5c045a3ca9bedfc35dca8526798eb91a07aa7a2c0fee134c6c6f321cbd7"}, 94 | {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:5995f0164fa7df59db4746112fec3f49c461dd6b31b841873443bdb077c13cfc"}, 95 | {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4a8fcf28c05c1f6d7e177a9a46a1c52798bfe2ad80681d275b10dcf317deaf0b"}, 96 | {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:761e8904c07ad053d285670f36dd94e1b6ab7f16ce62b9805c475b7aa1cffde6"}, 97 | {file = "charset_normalizer-3.0.1-cp311-cp311-win32.whl", hash = "sha256:71140351489970dfe5e60fc621ada3e0f41104a5eddaca47a7acb3c1b851d6d3"}, 98 | {file = "charset_normalizer-3.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:9ab77acb98eba3fd2a85cd160851816bfce6871d944d885febf012713f06659c"}, 99 | {file = "charset_normalizer-3.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:84c3990934bae40ea69a82034912ffe5a62c60bbf6ec5bc9691419641d7d5c9a"}, 100 | {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74292fc76c905c0ef095fe11e188a32ebd03bc38f3f3e9bcb85e4e6db177b7ea"}, 101 | {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c95a03c79bbe30eec3ec2b7f076074f4281526724c8685a42872974ef4d36b72"}, 102 | {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f4c39b0e3eac288fedc2b43055cfc2ca7a60362d0e5e87a637beac5d801ef478"}, 103 | {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df2c707231459e8a4028eabcd3cfc827befd635b3ef72eada84ab13b52e1574d"}, 104 | {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93ad6d87ac18e2a90b0fe89df7c65263b9a99a0eb98f0a3d2e079f12a0735837"}, 105 | {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:59e5686dd847347e55dffcc191a96622f016bc0ad89105e24c14e0d6305acbc6"}, 106 | {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:cd6056167405314a4dc3c173943f11249fa0f1b204f8b51ed4bde1a9cd1834dc"}, 107 | {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:083c8d17153ecb403e5e1eb76a7ef4babfc2c48d58899c98fcaa04833e7a2f9a"}, 108 | {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:f5057856d21e7586765171eac8b9fc3f7d44ef39425f85dbcccb13b3ebea806c"}, 109 | {file = 
"charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:7eb33a30d75562222b64f569c642ff3dc6689e09adda43a082208397f016c39a"}, 110 | {file = "charset_normalizer-3.0.1-cp36-cp36m-win32.whl", hash = "sha256:95dea361dd73757c6f1c0a1480ac499952c16ac83f7f5f4f84f0658a01b8ef41"}, 111 | {file = "charset_normalizer-3.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:eaa379fcd227ca235d04152ca6704c7cb55564116f8bc52545ff357628e10602"}, 112 | {file = "charset_normalizer-3.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3e45867f1f2ab0711d60c6c71746ac53537f1684baa699f4f668d4c6f6ce8e14"}, 113 | {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cadaeaba78750d58d3cc6ac4d1fd867da6fc73c88156b7a3212a3cd4819d679d"}, 114 | {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:911d8a40b2bef5b8bbae2e36a0b103f142ac53557ab421dc16ac4aafee6f53dc"}, 115 | {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:503e65837c71b875ecdd733877d852adbc465bd82c768a067badd953bf1bc5a3"}, 116 | {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a60332922359f920193b1d4826953c507a877b523b2395ad7bc716ddd386d866"}, 117 | {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16a8663d6e281208d78806dbe14ee9903715361cf81f6d4309944e4d1e59ac5b"}, 118 | {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:a16418ecf1329f71df119e8a65f3aa68004a3f9383821edcb20f0702934d8087"}, 119 | {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:9d9153257a3f70d5f69edf2325357251ed20f772b12e593f3b3377b5f78e7ef8"}, 120 | {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:02a51034802cbf38db3f89c66fb5d2ec57e6fe7ef2f4a44d070a593c3688667b"}, 121 | {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:2e396d70bc4ef5325b72b593a72c8979999aa52fb8bcf03f701c1b03e1166918"}, 122 | {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:11b53acf2411c3b09e6af37e4b9005cba376c872503c8f28218c7243582df45d"}, 123 | {file = "charset_normalizer-3.0.1-cp37-cp37m-win32.whl", hash = "sha256:0bf2dae5291758b6f84cf923bfaa285632816007db0330002fa1de38bfcb7154"}, 124 | {file = "charset_normalizer-3.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:2c03cc56021a4bd59be889c2b9257dae13bf55041a3372d3295416f86b295fb5"}, 125 | {file = "charset_normalizer-3.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:024e606be3ed92216e2b6952ed859d86b4cfa52cd5bc5f050e7dc28f9b43ec42"}, 126 | {file = "charset_normalizer-3.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4b0d02d7102dd0f997580b51edc4cebcf2ab6397a7edf89f1c73b586c614272c"}, 127 | {file = "charset_normalizer-3.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:358a7c4cb8ba9b46c453b1dd8d9e431452d5249072e4f56cfda3149f6ab1405e"}, 128 | {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81d6741ab457d14fdedc215516665050f3822d3e56508921cc7239f8c8e66a58"}, 129 | {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8b8af03d2e37866d023ad0ddea594edefc31e827fee64f8de5611a1dbc373174"}, 130 | {file = 
"charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9cf4e8ad252f7c38dd1f676b46514f92dc0ebeb0db5552f5f403509705e24753"}, 131 | {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e696f0dd336161fca9adbb846875d40752e6eba585843c768935ba5c9960722b"}, 132 | {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c22d3fe05ce11d3671297dc8973267daa0f938b93ec716e12e0f6dee81591dc1"}, 133 | {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:109487860ef6a328f3eec66f2bf78b0b72400280d8f8ea05f69c51644ba6521a"}, 134 | {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:37f8febc8ec50c14f3ec9637505f28e58d4f66752207ea177c1d67df25da5aed"}, 135 | {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:f97e83fa6c25693c7a35de154681fcc257c1c41b38beb0304b9c4d2d9e164479"}, 136 | {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a152f5f33d64a6be73f1d30c9cc82dfc73cec6477ec268e7c6e4c7d23c2d2291"}, 137 | {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:39049da0ffb96c8cbb65cbf5c5f3ca3168990adf3551bd1dee10c48fce8ae820"}, 138 | {file = "charset_normalizer-3.0.1-cp38-cp38-win32.whl", hash = "sha256:4457ea6774b5611f4bed5eaa5df55f70abde42364d498c5134b7ef4c6958e20e"}, 139 | {file = "charset_normalizer-3.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:e62164b50f84e20601c1ff8eb55620d2ad25fb81b59e3cd776a1902527a788af"}, 140 | {file = "charset_normalizer-3.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8eade758719add78ec36dc13201483f8e9b5d940329285edcd5f70c0a9edbd7f"}, 141 | {file = "charset_normalizer-3.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8499ca8f4502af841f68135133d8258f7b32a53a1d594aa98cc52013fff55678"}, 142 | {file = "charset_normalizer-3.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3fc1c4a2ffd64890aebdb3f97e1278b0cc72579a08ca4de8cd2c04799a3a22be"}, 143 | {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00d3ffdaafe92a5dc603cb9bd5111aaa36dfa187c8285c543be562e61b755f6b"}, 144 | {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2ac1b08635a8cd4e0cbeaf6f5e922085908d48eb05d44c5ae9eabab148512ca"}, 145 | {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f6f45710b4459401609ebebdbcfb34515da4fc2aa886f95107f556ac69a9147e"}, 146 | {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ae1de54a77dc0d6d5fcf623290af4266412a7c4be0b1ff7444394f03f5c54e3"}, 147 | {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b590df687e3c5ee0deef9fc8c547d81986d9a1b56073d82de008744452d6541"}, 148 | {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab5de034a886f616a5668aa5d098af2b5385ed70142090e2a31bcbd0af0fdb3d"}, 149 | {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9cb3032517f1627cc012dbc80a8ec976ae76d93ea2b5feaa9d2a5b8882597579"}, 150 | {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:608862a7bf6957f2333fc54ab4399e405baad0163dc9f8d99cb236816db169d4"}, 
151 | {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0f438ae3532723fb6ead77e7c604be7c8374094ef4ee2c5e03a3a17f1fca256c"}, 152 | {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:356541bf4381fa35856dafa6a965916e54bed415ad8a24ee6de6e37deccf2786"}, 153 | {file = "charset_normalizer-3.0.1-cp39-cp39-win32.whl", hash = "sha256:39cf9ed17fe3b1bc81f33c9ceb6ce67683ee7526e65fde1447c772afc54a1bb8"}, 154 | {file = "charset_normalizer-3.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:0a11e971ed097d24c534c037d298ad32c6ce81a45736d31e0ff0ad37ab437d59"}, 155 | {file = "charset_normalizer-3.0.1-py3-none-any.whl", hash = "sha256:7e189e2e1d3ed2f4aebabd2d5b0f931e883676e51c7624826e0a4e5fe8a0bf24"}, 156 | ] 157 | 158 | [[package]] 159 | name = "colorama" 160 | version = "0.4.6" 161 | description = "Cross-platform colored terminal text." 162 | category = "dev" 163 | optional = false 164 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" 165 | files = [ 166 | {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, 167 | {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, 168 | ] 169 | 170 | [[package]] 171 | name = "idna" 172 | version = "3.4" 173 | description = "Internationalized Domain Names in Applications (IDNA)" 174 | category = "main" 175 | optional = false 176 | python-versions = ">=3.5" 177 | files = [ 178 | {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, 179 | {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, 180 | ] 181 | 182 | [[package]] 183 | name = "iniconfig" 184 | version = "2.0.0" 185 | description = "brain-dead simple config-ini parsing" 186 | category = "dev" 187 | optional = false 188 | python-versions = ">=3.7" 189 | files = [ 190 | {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, 191 | {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, 192 | ] 193 | 194 | [[package]] 195 | name = "isort" 196 | version = "5.12.0" 197 | description = "A Python utility / library to sort Python imports." 
198 | category = "dev" 199 | optional = false 200 | python-versions = ">=3.8.0" 201 | files = [ 202 | {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"}, 203 | {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"}, 204 | ] 205 | 206 | [package.extras] 207 | colors = ["colorama (>=0.4.3)"] 208 | pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"] 209 | plugins = ["setuptools"] 210 | requirements-deprecated-finder = ["pip-api", "pipreqs"] 211 | 212 | [[package]] 213 | name = "numpy" 214 | version = "1.24.2" 215 | description = "Fundamental package for array computing in Python" 216 | category = "main" 217 | optional = false 218 | python-versions = ">=3.8" 219 | files = [ 220 | {file = "numpy-1.24.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eef70b4fc1e872ebddc38cddacc87c19a3709c0e3e5d20bf3954c147b1dd941d"}, 221 | {file = "numpy-1.24.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e8d2859428712785e8a8b7d2b3ef0a1d1565892367b32f915c4a4df44d0e64f5"}, 222 | {file = "numpy-1.24.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6524630f71631be2dabe0c541e7675db82651eb998496bbe16bc4f77f0772253"}, 223 | {file = "numpy-1.24.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a51725a815a6188c662fb66fb32077709a9ca38053f0274640293a14fdd22978"}, 224 | {file = "numpy-1.24.2-cp310-cp310-win32.whl", hash = "sha256:2620e8592136e073bd12ee4536149380695fbe9ebeae845b81237f986479ffc9"}, 225 | {file = "numpy-1.24.2-cp310-cp310-win_amd64.whl", hash = "sha256:97cf27e51fa078078c649a51d7ade3c92d9e709ba2bfb97493007103c741f1d0"}, 226 | {file = "numpy-1.24.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7de8fdde0003f4294655aa5d5f0a89c26b9f22c0a58790c38fae1ed392d44a5a"}, 227 | {file = "numpy-1.24.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4173bde9fa2a005c2c6e2ea8ac1618e2ed2c1c6ec8a7657237854d42094123a0"}, 228 | {file = "numpy-1.24.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cecaed30dc14123020f77b03601559fff3e6cd0c048f8b5289f4eeabb0eb281"}, 229 | {file = "numpy-1.24.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a23f8440561a633204a67fb44617ce2a299beecf3295f0d13c495518908e910"}, 230 | {file = "numpy-1.24.2-cp311-cp311-win32.whl", hash = "sha256:e428c4fbfa085f947b536706a2fc349245d7baa8334f0c5723c56a10595f9b95"}, 231 | {file = "numpy-1.24.2-cp311-cp311-win_amd64.whl", hash = "sha256:557d42778a6869c2162deb40ad82612645e21d79e11c1dc62c6e82a2220ffb04"}, 232 | {file = "numpy-1.24.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d0a2db9d20117bf523dde15858398e7c0858aadca7c0f088ac0d6edd360e9ad2"}, 233 | {file = "numpy-1.24.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c72a6b2f4af1adfe193f7beb91ddf708ff867a3f977ef2ec53c0ffb8283ab9f5"}, 234 | {file = "numpy-1.24.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c29e6bd0ec49a44d7690ecb623a8eac5ab8a923bce0bea6293953992edf3a76a"}, 235 | {file = "numpy-1.24.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2eabd64ddb96a1239791da78fa5f4e1693ae2dadc82a76bc76a14cbb2b966e96"}, 236 | {file = "numpy-1.24.2-cp38-cp38-win32.whl", hash = "sha256:e3ab5d32784e843fc0dd3ab6dcafc67ef806e6b6828dc6af2f689be0eb4d781d"}, 237 | {file = "numpy-1.24.2-cp38-cp38-win_amd64.whl", hash = 
"sha256:76807b4063f0002c8532cfeac47a3068a69561e9c8715efdad3c642eb27c0756"}, 238 | {file = "numpy-1.24.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4199e7cfc307a778f72d293372736223e39ec9ac096ff0a2e64853b866a8e18a"}, 239 | {file = "numpy-1.24.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:adbdce121896fd3a17a77ab0b0b5eedf05a9834a18699db6829a64e1dfccca7f"}, 240 | {file = "numpy-1.24.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:889b2cc88b837d86eda1b17008ebeb679d82875022200c6e8e4ce6cf549b7acb"}, 241 | {file = "numpy-1.24.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f64bb98ac59b3ea3bf74b02f13836eb2e24e48e0ab0145bbda646295769bd780"}, 242 | {file = "numpy-1.24.2-cp39-cp39-win32.whl", hash = "sha256:63e45511ee4d9d976637d11e6c9864eae50e12dc9598f531c035265991910468"}, 243 | {file = "numpy-1.24.2-cp39-cp39-win_amd64.whl", hash = "sha256:a77d3e1163a7770164404607b7ba3967fb49b24782a6ef85d9b5f54126cc39e5"}, 244 | {file = "numpy-1.24.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:92011118955724465fb6853def593cf397b4a1367495e0b59a7e69d40c4eb71d"}, 245 | {file = "numpy-1.24.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9006288bcf4895917d02583cf3411f98631275bc67cce355a7f39f8c14338fa"}, 246 | {file = "numpy-1.24.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:150947adbdfeceec4e5926d956a06865c1c690f2fd902efede4ca6fe2e657c3f"}, 247 | {file = "numpy-1.24.2.tar.gz", hash = "sha256:003a9f530e880cb2cd177cba1af7220b9aa42def9c4afc2a2fc3ee6be7eb2b22"}, 248 | ] 249 | 250 | [[package]] 251 | name = "packaging" 252 | version = "23.0" 253 | description = "Core utilities for Python packages" 254 | category = "dev" 255 | optional = false 256 | python-versions = ">=3.7" 257 | files = [ 258 | {file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"}, 259 | {file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"}, 260 | ] 261 | 262 | [[package]] 263 | name = "pandas" 264 | version = "1.5.3" 265 | description = "Powerful data structures for data analysis, time series, and statistics" 266 | category = "main" 267 | optional = false 268 | python-versions = ">=3.8" 269 | files = [ 270 | {file = "pandas-1.5.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3749077d86e3a2f0ed51367f30bf5b82e131cc0f14260c4d3e499186fccc4406"}, 271 | {file = "pandas-1.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:972d8a45395f2a2d26733eb8d0f629b2f90bebe8e8eddbb8829b180c09639572"}, 272 | {file = "pandas-1.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:50869a35cbb0f2e0cd5ec04b191e7b12ed688874bd05dd777c19b28cbea90996"}, 273 | {file = "pandas-1.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3ac844a0fe00bfaeb2c9b51ab1424e5c8744f89860b138434a363b1f620f354"}, 274 | {file = "pandas-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a0a56cef15fd1586726dace5616db75ebcfec9179a3a55e78f72c5639fa2a23"}, 275 | {file = "pandas-1.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:478ff646ca42b20376e4ed3fa2e8d7341e8a63105586efe54fa2508ee087f328"}, 276 | {file = "pandas-1.5.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6973549c01ca91ec96199e940495219c887ea815b2083722821f1d7abfa2b4dc"}, 277 | {file = "pandas-1.5.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:c39a8da13cede5adcd3be1182883aea1c925476f4e84b2807a46e2775306305d"}, 278 | {file = "pandas-1.5.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f76d097d12c82a535fda9dfe5e8dd4127952b45fea9b0276cb30cca5ea313fbc"}, 279 | {file = "pandas-1.5.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e474390e60ed609cec869b0da796ad94f420bb057d86784191eefc62b65819ae"}, 280 | {file = "pandas-1.5.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f2b952406a1588ad4cad5b3f55f520e82e902388a6d5a4a91baa8d38d23c7f6"}, 281 | {file = "pandas-1.5.3-cp311-cp311-win_amd64.whl", hash = "sha256:bc4c368f42b551bf72fac35c5128963a171b40dce866fb066540eeaf46faa003"}, 282 | {file = "pandas-1.5.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:14e45300521902689a81f3f41386dc86f19b8ba8dd5ac5a3c7010ef8d2932813"}, 283 | {file = "pandas-1.5.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9842b6f4b8479e41968eced654487258ed81df7d1c9b7b870ceea24ed9459b31"}, 284 | {file = "pandas-1.5.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:26d9c71772c7afb9d5046e6e9cf42d83dd147b5cf5bcb9d97252077118543792"}, 285 | {file = "pandas-1.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fbcb19d6fceb9e946b3e23258757c7b225ba450990d9ed63ccceeb8cae609f7"}, 286 | {file = "pandas-1.5.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:565fa34a5434d38e9d250af3c12ff931abaf88050551d9fbcdfafca50d62babf"}, 287 | {file = "pandas-1.5.3-cp38-cp38-win32.whl", hash = "sha256:87bd9c03da1ac870a6d2c8902a0e1fd4267ca00f13bc494c9e5a9020920e1d51"}, 288 | {file = "pandas-1.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:41179ce559943d83a9b4bbacb736b04c928b095b5f25dd2b7389eda08f46f373"}, 289 | {file = "pandas-1.5.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c74a62747864ed568f5a82a49a23a8d7fe171d0c69038b38cedf0976831296fa"}, 290 | {file = "pandas-1.5.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c4c00e0b0597c8e4f59e8d461f797e5d70b4d025880516a8261b2817c47759ee"}, 291 | {file = "pandas-1.5.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a50d9a4336a9621cab7b8eb3fb11adb82de58f9b91d84c2cd526576b881a0c5a"}, 292 | {file = "pandas-1.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd05f7783b3274aa206a1af06f0ceed3f9b412cf665b7247eacd83be41cf7bf0"}, 293 | {file = "pandas-1.5.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f69c4029613de47816b1bb30ff5ac778686688751a5e9c99ad8c7031f6508e5"}, 294 | {file = "pandas-1.5.3-cp39-cp39-win32.whl", hash = "sha256:7cec0bee9f294e5de5bbfc14d0573f65526071029d036b753ee6507d2a21480a"}, 295 | {file = "pandas-1.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:dfd681c5dc216037e0b0a2c821f5ed99ba9f03ebcf119c7dac0e9a7b960b9ec9"}, 296 | {file = "pandas-1.5.3.tar.gz", hash = "sha256:74a3fd7e5a7ec052f183273dc7b0acd3a863edf7520f5d3a1765c04ffdb3b0b1"}, 297 | ] 298 | 299 | [package.dependencies] 300 | numpy = [ 301 | {version = ">=1.20.3", markers = "python_version < \"3.10\""}, 302 | {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, 303 | {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, 304 | ] 305 | python-dateutil = ">=2.8.1" 306 | pytz = ">=2020.1" 307 | 308 | [package.extras] 309 | test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] 310 | 311 | [[package]] 312 | name = "pluggy" 313 | version = "1.0.0" 314 | description = "plugin and hook calling mechanisms for python" 315 | category = "dev" 316 | optional 
= false 317 | python-versions = ">=3.6" 318 | files = [ 319 | {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, 320 | {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, 321 | ] 322 | 323 | [package.extras] 324 | dev = ["pre-commit", "tox"] 325 | testing = ["pytest", "pytest-benchmark"] 326 | 327 | [[package]] 328 | name = "py" 329 | version = "1.11.0" 330 | description = "library with cross-python path, ini-parsing, io, code, log facilities" 331 | category = "dev" 332 | optional = false 333 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 334 | files = [ 335 | {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, 336 | {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, 337 | ] 338 | 339 | [[package]] 340 | name = "pysocks" 341 | version = "1.7.1" 342 | description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." 343 | category = "main" 344 | optional = false 345 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 346 | files = [ 347 | {file = "PySocks-1.7.1-py27-none-any.whl", hash = "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299"}, 348 | {file = "PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5"}, 349 | {file = "PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"}, 350 | ] 351 | 352 | [[package]] 353 | name = "pytest" 354 | version = "6.2.5" 355 | description = "pytest: simple powerful testing with Python" 356 | category = "dev" 357 | optional = false 358 | python-versions = ">=3.6" 359 | files = [ 360 | {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"}, 361 | {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, 362 | ] 363 | 364 | [package.dependencies] 365 | atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} 366 | attrs = ">=19.2.0" 367 | colorama = {version = "*", markers = "sys_platform == \"win32\""} 368 | iniconfig = "*" 369 | packaging = "*" 370 | pluggy = ">=0.12,<2.0" 371 | py = ">=1.8.2" 372 | toml = "*" 373 | 374 | [package.extras] 375 | testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] 376 | 377 | [[package]] 378 | name = "python-dateutil" 379 | version = "2.8.2" 380 | description = "Extensions to the standard Python datetime module" 381 | category = "main" 382 | optional = false 383 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" 384 | files = [ 385 | {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, 386 | {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, 387 | ] 388 | 389 | [package.dependencies] 390 | six = ">=1.5" 391 | 392 | [[package]] 393 | name = "pytz" 394 | version = "2022.7.1" 395 | description = "World timezone definitions, modern and historical" 396 | category = "main" 397 | optional = false 398 | python-versions = "*" 399 | files = [ 400 | {file = "pytz-2022.7.1-py2.py3-none-any.whl", hash = 
"sha256:78f4f37d8198e0627c5f1143240bb0206b8691d8d7ac6d78fee88b78733f8c4a"}, 401 | {file = "pytz-2022.7.1.tar.gz", hash = "sha256:01a0681c4b9684a28304615eba55d1ab31ae00bf68ec157ec3708a8182dbbcd0"}, 402 | ] 403 | 404 | [[package]] 405 | name = "requests" 406 | version = "2.28.2" 407 | description = "Python HTTP for Humans." 408 | category = "main" 409 | optional = false 410 | python-versions = ">=3.7, <4" 411 | files = [ 412 | {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"}, 413 | {file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"}, 414 | ] 415 | 416 | [package.dependencies] 417 | certifi = ">=2017.4.17" 418 | charset-normalizer = ">=2,<4" 419 | idna = ">=2.5,<4" 420 | urllib3 = ">=1.21.1,<1.27" 421 | 422 | [package.extras] 423 | socks = ["PySocks (>=1.5.6,!=1.5.7)"] 424 | use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] 425 | 426 | [[package]] 427 | name = "six" 428 | version = "1.16.0" 429 | description = "Python 2 and 3 compatibility utilities" 430 | category = "main" 431 | optional = false 432 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 433 | files = [ 434 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, 435 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, 436 | ] 437 | 438 | [[package]] 439 | name = "toml" 440 | version = "0.10.2" 441 | description = "Python Library for Tom's Obvious, Minimal Language" 442 | category = "dev" 443 | optional = false 444 | python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" 445 | files = [ 446 | {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, 447 | {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, 448 | ] 449 | 450 | [[package]] 451 | name = "tor-python-easy" 452 | version = "0.1.5" 453 | description = "Simple library to manage tor proxy and IP changes" 454 | category = "main" 455 | optional = false 456 | python-versions = ">=3.8,<4.0" 457 | files = [ 458 | {file = "tor-python-easy-0.1.5.tar.gz", hash = "sha256:e4b0618b9a0bc8e3415cf68274af5f7759b96f37060a34377845624e9f1ab01d"}, 459 | {file = "tor_python_easy-0.1.5-py3-none-any.whl", hash = "sha256:c154fa30e301fa8cf3b0f7563ec55bbc6abd54a57fccf6a96357ee3529b11aad"}, 460 | ] 461 | 462 | [package.dependencies] 463 | PySocks = ">=1.7.1,<2.0.0" 464 | requests = ">=2.26.0,<3.0.0" 465 | 466 | [[package]] 467 | name = "urllib3" 468 | version = "1.26.14" 469 | description = "HTTP library with thread-safe connection pooling, file post, and more." 
470 | category = "main" 471 | optional = false 472 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" 473 | files = [ 474 | {file = "urllib3-1.26.14-py2.py3-none-any.whl", hash = "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1"}, 475 | {file = "urllib3-1.26.14.tar.gz", hash = "sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72"}, 476 | ] 477 | 478 | [package.extras] 479 | brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] 480 | secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] 481 | socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] 482 | 483 | [metadata] 484 | lock-version = "2.0" 485 | python-versions = "^3.8" 486 | content-hash = "66fdf491f1724d2864e6451ef720f3f48a95cb86bfffbc50acad33b5a0b3eff8" 487 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "stweet" 3 | version = "2.1.1" 4 | description = "Package to scrap tweets" 5 | authors = ["Marcin Wątroba "] 6 | license = "MIT" 7 | readme = "README.md" 8 | packages = [ 9 | { include = "stweet" } 10 | ] 11 | 12 | [tool.poetry.dependencies] 13 | python = "^3.8" 14 | requests = "^2.26.0" 15 | pandas = "^1.3.3" 16 | arrow = "^1.2.0" 17 | tor-python-easy = "^0.1.2" 18 | 19 | [tool.poetry.dev-dependencies] 20 | pytest = "^6.2.5" 21 | 22 | [tool.poetry.group.dev.dependencies] 23 | isort = "^5.12.0" 24 | 25 | [build-system] 26 | requires = ["poetry-core"] 27 | build-backend = "poetry.core.masonry.api" 28 | -------------------------------------------------------------------------------- /stweet/__init__.py: -------------------------------------------------------------------------------- 1 | from .get_user_runner import GetUsersResult, GetUsersRunner, GetUsersTask 2 | from .http_request import (RequestsWebClient, RequestsWebClientProxyConfig, 3 | WebClient) 4 | from .model import Language, UserTweetRaw 5 | from .raw_output import (CollectorRawOutput, JsonLineFileRawOutput, 6 | PrintEveryNRawOutput, PrintFirstInBatchRawOutput, 7 | PrintRawOutput) 8 | from .search_runner import (RepliesFilter, SearchTweetsResult, 9 | SearchTweetsTask, TweetSearchRunner) 10 | from .tweets_by_ids_runner import (TweetsByIdResult, TweetsByIdRunner, 11 | TweetsByIdTask) 12 | from .twitter_api import DefaultTwitterWebClientProvider 13 | -------------------------------------------------------------------------------- /stweet/auth/__init__.py: -------------------------------------------------------------------------------- 1 | from .auth_token_provider import AuthTokenProvider 2 | from .simple_auth_token_provider import SimpleAuthTokenProvider 3 | -------------------------------------------------------------------------------- /stweet/auth/auth_token_provider.py: -------------------------------------------------------------------------------- 1 | """Abstract class for get guest auth token.""" 2 | from abc import abstractmethod 3 | 4 | from ..http_request.web_client import WebClient 5 | 6 | 7 | class AuthTokenProvider: 8 | """Abstract class for get guest auth token.""" 9 | 10 | @abstractmethod 11 | def get_new_token(self, web_client: WebClient) -> str: 12 | """Method returns new token.""" 13 | -------------------------------------------------------------------------------- /stweet/auth/fail_strategy/__init__.py: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markowanga/stweet/fe34e98254dc7646bde6e083b5f6f745a0ee8cb6/stweet/auth/fail_strategy/__init__.py
--------------------------------------------------------------------------------
/stweet/auth/fail_strategy/auth_fail_strategy.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 |
4 | class AuthFailStrategy(ABC):
5 |
6 | @abstractmethod
7 | def run_strategy(self) -> None:
8 | pass
9 |
--------------------------------------------------------------------------------
/stweet/auth/fail_strategy/tor_ip_change_auth_fail_strategy.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | from tor_python_easy.tor_control_port_client import TorControlPortClient
4 |
5 | from stweet.auth.fail_strategy.auth_fail_strategy import AuthFailStrategy
6 |
7 |
8 | class TorIpChangeAuthFailStrategy(AuthFailStrategy):
9 | tor_control_port_client: TorControlPortClient
10 |
11 | def __init__(self, tor_control_port_client: TorControlPortClient):
12 | self.tor_control_port_client = tor_control_port_client
13 |
14 | def run_strategy(self) -> None:
15 | time.sleep(5)
16 | self.tor_control_port_client.change_connection_ip()
17 |
--------------------------------------------------------------------------------
/stweet/auth/fail_strategy/wait_auth_fail_strategy.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | from stweet.auth.fail_strategy.auth_fail_strategy import AuthFailStrategy
4 |
5 |
6 | class WaitAuthFailStrategy(AuthFailStrategy):
7 | ms_wait: int
8 |
9 | def __init__(self, ms_wait: int):
10 | self.ms_wait = ms_wait
11 |
12 | def run_strategy(self) -> None:
13 | time.sleep(self.ms_wait * 1.0 / 1000)
14 |
--------------------------------------------------------------------------------
/stweet/auth/simple_auth_token_provider.py:
--------------------------------------------------------------------------------
1 | """Utility to process the access token for the Twitter API."""
2 |
3 | import json
4 | import time
5 | from json import JSONDecodeError
6 | from typing import Callable, Optional
7 |
8 | from ..exceptions import RefreshTokenException
9 | from ..exceptions.too_many_requests_exception import TooManyRequestsException
10 | from ..http_request import WebClient
11 | from ..twitter_api.twitter_api_requests import TwitterApiRequests
12 | from .auth_token_provider import AuthTokenProvider
13 | from .fail_strategy.auth_fail_strategy import AuthFailStrategy
14 | from .fail_strategy.wait_auth_fail_strategy import WaitAuthFailStrategy
15 |
16 | _TIMEOUT = 20
17 | _URL = 'https://api.twitter.com/1.1/guest/activate.json'
18 |
19 |
20 | def _run_retrying_for_string(
21 | stop_max_ms: int,
22 | on_except_function: Callable[[], None],
23 | catch_predicate: Callable[[Exception], bool],
24 | call_function: Callable[[], str]
25 | ) -> str:
26 | def current_milli_time():
27 | return round(time.time() * 1000)
28 |
29 | first_error_time = -1
30 | result = None
31 | while result is None:
32 | try:
33 | result = call_function()
34 | except Exception as e:
35 | if first_error_time == -1:
36 | first_error_time = current_milli_time()
37 | time_from_first_error = current_milli_time() - first_error_time
38 | is_time_over = time_from_first_error > stop_max_ms
39 | if not catch_predicate(e) or is_time_over:
40 | raise e
41 | on_except_function()
42 | return
result
43 |
44 |
45 | class SimpleAuthTokenProvider(AuthTokenProvider):
46 | """Class to manage the Twitter token API."""
47 |
48 | auth_fail_strategy: AuthFailStrategy
49 | stop_max_delay_on_too_many_requests_exception: int
50 |
51 | def __init__(
52 | self,
53 | auth_fail_strategy: Optional[AuthFailStrategy] = None,
54 | stop_max_delay_on_too_many_requests_exception: int = 40 * 60 * 1000
55 | ):
56 | """Constructor of SimpleAuthTokenProvider, can override the default retry time."""
57 | self.auth_fail_strategy = auth_fail_strategy
58 | if self.auth_fail_strategy is None:
59 | self.auth_fail_strategy = WaitAuthFailStrategy(60 * 1000)
60 | self.stop_max_delay_on_too_many_requests_exception = \
61 | stop_max_delay_on_too_many_requests_exception
62 |
63 | def _request_for_response_body(self, web_client: WebClient):
64 | """Method adapted from Twint."""
65 | token_request_details = TwitterApiRequests().get_guest_token_request_details()
66 | token_response = web_client.run_request(token_request_details)
67 | if token_response.is_success():
68 | return token_response.text
69 | else:
70 | raise RefreshTokenException(f'Error during request for token -- {token_response}')
71 |
72 | def get_new_token(self, web_client: WebClient) -> str:
73 | """Method to get a refreshed token. In case of error raises RefreshTokenException."""
74 |
75 | def simple_get_new_token() -> str:
76 | try:
77 | token_html = self._request_for_response_body(web_client)
78 | return json.loads(token_html)['guest_token']
79 | except JSONDecodeError:
80 | raise RefreshTokenException('Error during request for token')
81 | except KeyError:
82 | raise RefreshTokenException('Error during request for token')
83 |
84 | # retry loop implemented manually, as suggested in https://github.com/rholder/retrying/issues/70#issuecomment-313129305
85 | return _run_retrying_for_string(
86 | stop_max_ms=self.stop_max_delay_on_too_many_requests_exception,
87 | on_except_function=self.auth_fail_strategy.run_strategy,
88 | catch_predicate=lambda e: isinstance(e, TooManyRequestsException),
89 | call_function=simple_get_new_token
90 | )
91 |
--------------------------------------------------------------------------------
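The provider above retries the guest-token request until stop_max_delay_on_too_many_requests_exception elapses, running the configured fail strategy between attempts. A minimal sketch of wiring it into a web client with a shorter wait strategy (the values are illustrative, not the library defaults):

import stweet as st
from stweet.auth import SimpleAuthTokenProvider
from stweet.auth.fail_strategy.wait_auth_fail_strategy import WaitAuthFailStrategy
from stweet.twitter_api.twitter_auth_web_client_interceptor import TwitterAuthWebClientInterceptor

# retry token refreshes for at most 5 minutes, waiting 10 seconds between failures
provider = SimpleAuthTokenProvider(
    auth_fail_strategy=WaitAuthFailStrategy(10 * 1000),
    stop_max_delay_on_too_many_requests_exception=5 * 60 * 1000
)
web_client = st.RequestsWebClient(
    interceptors=[TwitterAuthWebClientInterceptor(auth_token_provider=provider)]
)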
/stweet/exceptions/__init__.py:
--------------------------------------------------------------------------------
1 | from .refresh_token_exception import RefreshTokenException
2 | from .scrap_batch_bad_response import ScrapBatchBadResponse
3 |
--------------------------------------------------------------------------------
/stweet/exceptions/refresh_token_exception.py:
--------------------------------------------------------------------------------
1 | """RefreshTokenException definition."""
2 |
3 |
4 | class RefreshTokenException(Exception):
5 | """RefreshTokenException class."""
6 |
7 | def __init__(self, msg):
8 | """Error constructor."""
9 | super().__init__(msg)
10 |
--------------------------------------------------------------------------------
/stweet/exceptions/scrap_batch_bad_response.py:
--------------------------------------------------------------------------------
1 | """ScrapBatchBadResponse definition."""
2 |
3 |
4 | class ScrapBatchBadResponse(Exception):
5 | """ScrapBatchBadResponse class."""
6 |
7 | def __init__(self, msg):
8 | """Error constructor."""
9 | super().__init__(msg)
10 |
--------------------------------------------------------------------------------
/stweet/exceptions/too_many_requests_exception.py:
--------------------------------------------------------------------------------
1 | """TooManyRequestsException class."""
2 |
3 |
4 | class TooManyRequestsException(Exception):
5 | """TooManyRequestsException class."""
6 |
7 | def __init__(self, request_url: str):
8 | """Error constructor."""
9 | super().__init__(f'too many requests to {request_url}')
10 |
--------------------------------------------------------------------------------
/stweet/exceptions/user_suspended_exception.py:
--------------------------------------------------------------------------------
1 | """UserSuspendedException class."""
2 |
3 |
4 | class UserSuspendedException(Exception):
5 | """UserSuspendedException class."""
6 |
7 | def __init__(self):
8 | """Error constructor."""
9 | super().__init__('Username is suspended')
10 |
--------------------------------------------------------------------------------
/stweet/get_user_runner/__init__.py:
--------------------------------------------------------------------------------
1 | from .get_users_context import GetUsersContext
2 | from .get_users_result import GetUsersResult
3 | from .get_users_runner import GetUsersRunner
4 | from .get_users_task import GetUsersTask
5 |
--------------------------------------------------------------------------------
/stweet/get_user_runner/get_users_context.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass, field
2 | from typing import List, Tuple
3 |
4 |
5 | @dataclass
6 | class GetUsersContext:
7 | scrapped_count: int = 0
8 | usernames_with_error: List[Tuple[str, Exception]] = field(default_factory=list)
9 |
10 | def add_one_scrapped_user(self):
11 | self.scrapped_count += 1
12 |
13 | def add_user_with_scrap_error(self, username: str, exception: Exception):
14 | self.usernames_with_error.append((username, exception))
15 |
--------------------------------------------------------------------------------
/stweet/get_user_runner/get_users_result.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import List, Tuple
3 |
4 |
5 | @dataclass
6 | class GetUsersResult:
7 | users_count: int
8 | usernames_with_error: List[Tuple[str, Exception]]
9 |
--------------------------------------------------------------------------------
/stweet/get_user_runner/get_users_runner.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import List, Optional
3 |
4 | from ..http_request import WebClient
5 | from ..model.user_raw import UserRaw
6 | from ..raw_output.raw_data_output import RawDataOutput
7 | from ..twitter_api.default_twitter_web_client_provider import \
8 | DefaultTwitterWebClientProvider
9 | from ..twitter_api.twitter_api_requests import TwitterApiRequests
10 | from .get_users_context import GetUsersContext
11 | from .get_users_result import GetUsersResult
12 | from .get_users_task import GetUsersTask
13 | from .user_parser import parse_user
14 |
15 |
16 | @dataclass
17 | class _TweetByIdBaseInfo:
18 | id: str
19 | username: str
20 | tweet_content: str
21 |
22 |
23 | class GetUsersRunner:
24 |
25 | get_user_context: GetUsersContext
26 | get_user_task: GetUsersTask
27 | raw_data_outputs: List[RawDataOutput]
28 | web_client: WebClient
29 |
30 | def __init__(
31 | self,
32 | get_user_task: GetUsersTask,
33 | raw_data_outputs: List[RawDataOutput],
34 | get_user_context: Optional[GetUsersContext] = None,
35 | web_client: Optional[WebClient] = None
36 | ):
37 | self.get_user_context = GetUsersContext() if get_user_context is None else get_user_context
38
| self.get_user_task = get_user_task 39 | self.raw_data_outputs = raw_data_outputs 40 | self.web_client = web_client if web_client is not None \ 41 | else DefaultTwitterWebClientProvider.get_web_client() 42 | return 43 | 44 | def run(self) -> GetUsersResult: 45 | for username in self.get_user_task.usernames: 46 | self._try_get_user(username) 47 | return GetUsersResult(self.get_user_context.scrapped_count, 48 | self.get_user_context.usernames_with_error) 49 | 50 | def _try_get_user(self, username: str): 51 | try: 52 | request_details = TwitterApiRequests().get_user_details_request_details(username) 53 | user_request_response = self.web_client.run_request(request_details) 54 | full_user = parse_user(user_request_response.text) 55 | self.get_user_context.add_one_scrapped_user() 56 | self._process_user_to_output(full_user) 57 | except Exception as exception: 58 | self.get_user_context.add_user_with_scrap_error(username, exception) 59 | 60 | def _process_user_to_output(self, user_raw: UserRaw): 61 | for user_output in self.raw_data_outputs: 62 | user_output.export_raw_data([user_raw]) 63 | -------------------------------------------------------------------------------- /stweet/get_user_runner/get_users_task.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List 3 | 4 | 5 | @dataclass(frozen=True) 6 | class GetUsersTask: 7 | usernames: List[str] 8 | 9 | def __init__( 10 | self, 11 | usernames: List[str] 12 | ): 13 | object.__setattr__(self, 'usernames', usernames) 14 | return 15 | -------------------------------------------------------------------------------- /stweet/get_user_runner/user_parser.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import arrow 4 | 5 | from stweet.model.user_raw import UserRaw 6 | 7 | 8 | def parse_user(response_content: str) -> UserRaw: 9 | return UserRaw(json.dumps(json.loads(response_content)['data']['user']['result']), arrow.now()) 10 | -------------------------------------------------------------------------------- /stweet/http_request/__init__.py: -------------------------------------------------------------------------------- 1 | from .http_method import HttpMethod 2 | from .request_details import RequestDetails 3 | from .request_response import RequestResponse 4 | from .requests.requests_web_client import (RequestsWebClient, 5 | RequestsWebClientProxyConfig) 6 | from .web_client import WebClient 7 | -------------------------------------------------------------------------------- /stweet/http_request/http_method.py: -------------------------------------------------------------------------------- 1 | """HttpMethod enum class.""" 2 | import enum 3 | 4 | 5 | class HttpMethod(enum.Enum): 6 | """HttpMethod enum class.""" 7 | 8 | GET = 1 9 | POST = 2 10 | -------------------------------------------------------------------------------- /stweet/http_request/interceptor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markowanga/stweet/fe34e98254dc7646bde6e083b5f6f745a0ee8cb6/stweet/http_request/interceptor/__init__.py -------------------------------------------------------------------------------- /stweet/http_request/interceptor/logging_requests_web_client_interceptor.py: -------------------------------------------------------------------------------- 1 | """Class of LoggingRequestsWebClientInterceptor.""" 2 | import logging 3 | from 
http.client import HTTPConnection
4 | from typing import List
5 |
6 | from .. import RequestDetails, RequestResponse, RequestsWebClient, WebClient
7 |
8 |
9 | class LoggingRequestsWebClientInterceptor(WebClient.WebClientInterceptor):
10 | """Class of LoggingRequestsWebClientInterceptor."""
11 |
12 | @staticmethod
13 | def _debug_requests_on():
14 | """Switches on logging of the requests module."""
15 | HTTPConnection.debuglevel = 1
16 |
17 | logging.basicConfig()
18 | logging.getLogger().setLevel(logging.DEBUG)
19 | requests_log = logging.getLogger("requests.packages.urllib3")
20 | requests_log.setLevel(logging.DEBUG)
21 | requests_log.propagate = True
22 |
23 | @staticmethod
24 | def _debug_requests_off():
25 | """Switches off logging of the requests module; may have side effects."""
26 | HTTPConnection.debuglevel = 0
27 |
28 | root_logger = logging.getLogger()
29 | root_logger.setLevel(logging.WARNING)
30 | root_logger.handlers = []
31 | requests_log = logging.getLogger("requests.packages.urllib3")
32 | requests_log.setLevel(logging.NOTSET)
33 | requests_log.propagate = False
34 |
35 | def logs_to_show(self, params: RequestDetails) -> bool:
36 | """Method to decide whether to show logs of the request.
37 |
38 | Method can be overridden so that the logs are filtered, for example by request URL.
39 | """
40 | return True
41 |
42 | def intercept(
43 | self,
44 | requests_details: RequestDetails,
45 | next_interceptors: List[WebClient.WebClientInterceptor],
46 | web_client: RequestsWebClient
47 | ) -> RequestResponse:
48 | """Method shows logs when the predicate is true. Uses a static field, so there may be problems with concurrency."""
49 | is_to_log = self.logs_to_show(requests_details)
50 | if is_to_log:
51 | LoggingRequestsWebClientInterceptor._debug_requests_on()
52 | to_return = self.get_response(requests_details, next_interceptors, web_client)
53 | if is_to_log:
54 | LoggingRequestsWebClientInterceptor._debug_requests_off()
55 | return to_return
56 |
--------------------------------------------------------------------------------
/stweet/http_request/interceptor/params_response_log_web_client_interceptor.py:
--------------------------------------------------------------------------------
1 | """Class of ParamsResponseLogWebClientInterceptor."""
2 | import threading
3 | from typing import List
4 |
5 | from .. import RequestDetails, RequestResponse, RequestsWebClient, WebClient
6 |
7 |
8 | class ParamsResponseLogWebClientInterceptor(WebClient.WebClientInterceptor):
9 | """Class of ParamsResponseLogWebClientInterceptor.
10 |
11 | Interceptor logs the input params and the output response.
12 | """
13 |
14 | _value: int
15 | _lock: threading.Lock
16 |
17 | def __init__(self):
18 | """Constructor of ParamsResponseLogWebClientInterceptor."""
19 | self._value = 0
20 | self._lock = threading.Lock()
21 |
22 | def increment(self) -> int:
23 | """Thread-safe increment. Returns the old value."""
24 | with self._lock:
25 | to_return = self._value
26 | self._value += 1
27 | return to_return
28 |
29 | def logs_to_show(self, params: RequestDetails) -> bool:
30 | """Method to decide whether to show logs of the request.
31 |
32 | Method can be overridden so that the logs are filtered, for example by request URL.
33 | """
34 | return True
35 |
36 | def intercept(
37 | self,
38 | requests_details: RequestDetails,
39 | next_interceptors: List[WebClient.WebClientInterceptor],
40 | web_client: RequestsWebClient
41 | ) -> RequestResponse:
42 | """Method shows logs when the predicate is true. Uses shared state, so there may be problems with concurrency."""
43 | is_to_log = self.logs_to_show(requests_details)
44 | index = self.increment()
45 | if is_to_log:
46 | print(f'{index} -- {requests_details}')
47 | to_return = self.get_response(requests_details, next_interceptors, web_client)
48 | if is_to_log:
49 | print(f'{index} -- {to_return}')
50 | return to_return
51 |
--------------------------------------------------------------------------------
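Both interceptors above are attached through the interceptors argument of a web client; each one wraps the remaining chain, so they run in list order. A sketch combining them with the auth interceptor defined later in this repository:

import stweet as st
from stweet.http_request.interceptor.logging_requests_web_client_interceptor import \
    LoggingRequestsWebClientInterceptor
from stweet.http_request.interceptor.params_response_log_web_client_interceptor import \
    ParamsResponseLogWebClientInterceptor
from stweet.twitter_api.twitter_auth_web_client_interceptor import TwitterAuthWebClientInterceptor

web_client = st.RequestsWebClient(interceptors=[
    ParamsResponseLogWebClientInterceptor(),  # logs request params and responses
    LoggingRequestsWebClientInterceptor(),    # switches on low-level requests logging
    TwitterAuthWebClientInterceptor()         # adds Authorization and guest token headers
])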
/stweet/http_request/request_details.py:
--------------------------------------------------------------------------------
1 | """Class with request details."""
2 |
3 | from dataclasses import dataclass
4 | from typing import Dict
5 |
6 | from .http_method import HttpMethod
7 |
8 |
9 | @dataclass
10 | class RequestDetails:
11 | """Class with request details. Specifies all http request details."""
12 |
13 | http_method: HttpMethod
14 | url: str
15 | headers: Dict[str, str]
16 | params: Dict[str, str]
17 | timeout: int
18 |
--------------------------------------------------------------------------------
/stweet/http_request/request_response.py:
--------------------------------------------------------------------------------
1 | """Class with response details."""
2 |
3 | from dataclasses import dataclass
4 | from typing import Optional
5 |
6 |
7 | @dataclass
8 | class RequestResponse:
9 | """Class with response details. Independent of the web library implementation."""
10 |
11 | status_code: Optional[int]
12 | text: Optional[str]
13 |
14 | def is_429(self) -> bool:
15 | """Method to check whether the response has the too-many-requests (429) status."""
16 | return self.status_code == 429
17 |
18 | def is_success(self) -> bool:
19 | """Method to check whether the response has a success status."""
20 | return self.status_code is not None and self.status_code < 300
21 |
--------------------------------------------------------------------------------
/stweet/http_request/requests/__init__.py:
--------------------------------------------------------------------------------
1 | from .requests_web_client import RequestsWebClient
2 | from .requests_web_client_proxy_config import RequestsWebClientProxyConfig
3 |
--------------------------------------------------------------------------------
/stweet/http_request/requests/requests_web_client.py:
--------------------------------------------------------------------------------
1 | """Requests web client class."""
2 | from __future__ import annotations
3 |
4 | from typing import Dict, List, Optional
5 |
6 | import requests
7 | import requests.adapters
8 | import urllib3
9 | import urllib3.util.ssl_
10 |
11 | from ..request_details import RequestDetails
12 | from ..request_response import RequestResponse
13 | from ..web_client import WebClient
14 | from .requests_web_client_proxy_config import RequestsWebClientProxyConfig
15 |
16 | _CIPHERS = 'TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256:ECDHE-ECDSA-AES128-' \
17 | 'GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:' \
18 | 'ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-RSA-AES128-SHA' \
19 | ':ECDHE-RSA-AES256-SHA:AES128-GCM-SHA256:AES256-GCM-SHA384:AES128-SHA:AES256-SHA'
20 |
21 |
22 | class _TwitterTLSAdapter(requests.adapters.HTTPAdapter):
23 | def init_poolmanager(self, *args, **kwargs):
24 | # FIXME: When urllib3 2.0.0 is out and can be required,
25 | # this should use urllib3.util.create_urllib3_context instead of the private, undocumented ssl_ module.
26 | kwargs['ssl_context'] = urllib3.util.ssl_.create_urllib3_context(ciphers=_CIPHERS)
27 | return super().init_poolmanager(*args, **kwargs)
28 |
29 |
30 | class RequestsWebClient(WebClient):
31 | """Requests web client class. Implementation based on the requests library."""
32 |
33 | proxy: Optional[RequestsWebClientProxyConfig]
34 | verify: bool
35 |
36 | def __init__(
37 | self,
38 | proxy: Optional[RequestsWebClientProxyConfig] = None,
39 | verify: bool = True,
40 | interceptors: Optional[List[WebClient.WebClientInterceptor]] = None
41 | ):
42 | """Constructor of RequestsWebClient."""
43 | interceptors_to_super = interceptors \
44 | if interceptors is not None \
45 | else []
46 | super(RequestsWebClient, self).__init__(interceptors_to_super)
47 | self.proxy = proxy
48 | self.verify = verify
49 |
50 | def run_clear_request(self, params: RequestDetails) -> RequestResponse:
51 | """Main method to run a request using the requests package."""
52 | session = requests.Session()
53 | adapter = _TwitterTLSAdapter()
54 | session.mount('https://twitter.com', adapter)
55 | session.mount('https://api.twitter.com', adapter)
56 | response = session.request(
57 | method=params.http_method.name,
58 | url=params.url,
59 | params=params.params,
60 | headers=params.headers,
61 | timeout=params.timeout,
62 | proxies=self._get_proxy(),
63 | verify=self.verify
64 | )
65 | return RequestResponse(response.status_code, response.text)
66 |
67 | def _get_proxy(self) -> Optional[Dict[str, str]]:
68 | return None if self.proxy is None else dict({
69 | 'http': self.proxy.http_proxy,
70 | 'https': self.proxy.https_proxy,
71 | })
72 |
--------------------------------------------------------------------------------
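RequestsWebClient also accepts the proxy configuration defined in the next file. A sketch routing all traffic through a local HTTP proxy, for example the squid container from test-services-docker-compose.yml; the URL is illustrative:

import stweet as st

proxy = st.RequestsWebClientProxyConfig(
    http_proxy='http://localhost:3128',
    https_proxy='http://localhost:3128'
)
web_client = st.RequestsWebClient(proxy=proxy)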
/stweet/http_request/requests/requests_web_client_proxy_config.py:
--------------------------------------------------------------------------------
1 | """Configuration of proxy to RequestsWebClient."""
2 | from dataclasses import dataclass
3 |
4 |
5 | @dataclass
6 | class RequestsWebClientProxyConfig:
7 | """Configuration class of proxy to RequestsWebClient."""
8 |
9 | http_proxy: str
10 | https_proxy: str
11 |
--------------------------------------------------------------------------------
/stweet/http_request/web_client.py:
--------------------------------------------------------------------------------
1 | """Web client abstract class."""
2 | from __future__ import annotations
3 |
4 | from abc import ABC, abstractmethod
5 | from typing import List, Optional
6 |
7 | from .request_details import RequestDetails
8 | from .request_response import RequestResponse
9 |
10 |
11 | def _run_request_with_interceptors(
12 | requests_details: RequestDetails,
13 | next_interceptors: List[WebClient.WebClientInterceptor],
14 | web_client: WebClient
15 | ) -> RequestResponse:
16 | return next_interceptors[0].intercept(requests_details, next_interceptors[1:], web_client) if len(
17 | next_interceptors) > 0 else web_client.run_clear_request(requests_details)
18 |
19 |
20 | class WebClient:
21 | """Web client abstract class."""
22 |
23 | _interceptors: List[WebClientInterceptor]
24 |
25 | def __init__(self, interceptors: Optional[List[WebClientInterceptor]]):
26 | """Base constructor of class."""
27 | self._interceptors = [] if interceptors is None else interceptors
28 |
29 | def run_request(self, requests_details: RequestDetails) -> RequestResponse:
30 | """Method processes the request, wrapping it with the interceptors."""
31 | return _run_request_with_interceptors(requests_details, self._interceptors, self)
32 |
33 | @abstractmethod
34 | def run_clear_request(self, params: RequestDetails) -> RequestResponse:
35 | """Abstract method to run only the request."""
36 |
37 | class WebClientInterceptor(ABC):
38 | """Abstract class of web client interceptor."""
39 |
40 | @staticmethod
41 | def get_response(
42 | requests_details: RequestDetails,
43 | next_interceptors: List[WebClient.WebClientInterceptor],
44 | web_client: WebClient
45 | ) -> RequestResponse:
46 | """Method processes the request. If any interceptors remain, they wrap the request; otherwise the client runs it."""
47 | return _run_request_with_interceptors(requests_details, next_interceptors, web_client)
48 |
49 | @abstractmethod
50 | def intercept(
51 | self,
52 | requests_details: RequestDetails,
53 | next_interceptors: List[WebClient.WebClientInterceptor],
54 | web_client: WebClient
55 | ) -> RequestResponse:
56 | """Interceptor method of the request.
57 |
58 | Method needs to call WebClientInterceptor.get_response to let the next interceptors
59 | and the client process the request.
60 | """
61 |
--------------------------------------------------------------------------------
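Custom behaviour hooks into this chain by subclassing WebClient.WebClientInterceptor; the intercept method must delegate to get_response so the remaining interceptors and finally run_clear_request still execute. A hypothetical request-counting sketch:

from typing import List

from stweet.http_request import RequestDetails, RequestResponse, WebClient


class CountingInterceptor(WebClient.WebClientInterceptor):
    """Hypothetical interceptor that counts the processed requests."""

    requests_count: int = 0

    def intercept(
            self,
            requests_details: RequestDetails,
            next_interceptors: List[WebClient.WebClientInterceptor],
            web_client: WebClient
    ) -> RequestResponse:
        self.requests_count += 1
        # delegate to the rest of the interceptor chain
        return self.get_response(requests_details, next_interceptors, web_client)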
/stweet/model/__init__.py:
--------------------------------------------------------------------------------
1 | from .language import Language
2 | from .user_tweet_raw import UserTweetRaw
3 |
--------------------------------------------------------------------------------
/stweet/model/cursor.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 |
3 |
4 | @dataclass
5 | class Cursor:
6 | type: str
7 | value: str
8 |
--------------------------------------------------------------------------------
/stweet/model/language.py:
--------------------------------------------------------------------------------
1 | """Domain Language enum class."""
2 |
3 | import enum
4 |
5 |
6 | class Language(enum.Enum):
7 | """Domain Language enum class."""
8 |
9 | def __new__(cls, *args, **kwargs):
10 | """Class __new__ method."""
11 | value = len(cls.__members__) + 1
12 | obj = object.__new__(cls)
13 | obj._value_ = value
14 | return obj
15 |
16 | def __init__(self, short_value):
17 | """Class constructor method."""
18 | self.short_value = short_value
19 |
20 | ENGLISH = 'en'
21 | ARABIC = 'ar'
22 | BASQUE = 'eu'
23 | BENGALI = 'bn'
24 | BULGARIAN = 'bg'
25 | TRADITIONAL_CHINESE = 'zh-tw'
26 | SIMPLIFIED_CHINESE = 'zh-cn'
27 | CROATIAN = 'hr'
28 | CZECH = 'cs'
29 | DANISH = 'da'
30 | FINNISH = 'fi'
31 | FRENCH = 'fr'
32 | GREEK = 'el'
33 | GUJARATI = 'gu'
34 | HEBREW = 'iw'
35 | HINDI = 'hi'
36 | SPANISH = 'es'
37 | INDONESIAN = 'in'
38 | JAPANESE = 'ja'
39 | KANNADA = 'kn'
40 | CATALAN = 'ca'
41 | KOREAN = 'ko'
42 | MARATHI = 'mr'
43 | DUTCH = 'nl'
44 | GERMAN = 'de'
45 | NORWEGIAN = 'no'
46 | PERSIAN = 'fa'
47 | POLISH = 'pl'
48 | PORTUGUESE = 'pt'
49 | RUSSIAN = 'ru'
50 | ROMANIAN = 'ro'
51 | SERBIAN = 'sr'
52 | SLOVAK = 'sk'
53 | SWEDISH = 'sv'
54 | THAI = 'th'
55 | TAMIL = 'ta'
56 | TURKISH = 'tr'
57 | UKRAINIAN = 'uk'
58 | URDU = 'ur'
59 | HUNGARIAN = 'hu'
60 | VIETNAMESE = 'vi'
61 | ITALIAN = 'it'
62 |
--------------------------------------------------------------------------------
/stweet/model/raw_data.py:
--------------------------------------------------------------------------------
1 | import json
2 | from abc import ABC
3 |
4 | from arrow import Arrow
5 |
6 |
7 | class RawData(ABC):
8 | object_type: str
9 |
download_datetime: Arrow 10 | raw_value: str 11 | 12 | def __init__(self, object_type: str, raw_value: str, download_datetime: Arrow): 13 | self.raw_value = raw_value 14 | self.object_type = object_type 15 | self.download_datetime = download_datetime 16 | 17 | def to_json_line(self) -> str: 18 | return json.dumps({ 19 | 'object_type': self.object_type, 20 | 'download_datetime': self.download_datetime.isoformat(), 21 | 'raw_value': json.loads(self.raw_value) 22 | }) 23 | -------------------------------------------------------------------------------- /stweet/model/tweet_raw.py: -------------------------------------------------------------------------------- 1 | from arrow import Arrow 2 | 3 | from stweet.model.raw_data import RawData 4 | 5 | 6 | class TweetRaw(RawData): 7 | def __init__(self, raw_value: str, download_datetime: Arrow): 8 | super().__init__('TweetRaw', raw_value, download_datetime) 9 | -------------------------------------------------------------------------------- /stweet/model/user_raw.py: -------------------------------------------------------------------------------- 1 | from arrow import Arrow 2 | 3 | from stweet.model.raw_data import RawData 4 | 5 | 6 | class UserRaw(RawData): 7 | def __init__(self, raw_value: str, download_datetime: Arrow): 8 | super().__init__('UserRaw', raw_value, download_datetime) 9 | -------------------------------------------------------------------------------- /stweet/model/user_tweet_raw.py: -------------------------------------------------------------------------------- 1 | from arrow import Arrow 2 | 3 | from stweet.model.raw_data import RawData 4 | 5 | 6 | class UserTweetRaw(RawData): 7 | def __init__(self, raw_value: str, download_datetime: Arrow): 8 | super().__init__('UserTweetRaw', raw_value, download_datetime) 9 | -------------------------------------------------------------------------------- /stweet/raw_output/__init__.py: -------------------------------------------------------------------------------- 1 | from .collector_raw_output import CollectorRawOutput 2 | from .json_line_file_raw_output import JsonLineFileRawOutput 3 | from .print_every_n_raw_output import PrintEveryNRawOutput 4 | from .print_first_in_batch_raw_output import PrintFirstInBatchRawOutput 5 | from .print_raw_output import PrintRawOutput 6 | -------------------------------------------------------------------------------- /stweet/raw_output/collector_raw_output.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from ..model.raw_data import RawData 4 | from .raw_data_output import RawDataOutput 5 | 6 | 7 | class CollectorRawOutput(RawDataOutput): 8 | _raw_data_list: List[RawData] 9 | 10 | def __init__(self): 11 | self._raw_data_list = [] 12 | 13 | def export_raw_data(self, raw_data_list: List[RawData]): 14 | self._raw_data_list.extend(raw_data_list) 15 | return 16 | 17 | def get_raw_list(self) -> List[RawData]: 18 | return self._raw_data_list 19 | -------------------------------------------------------------------------------- /stweet/raw_output/json_line_file_raw_output.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from ..model.raw_data import RawData 4 | from .raw_data_output import RawDataOutput 5 | 6 | 7 | class JsonLineFileRawOutput(RawDataOutput): 8 | file_name: str 9 | 10 | def __init__(self, file_name: str): 11 | self.file_name = file_name 12 | 13 | def export_raw_data(self, raw_data_list: List[RawData]): 14 | with 
open(self.file_name, 'a') as file:
15 | for raw in raw_data_list:
16 | file.write(f'{raw.to_json_line()}\n')
17 | return
18 |
--------------------------------------------------------------------------------
/stweet/raw_output/print_every_n_raw_output.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from ..model.raw_data import RawData
4 | from .raw_data_output import RawDataOutput
5 |
6 |
7 | class PrintEveryNRawOutput(RawDataOutput):
8 | each_n: int
9 | _counter: int = 0
10 |
11 | def __init__(self, each_n: int):
12 | self.each_n = each_n
13 |
14 | def export_raw_data(self, raw_data_list: List[RawData]):
15 | for it in raw_data_list:
16 | self._counter += 1
17 | if self._counter % self.each_n == 0:
18 | print(self._counter, it.to_json_line())
19 | return
20 |
--------------------------------------------------------------------------------
/stweet/raw_output/print_first_in_batch_raw_output.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from ..model.raw_data import RawData
4 | from .raw_data_output import RawDataOutput
5 |
6 |
7 | class PrintFirstInBatchRawOutput(RawDataOutput):
8 |
9 | def export_raw_data(self, raw_data_list: List[RawData]):
10 | message = str(raw_data_list[0].to_json_line()) if len(
11 | raw_data_list) > 0 else 'PrintFirstInBatchRawOutput -- no tweets to print'
12 | print(message)
13 | return
14 |
--------------------------------------------------------------------------------
/stweet/raw_output/print_raw_output.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from ..model.raw_data import RawData
4 | from .raw_data_output import RawDataOutput
5 |
6 |
7 | class PrintRawOutput(RawDataOutput):
8 |
9 | def export_raw_data(self, raw_data_list: List[RawData]):
10 | for it in raw_data_list:
11 | print(it.to_json_line())
12 | return
13 |
--------------------------------------------------------------------------------
/stweet/raw_output/raw_data_output.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from typing import List
3 |
4 | from ..model.raw_data import RawData
5 |
6 |
7 | class RawDataOutput(ABC):
8 |
9 | @abstractmethod
10 | def export_raw_data(self, raw_data_list: List[RawData]):
11 | pass
12 |
--------------------------------------------------------------------------------
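Every runner in the package writes through the RawDataOutput abstraction above, so a new sink only has to implement export_raw_data. A hypothetical counting sketch:

from typing import List

from stweet.model.raw_data import RawData
from stweet.raw_output.raw_data_output import RawDataOutput


class CountingRawOutput(RawDataOutput):
    """Hypothetical output that only counts the exported items."""

    counter: int = 0

    def export_raw_data(self, raw_data_list: List[RawData]):
        self.counter += len(raw_data_list)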
/stweet/search_runner/__init__.py:
--------------------------------------------------------------------------------
1 | from .replies_filter import RepliesFilter
2 | from .search_run_context import SearchRunContext
3 | from .search_runner import TweetSearchRunner
4 | from .search_tweets_result import SearchTweetsResult
5 | from .search_tweets_task import SearchTweetsTask
6 |
--------------------------------------------------------------------------------
/stweet/search_runner/replies_filter.py:
--------------------------------------------------------------------------------
1 | """Domain RepliesFilter enum class."""
2 |
3 | import enum
4 |
5 |
6 | class RepliesFilter(enum.Enum):
7 | """Domain RepliesFilter enum class."""
8 |
9 | ONLY_REPLIES = 1
10 | ONLY_ORIGINAL = 2
11 |
--------------------------------------------------------------------------------
/stweet/search_runner/search_run_context.py:
--------------------------------------------------------------------------------
1 | """Domain SearchRunContext class."""
2 |
3 | from dataclasses import dataclass
4 | from typing import Optional
5 |
6 | from ..model.cursor import Cursor
7 |
8 |
9 | @dataclass
10 | class SearchRunContext:
11 | """Domain SearchRunContext class."""
12 |
13 | cursor: Optional[Cursor]
14 | last_tweets_download_count: int
15 | all_download_tweets_count: int
16 |
17 | def __init__(
18 | self,
19 | cursor: Optional[Cursor] = None,
20 | guest_auth_token: Optional[str] = None,
21 | last_tweets_download_count: int = 0,
22 | all_download_tweets: int = 0
23 | ):
24 | """Class constructor."""
25 | self.cursor = cursor
26 | self.guest_auth_token = guest_auth_token
27 | self.last_tweets_download_count = last_tweets_download_count
28 | self.all_download_tweets_count = all_download_tweets
29 | return
30 |
31 | def add_downloaded_tweets_count(self, new_downloaded_tweets_count: int):
32 | """Method to update the downloaded tweets count."""
33 | self.all_download_tweets_count += new_downloaded_tweets_count
34 | self.last_tweets_download_count = new_downloaded_tweets_count
35 |
--------------------------------------------------------------------------------
/stweet/search_runner/search_runner.py:
--------------------------------------------------------------------------------
1 | """Runner to process task to search tweets."""
2 | import json
3 | from typing import List, Optional
4 |
5 | from ..exceptions.scrap_batch_bad_response import ScrapBatchBadResponse
6 | from ..http_request.request_details import RequestDetails
7 | from ..http_request.web_client import WebClient
8 | from ..model.tweet_raw import TweetRaw
9 | from ..model.user_raw import UserRaw
10 | from ..raw_output.raw_data_output import RawDataOutput
11 | from ..twitter_api.default_twitter_web_client_provider import \
12 | DefaultTwitterWebClientProvider
13 | from ..twitter_api.twitter_api_requests import TwitterApiRequests
14 | from .search_run_context import SearchRunContext
15 | from .search_tweets_result import SearchTweetsResult
16 | from .search_tweets_task import SearchTweetsTask
17 | from .tweet_raw_parser import get_scroll_cursor, parse_tweets, parse_users
18 |
19 |
20 | class TweetSearchRunner:
21 | """Runner class to process task to search tweets."""
22 |
23 | search_run_context: SearchRunContext
24 | search_tweets_task: SearchTweetsTask
25 | tweet_raw_data_outputs: List[RawDataOutput]
26 | user_raw_data_outputs: List[RawDataOutput]
27 | web_client: WebClient
28 |
29 | def __init__(
30 | self,
31 | search_tweets_task: SearchTweetsTask,
32 | tweet_raw_data_outputs: List[RawDataOutput],
33 | user_raw_data_outputs: List[RawDataOutput],
34 | search_run_context: Optional[SearchRunContext] = None,
35 | web_client: Optional[WebClient] = None
36 | ):
37 | """Constructor to create object."""
38 | self.search_run_context = SearchRunContext() if search_run_context is None \
39 | else search_run_context
40 | self.search_tweets_task = search_tweets_task
41 | self.tweet_raw_data_outputs = tweet_raw_data_outputs
42 | self.user_raw_data_outputs = user_raw_data_outputs
43 | self.web_client = web_client \
44 | if web_client is not None \
45 | else DefaultTwitterWebClientProvider.get_web_client()
46 | return
47 |
48 | def run(self) -> SearchTweetsResult:
49 | """Main search_runner method."""
50 | while not self._is_end_of_scrapping():
51 | self._execute_next_tweets_request()
52 | return SearchTweetsResult(self.search_run_context.all_download_tweets_count)
53 |
54 | def _is_end_of_scrapping(self) -> bool:
55 | ctx = self.search_run_context
56 | last_scrap_zero =
ctx.last_tweets_download_count == 0 57 | is_cursor = ctx.cursor is not None 58 | return (last_scrap_zero and is_cursor) or (not last_scrap_zero and not is_cursor) 59 | 60 | def _execute_next_tweets_request(self): 61 | request_params = self._get_next_request_details() 62 | response = self.web_client.run_request(request_params) 63 | if response.is_success(): 64 | tweets = parse_tweets(response.text) 65 | users = parse_users(response.text) 66 | cursor = get_scroll_cursor(json.loads(response.text)['timeline']['instructions']) 67 | self.search_run_context.add_downloaded_tweets_count(len(tweets)) 68 | self.search_run_context.cursor = cursor 69 | self._process_new_results_to_output(tweets, users) 70 | else: 71 | raise ScrapBatchBadResponse(response) 72 | return 73 | 74 | def _get_next_request_details(self) -> RequestDetails: 75 | return TwitterApiRequests().get_search_tweet_request_details_new_api( 76 | self.search_run_context.all_download_tweets_count, 77 | self.search_run_context.cursor, 78 | self.search_tweets_task.tweets_limit, 79 | self.search_tweets_task.get_full_search_query() 80 | ) 81 | 82 | def _process_new_results_to_output(self, tweets: List[TweetRaw], users: List[UserRaw]): 83 | for raw_data_output in self.tweet_raw_data_outputs: 84 | raw_data_output.export_raw_data(tweets) 85 | for raw_data_output in self.user_raw_data_outputs: 86 | raw_data_output.export_raw_data(users) 87 | return 88 | -------------------------------------------------------------------------------- /stweet/search_runner/search_tweets_result.py: -------------------------------------------------------------------------------- 1 | """Class with result of TweetSearchRunner task.""" 2 | 3 | from dataclasses import dataclass 4 | 5 | 6 | @dataclass 7 | class SearchTweetsResult: 8 | """Class with result of TweetSearchRunner task.""" 9 | 10 | downloaded_count: int 11 | -------------------------------------------------------------------------------- /stweet/search_runner/search_tweets_task.py: -------------------------------------------------------------------------------- 1 | """Domain SearchTweetsTask class.""" 2 | 3 | from dataclasses import dataclass 4 | from typing import Optional 5 | 6 | from arrow import Arrow 7 | 8 | from ..model.language import Language 9 | from .replies_filter import RepliesFilter 10 | 11 | 12 | def _format_date(arrow_time: Arrow) -> str: 13 | return arrow_time.date().isoformat() 14 | 15 | 16 | @dataclass(frozen=True) 17 | class SearchTweetsTask: 18 | """Domain SearchTweetsTask class.""" 19 | 20 | all_words: Optional[str] 21 | exact_words: Optional[str] 22 | any_word: Optional[str] 23 | from_username: Optional[str] 24 | to_username: Optional[str] 25 | since: Optional[Arrow] 26 | until: Optional[Arrow] 27 | language: Optional[Language] 28 | tweets_limit: Optional[int] 29 | replies_filter: Optional[RepliesFilter] 30 | 31 | def __init__( 32 | self, 33 | all_words: Optional[str] = None, 34 | exact_words: Optional[str] = None, 35 | any_word: Optional[str] = None, 36 | from_username: Optional[str] = None, 37 | to_username: Optional[str] = None, 38 | since: Optional[Arrow] = None, 39 | until: Optional[Arrow] = None, 40 | language: Optional[Language] = None, 41 | tweets_limit: Optional[int] = None, 42 | replies_filter: Optional[RepliesFilter] = None 43 | ): 44 | """Class constructor.""" 45 | object.__setattr__(self, 'all_words', all_words) 46 | object.__setattr__(self, 'exact_words', exact_words) 47 | object.__setattr__(self, 'any_word', any_word) 48 | object.__setattr__(self, 'from_username', 
from_username)
49 | object.__setattr__(self, 'to_username', to_username)
50 | object.__setattr__(self, 'since', since)
51 | object.__setattr__(self, 'until', until)
52 | object.__setattr__(self, 'language', language)
53 | object.__setattr__(self, 'tweets_limit', tweets_limit)
54 | object.__setattr__(self, 'replies_filter', replies_filter)
55 | return
56 |
57 | def get_full_search_query(self) -> str:
58 | """Method to return the full search query."""
59 | query = ''
60 | if self.all_words is not None:
61 | query += self.all_words
62 | if self.exact_words is not None:
63 | query += f' "{self.exact_words}"'
64 | if self.any_word is not None:
65 | query += f' ({" OR ".join(self.any_word.split(" "))})'
66 | if self.language is not None:
67 | query += f' lang:{self.language.short_value}'
68 | if self.from_username:
69 | query += f' from:{self.from_username}'
70 | if self.since is not None:
71 | query += f" since:{_format_date(self.since)}"
72 | if self.until is not None:
73 | query += f" until:{_format_date(self.until)}"
74 | if self.to_username:
75 | query += f" to:{self.to_username}"
76 | if self.replies_filter is not None:
77 | if self.replies_filter == RepliesFilter.ONLY_REPLIES:
78 | query += " filter:replies"
79 | elif self.replies_filter == RepliesFilter.ONLY_ORIGINAL:
80 | query += " -filter:replies"
81 | return query
82 |
--------------------------------------------------------------------------------
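get_full_search_query assembles the task fields above into a single Twitter search string. A quick sketch of the expected result for a sample task (the dates and words are illustrative):

import arrow
import stweet as st

task = st.SearchTweetsTask(
    all_words='#covid19',
    language=st.Language.ENGLISH,
    since=arrow.get('2021-01-01'),
    until=arrow.get('2021-02-01')
)
# prints: #covid19 lang:en since:2021-01-01 until:2021-02-01
print(task.get_full_search_query())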
/stweet/search_runner/tweet_raw_parser.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import Any, List, Optional
3 |
4 | import arrow
5 |
6 | from ..model.cursor import Cursor
7 | from ..model.tweet_raw import TweetRaw
8 | from ..model.user_raw import UserRaw
9 |
10 |
11 | def get_scroll_cursor(instructions: List[Any]) -> Optional[Cursor]:
12 | entries = [
13 | [entry for entry in instruction['addEntries']['entries']]
14 | for instruction in instructions if 'addEntries' in instruction
15 | ]
16 | entries = [item for sublist in entries for item in sublist]
17 | replace_entries = [
18 | instruction['replaceEntry']['entry']
19 | for instruction in instructions if 'replaceEntry' in instruction
20 | ]
21 | entries.extend(replace_entries)
22 | bottom_entries = [it for it in entries if it['entryId'] == 'cursor-bottom-0']
23 | bottom_entry = None if len(bottom_entries) == 0 else bottom_entries[0]
24 | if bottom_entry is not None:
25 | cursor_raw = bottom_entry['content']['operation']['cursor']
26 | return Cursor(cursor_raw['cursorType'], cursor_raw['value'])
27 | else:
28 | return None
29 |
30 |
31 | def parse_users(response: str) -> List[UserRaw]:
32 | users_dict = json.loads(response)['globalObjects']['users']
33 | return [
34 | UserRaw(json.dumps(users_dict[it]), arrow.now())
35 | for it in users_dict.keys()
36 | ]
37 |
38 |
39 | def parse_tweets(response: str) -> List[TweetRaw]:
40 | tweets_dict = json.loads(response)['globalObjects']['tweets']
41 | return [
42 | TweetRaw(json.dumps(tweets_dict[it]), arrow.now())
43 | for it in tweets_dict.keys()
44 | ]
45 |
--------------------------------------------------------------------------------
/stweet/tweets_by_ids_runner/__init__.py:
--------------------------------------------------------------------------------
1 | from .tweets_by_id_result import TweetsByIdResult
2 | from .tweets_by_id_runner import TweetsByIdRunner
3 | from .tweets_by_id_task import TweetsByIdTask
4 |
--------------------------------------------------------------------------------
/stweet/tweets_by_ids_runner/tweet_raw_parser.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import List, Union
3 |
4 | import arrow
5 |
6 | from ..model.cursor import Cursor
7 | from ..model.user_tweet_raw import UserTweetRaw
8 |
9 |
10 | def _parse_tweets_entry_content(entry_content) -> Union[None, Cursor, UserTweetRaw]:
11 | if entry_content['entryType'] == 'TimelineTimelineItem':
12 | item_content = entry_content['itemContent']
13 | item_content_type = item_content['itemType']
14 | if item_content_type == 'TimelineTimelineCursor':
15 | return Cursor(item_content['cursorType'], item_content['value'])
16 | elif item_content_type == 'TimelineTweet':
17 | return UserTweetRaw(json.dumps(item_content['tweet_results']['result']), arrow.now())
18 | elif entry_content['entryType'] == 'TimelineTimelineModule':
19 | item = entry_content['items'][0]
20 | if item['item']['itemContent']['itemType'] == 'TimelineTweet':
21 | return UserTweetRaw(json.dumps(item['item']['itemContent']['tweet_results']['result']),
22 | arrow.now())
23 | else:
24 | return None
25 |
26 |
27 | def get_all_tweets_from_json(json_str: str) -> List[Union[UserTweetRaw, Cursor]]:
28 | response_obj = json.loads(json_str)
29 | instructions = response_obj['data']['threaded_conversation_with_injections']['instructions']
30 | tweet_instruction = [it for it in instructions if it['type'] == 'TimelineAddEntries'][0]
31 | entries = tweet_instruction['entries']
32 | to_return = [_parse_tweets_entry_content(it['content']) for it in entries]
33 | return [it for it in to_return if it is not None]
34 |
--------------------------------------------------------------------------------
/stweet/tweets_by_ids_runner/tweets_by_id_context.py:
--------------------------------------------------------------------------------
1 | """Domain TweetsByIdsContext class."""
2 |
3 | from dataclasses import dataclass
4 | from typing import Optional
5 |
6 | from ..model.cursor import Cursor
7 |
8 |
9 | @dataclass
10 | class TweetsByIdContext:
11 | """Domain TweetsByIdsContext class."""
12 |
13 | all_download_tweets_count: int
14 | requests_count: int
15 | cursor: Optional[Cursor]
16 |
17 | def __init__(
18 | self,
19 | all_download_tweets: int = 0,
20 | cursor: Optional[Cursor] = None,
21 | requests_count: int = 0
22 | ):
23 | """Class constructor."""
24 | self.all_download_tweets_count = all_download_tweets
25 | self.cursor = cursor
26 | self.requests_count = requests_count
27 | return
28 |
29 | def add_downloaded_tweets_count_in_request(self, new_tweets_count: int):
30 | """Add downloaded tweets to the context counter."""
31 | self.all_download_tweets_count += new_tweets_count
32 | self.requests_count += 1
33 |
--------------------------------------------------------------------------------
/stweet/tweets_by_ids_runner/tweets_by_id_result.py:
--------------------------------------------------------------------------------
1 | """Class with the result of a TweetsByIdRunner task."""
2 |
3 | from dataclasses import dataclass
4 | from typing import List
5 |
6 |
7 | @dataclass
8 | class TweetsByIdResult:
9 | """Class with the result of a TweetsByIdRunner task."""
10 |
11 | downloaded_count: int
12 |
--------------------------------------------------------------------------------
/stweet/tweets_by_ids_runner/tweets_by_id_runner.py:
--------------------------------------------------------------------------------
1 | """Runner to get tweets by ids."""
2 | import json
3 | from typing import List, Optional
4 |
5 | from
..exceptions import ScrapBatchBadResponse 6 | from ..http_request import RequestDetails, RequestResponse, WebClient 7 | from ..model import UserTweetRaw 8 | from ..model.cursor import Cursor 9 | from ..raw_output.raw_data_output import RawDataOutput 10 | from ..twitter_api.default_twitter_web_client_provider import \ 11 | DefaultTwitterWebClientProvider 12 | from ..twitter_api.twitter_api_requests import TwitterApiRequests 13 | from .tweet_raw_parser import get_all_tweets_from_json 14 | from .tweets_by_id_context import TweetsByIdContext 15 | from .tweets_by_id_result import TweetsByIdResult 16 | from .tweets_by_id_task import TweetsByIdTask 17 | 18 | _NOT_FOUND_MESSAGE = '_Missing: No status found with that ID.' 19 | 20 | 21 | class TweetsByIdRunner: 22 | tweets_by_id_context: TweetsByIdContext 23 | tweets_by_ids_task: TweetsByIdTask 24 | raw_data_outputs: List[RawDataOutput] 25 | web_client: WebClient 26 | 27 | def __init__( 28 | self, 29 | tweets_by_id_task: TweetsByIdTask, 30 | raw_data_outputs: List[RawDataOutput], 31 | tweets_by_ids_context: Optional[TweetsByIdContext] = None, 32 | web_client: Optional[WebClient] = None, 33 | ): 34 | self.tweets_by_id_context = TweetsByIdContext() if tweets_by_ids_context is None \ 35 | else tweets_by_ids_context 36 | self.tweets_by_ids_task = tweets_by_id_task 37 | self.raw_data_outputs = raw_data_outputs 38 | self.web_client = web_client if web_client is not None \ 39 | else DefaultTwitterWebClientProvider.get_web_client() 40 | return 41 | 42 | def run(self) -> TweetsByIdResult: 43 | """Main search_runner method.""" 44 | while not self._is_end_of_scrapping(): 45 | self._execute_next_tweets_request() 46 | return TweetsByIdResult(self.tweets_by_id_context.all_download_tweets_count) 47 | 48 | def _is_end_of_scrapping(self) -> bool: 49 | ctx = self.tweets_by_id_context 50 | is_cursor = ctx.cursor is not None 51 | was_any_call = ctx.requests_count > 0 52 | return was_any_call and not is_cursor 53 | 54 | @staticmethod 55 | def response_with_not_found(request_response: RequestResponse) -> bool: 56 | parsed = json.loads(request_response.text) 57 | if 'errors' not in parsed: 58 | return False 59 | errors = parsed['errors'] 60 | filtered_errors = [it for it in errors if _NOT_FOUND_MESSAGE == it['message']] 61 | return len(filtered_errors) > 0 62 | 63 | def _execute_next_tweets_request(self): 64 | request_params = self._get_next_request_details() 65 | response = self.web_client.run_request(request_params) 66 | if response.is_success(): 67 | if self.response_with_not_found(response): 68 | self.tweets_by_id_context.add_downloaded_tweets_count_in_request(0) 69 | self.tweets_by_id_context.cursor = None 70 | else: 71 | parsed_list = get_all_tweets_from_json(response.text) 72 | cursors = [it for it in parsed_list if isinstance(it, Cursor)] 73 | cursor = cursors[0] if len(cursors) > 0 else None 74 | user_tweet_raw = [it for it in parsed_list if isinstance(it, UserTweetRaw)] 75 | self.tweets_by_id_context.add_downloaded_tweets_count_in_request(len(user_tweet_raw)) 76 | self.tweets_by_id_context.cursor = cursor 77 | self._process_new_tweets_to_output(user_tweet_raw) 78 | else: 79 | raise ScrapBatchBadResponse(response) 80 | return 81 | 82 | def _process_new_tweets_to_output(self, raw_data_list: List[UserTweetRaw]): 83 | for raw_output in self.raw_data_outputs: 84 | raw_output.export_raw_data(raw_data_list) 85 | return 86 | 87 | def _get_next_request_details(self) -> RequestDetails: 88 | return TwitterApiRequests().get_tweet_request_by_id( 89 | 
self.tweets_by_ids_task.tweet_id, 90 | self.tweets_by_id_context.cursor 91 | ) 92 | -------------------------------------------------------------------------------- /stweet/tweets_by_ids_runner/tweets_by_id_task.py: -------------------------------------------------------------------------------- 1 | """Domain TweetsByIdsTask class.""" 2 | from dataclasses import dataclass 3 | 4 | 5 | @dataclass(frozen=True) 6 | class TweetsByIdTask: 7 | """Domain TweetsByIdsTask class.""" 8 | 9 | tweet_id: str 10 | 11 | def __init__( 12 | self, 13 | tweet_id: str 14 | ): 15 | """Class constructor.""" 16 | object.__setattr__(self, 'tweet_id', tweet_id) 17 | return 18 | -------------------------------------------------------------------------------- /stweet/twitter_api/__init__.py: -------------------------------------------------------------------------------- 1 | from .default_twitter_web_client_provider import \ 2 | DefaultTwitterWebClientProvider 3 | -------------------------------------------------------------------------------- /stweet/twitter_api/default_twitter_web_client_provider.py: -------------------------------------------------------------------------------- 1 | """DefaultTwitterWebClientProvider class.""" 2 | from tor_python_easy.tor_control_port_client import TorControlPortClient 3 | 4 | from ..auth import SimpleAuthTokenProvider 5 | from ..auth.fail_strategy.tor_ip_change_auth_fail_strategy import \ 6 | TorIpChangeAuthFailStrategy 7 | from ..http_request import (RequestsWebClient, RequestsWebClientProxyConfig, 8 | WebClient) 9 | from .twitter_auth_web_client_interceptor import \ 10 | TwitterAuthWebClientInterceptor 11 | 12 | 13 | class DefaultTwitterWebClientProvider: 14 | 15 | @staticmethod 16 | def get_web_client() -> WebClient: 17 | """Method returns default WebClient.""" 18 | return RequestsWebClient(interceptors=[TwitterAuthWebClientInterceptor()]) 19 | 20 | @staticmethod 21 | def get_web_client_preconfigured_for_tor_proxy( 22 | socks_proxy_url: str, 23 | control_host: str, 24 | control_port: int, 25 | control_password: str 26 | ) -> WebClient: 27 | tor_control_client = TorControlPortClient(control_host, control_port, control_password) 28 | fail_strategy = TorIpChangeAuthFailStrategy(tor_control_client) 29 | auth_token_provider = SimpleAuthTokenProvider(fail_strategy) 30 | return RequestsWebClient( 31 | proxy=RequestsWebClientProxyConfig(socks_proxy_url, socks_proxy_url), 32 | interceptors=[TwitterAuthWebClientInterceptor(auth_token_provider=auth_token_provider)] 33 | ) 34 | -------------------------------------------------------------------------------- /stweet/twitter_api/twitter_api_requests.py: -------------------------------------------------------------------------------- 1 | """Definitions of all api calls.""" 2 | import json 3 | from typing import Optional 4 | 5 | from ..http_request.http_method import HttpMethod 6 | from ..http_request.request_details import RequestDetails 7 | from ..model.cursor import Cursor 8 | 9 | _default_tweets_count_in_batch = 20 10 | 11 | 12 | class TwitterApiRequests: 13 | """Definitions of all api calls.""" 14 | 15 | timeout: int 16 | 17 | def __init__(self, timeout: int = 60): 18 | """Constructor TwitterApiRequests.""" 19 | self.timeout = timeout 20 | 21 | def get_guest_token_request_details(self): 22 | """Method return request details to get guest token.""" 23 | return RequestDetails( 24 | HttpMethod.POST, 25 | 'https://api.twitter.com/1.1/guest/activate.json', 26 | dict(), 27 | dict(), 28 | self.timeout 29 | ) 30 | 31 | def 
get_search_tweet_request_details_new_api(
32 | self,
33 | all_download_tweets_count: int,
34 | cursor: Optional[Cursor],
35 | tweets_limit: Optional[int],
36 | full_search_query: str
37 | ) -> RequestDetails:
38 | count = _default_tweets_count_in_batch \
39 | if tweets_limit is None \
40 | else min(_default_tweets_count_in_batch, tweets_limit - all_download_tweets_count)
41 | params = dict([
42 | ('include_profile_interstitial_type', '1'),
43 | ('include_blocking', '1'),
44 | ('include_blocked_by', '1'),
45 | ('include_followed_by', '1'),
46 | ('include_want_retweets', '1'),
47 | ('include_mute_edge', '1'),
48 | ('include_can_dm', '1'),
49 | ('include_can_media_tag', '1'),
50 | ('skip_status', '1'),
51 | ('cards_platform', 'Web-12'),
52 | ('include_cards', '1'),
53 | ('include_ext_alt_text', 'true'),
54 | ('include_quote_count', 'true'),
55 | ('include_reply_count', '1'),
56 | ('tweet_mode', 'extended'),
57 | ('include_entities', 'true'),
58 | ('include_user_entities', 'true'),
59 | ('include_ext_media_color', 'true'),
60 | ('include_ext_media_availability', 'true'),
61 | ('send_error_codes', 'true'),
62 | ('simple_quoted_tweet', 'true'),
63 | ('q', full_search_query),
64 | ('count', count),
65 | ('query_source', 'typed_query'),
66 | ('pc', '1'),
67 | ('spelling_corrections', '1'),
68 | ('ext', 'mediaStats,highlightedLabel,voiceInfo')
69 | ])
70 | if cursor is not None:
71 | params['cursor'] = cursor.value
72 | return RequestDetails(
73 | HttpMethod.GET,
74 | url='https://twitter.com/i/api/2/search/adaptive.json',
75 | headers=dict(),
76 | params=params,
77 | timeout=self.timeout
78 | )
79 |
80 | def get_user_details_request_details(self, user_screen_name: str) -> RequestDetails:
81 | variable_query = {
82 | "screen_name": user_screen_name,
83 | "withSafetyModeUserFields": True,
84 | "withSuperFollowsUserFields": True
85 | }
86 | _graphql_token = 'cYsDlVss-qimNYmNlb6inw'  # token generated for one request in browser
87 | return RequestDetails(
88 | http_method=HttpMethod.GET,
89 | url=f'https://twitter.com/i/api/graphql/{_graphql_token}/UserByScreenName',
90 | headers=dict(),
91 | params=dict({
92 | 'variables': json.dumps(variable_query)
93 | }),
94 | timeout=self.timeout
95 | )
96 |
97 | def get_tweet_request_by_id(self, tweet_id: str, cursor: Optional[Cursor]) -> RequestDetails:
98 | variable_query = {
99 | "focalTweetId": tweet_id,
100 | "with_rux_injections": True,
101 | "includePromotedContent": True,
102 | "withCommunity": True,
103 | "withTweetQuoteCount": True,
104 | "withBirdwatchNotes": True,
105 | "withSuperFollowsUserFields": True,
106 | "withUserResults": True,
107 | "withBirdwatchPivots": True,
108 | "withReactionsMetadata": True,
109 | "withReactionsPerspective": True,
110 | "withSuperFollowsTweetFields": True,
111 | "withVoice": True
112 | }
113 | if cursor is not None:
114 | variable_query['cursor'] = cursor.value
115 | _graphql_token = 'kUnCMgMYZCR8GyRZz76IQg'
116 | return RequestDetails(
117 | http_method=HttpMethod.GET,
118 | url=f'https://twitter.com/i/api/graphql/{_graphql_token}/TweetDetail',
119 | headers=dict(),
120 | params=dict({
121 | 'variables': json.dumps(variable_query)
122 | }),
123 | timeout=self.timeout
124 | )
125 |
--------------------------------------------------------------------------------
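Each method above only builds a RequestDetails value; nothing is sent until a WebClient executes it, so the requests can be inspected offline. A small sketch (the screen name is illustrative):

from stweet.twitter_api.twitter_api_requests import TwitterApiRequests

details = TwitterApiRequests(timeout=30).get_user_details_request_details('twitter')
# HttpMethod.GET https://twitter.com/i/api/graphql/cYsDlVss-qimNYmNlb6inw/UserByScreenName
print(details.http_method, details.url)
print(details.params['variables'])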
from stweet.auth import AuthTokenProvider, SimpleAuthTokenProvider 6 | from stweet.exceptions.too_many_requests_exception import \ 7 | TooManyRequestsException 8 | from stweet.http_request import (RequestDetails, RequestResponse, 9 | RequestsWebClient, WebClient) 10 | 11 | _AUTH_TOKEN = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4p' \ 12 | 'uTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA' 13 | _MAX_TRIES = 5 14 | 15 | 16 | class TwitterAuthWebClientInterceptor(WebClient.WebClientInterceptor): 17 | """Class of TwitterAuthWebClientInterceptor. 18 | 19 | Interceptor allows simple management of auth requests. 20 | """ 21 | 22 | _current_token: Optional[str] 23 | _auth_token_provider: AuthTokenProvider 24 | _guest_token_lock: Lock 25 | 26 | def __init__( 27 | self, 28 | init_auth_token: Optional[str] = None, 29 | auth_token_provider: Optional[AuthTokenProvider] = None 30 | ): 31 | """Constructor of TwitterAuthWebClientInterceptor.""" 32 | self._current_token = init_auth_token 33 | self._auth_token_provider = auth_token_provider \ 34 | if auth_token_provider is not None \ 35 | else SimpleAuthTokenProvider() 36 | self._guest_token_lock = Lock() 37 | 38 | def _add_auth_token(self, request_details: RequestDetails): 39 | request_details.headers['Authorization'] = _AUTH_TOKEN 40 | 41 | def _add_guest_token(self, request_details: RequestDetails, web_client: WebClient): 42 | if self._current_token is None: 43 | self._call_for_new_auth_request(web_client) 44 | request_details.headers['x-guest-token'] = self._current_token 45 | 46 | def _is_auth_token_to_add(self, request_details: RequestDetails) -> bool: 47 | return 'http://api.twitter.com' in request_details.url \ 48 | or 'https://api.twitter.com' in request_details.url \ 49 | or 'https://twitter.com/i/api' in request_details.url 50 | 51 | def _is_guest_token_to_add(self, request_details: RequestDetails) -> bool: 52 | if 'https://twitter.com/i/api/graphql/' in request_details.url: 53 | return True 54 | is_guest_request = '/1.1/guest/activate.json' in request_details.url 55 | return self._is_auth_token_to_add(request_details) and not is_guest_request 56 | 57 | def _call_for_new_auth_request(self, web_client: WebClient): 58 | old_token = self._current_token 59 | with self._guest_token_lock: 60 | if old_token == self._current_token: 61 | self._current_token = self._auth_token_provider.get_new_token(web_client) 62 | 63 | def intercept( 64 | self, 65 | requests_details: RequestDetails, 66 | next_interceptors: List[WebClient.WebClientInterceptor], 67 | web_client: RequestsWebClient 68 | ) -> RequestResponse: 69 | """Method intercepts the request and manages its auth headers."""
70 | need_guest_token = self._is_guest_token_to_add(requests_details) 71 | if self._is_auth_token_to_add(requests_details): 72 | self._add_auth_token(requests_details) 73 | 74 | if need_guest_token: 75 | self._add_guest_token(requests_details, web_client) 76 | 77 | response: Optional[RequestResponse] = None 78 | tries_counter = 0 79 | 80 | while tries_counter < _MAX_TRIES and (response is None or response.is_429()): 81 | if need_guest_token and response is not None: 82 | self._call_for_new_auth_request(web_client) 83 | self._add_guest_token(requests_details, web_client) 84 | response = self.get_response(requests_details, next_interceptors, web_client) 85 | tries_counter = tries_counter + 1 86 | 87 | if response.is_429(): 88 | raise TooManyRequestsException(requests_details.url) 89 | 90 | return response 91 | -------------------------------------------------------------------------------- /test-services-docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.8" 2 | services: 3 | squid_proxy: 4 | image: docker.io/salrashid123/squidproxy 5 | command: /apps/squid/sbin/squid -NsY -f /apps/squid.conf.forward 6 | ports: 7 | - "3128:3128" 8 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markowanga/stweet/fe34e98254dc7646bde6e083b5f6f745a0ee8cb6/tests/__init__.py -------------------------------------------------------------------------------- /tests/integration/all_languages_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | from tests.test_util import tweet_list_assert_condition 3 | 4 | 5 | def run_test_for_single_language(language: st.Language): 6 | search_tweets_task = st.SearchTweetsTask( 7 | all_words='#covid19', 8 | tweets_limit=10, 9 | language=language 10 | ) 11 | tweets_collector = st.CollectorTweetOutput() 12 | st.TweetSearchRunner( 13 | search_tweets_task=search_tweets_task, 14 | tweet_outputs=[tweets_collector] 15 | ).run() 16 | tweet_list_assert_condition( 17 | tweets_collector.get_raw_list(), 18 | lambda tweet: tweet.lang in language.short_value 19 | ) 20 | 21 | 22 | def test_search_in_all_languages(): 23 | for language in st.Language: 24 | run_test_for_single_language(language) 25 | -------------------------------------------------------------------------------- /tests/integration/exception_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import stweet as st 4 | from stweet import WebClient 5 | from stweet.auth import SimpleAuthTokenProvider 6 | from stweet.exceptions import RefreshTokenException, ScrapBatchBadResponse 7 | from stweet.exceptions.too_many_requests_exception import TooManyRequestsException 8 | from stweet.http_request import RequestResponse 9 | from stweet.twitter_api.twitter_auth_web_client_interceptor import TwitterAuthWebClientInterceptor 10 | from tests.mock_web_client import MockWebClient 11 | 12 | 13 | def get_client_with_default_response(response: RequestResponse = RequestResponse(None, None)) -> WebClient: 14 | return MockWebClient( 15 | default_response=response, 16 | interceptors=[TwitterAuthWebClientInterceptor()] 17 | ) 18 | 19 | 20 | def test_get_simple_auth_token_with_incorrect_response_1(): 21 | with pytest.raises(RefreshTokenException): 22 | 
SimpleAuthTokenProvider().get_new_token(get_client_with_default_response(RequestResponse(400, None))) 23 | 24 | 25 | def test_get_auth_token_with_incorrect_response_2(): 26 | with pytest.raises(TooManyRequestsException): 27 | SimpleAuthTokenProvider(50, 150).get_new_token(get_client_with_default_response(RequestResponse(429, None))) 28 | 29 | 30 | def test_get_auth_token_with_incorrect_response_3(): 31 | with pytest.raises(RefreshTokenException): 32 | SimpleAuthTokenProvider().get_new_token(get_client_with_default_response(RequestResponse(200, '{}'))) 33 | 34 | 35 | def test_get_auth_token_with_incorrect_response_4(): 36 | with pytest.raises(RefreshTokenException): 37 | SimpleAuthTokenProvider().get_new_token(get_client_with_default_response(RequestResponse(200, 'LALA'))) 38 | 39 | 40 | def test_runner_exceptions(): 41 | class TokenExpiryExceptionWebClient(st.WebClient): 42 | 43 | count_dict = dict({ 44 | 'https://api.twitter.com/2/search/adaptive.json': 0, 45 | 'https://api.twitter.com/1.1/guest/activate.json': 0 46 | }) 47 | 48 | def run_clear_request(self, params: st.http_request.RequestDetails) -> st.http_request.RequestResponse: 49 | self.count_dict[params.url] = self.count_dict[params.url] + 1 50 | if params.url == 'https://api.twitter.com/2/search/adaptive.json': 51 | if self.count_dict[params.url] == 1: 52 | return st.http_request.RequestResponse(429, None) 53 | else: 54 | return st.http_request.RequestResponse(400, '') 55 | else: 56 | return st.http_request.RequestResponse(200, '{"guest_token":"1350356785648062465"}') 57 | 58 | with pytest.raises(ScrapBatchBadResponse): 59 | search_tweets_task = st.SearchTweetsTask( 60 | all_words='#koronawirus' 61 | ) 62 | st.TweetSearchRunner( 63 | search_tweets_task=search_tweets_task, 64 | tweet_outputs=[], 65 | web_client=TokenExpiryExceptionWebClient(interceptors=[TwitterAuthWebClientInterceptor()]), 66 | ).run() 67 | 68 | 69 | def test_get_not_existing_user(): 70 | task = st.GetUsersTask(['fcbewkjdsncvjwkfs']) 71 | result = st.GetUsersRunner(task, []).run() 72 | assert result.users_count == 0 73 | -------------------------------------------------------------------------------- /tests/integration/export_import_test.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import pytest 4 | 5 | import stweet as st 6 | from tests.test_util import get_temp_test_file_name, two_lists_assert_equal 7 | 8 | # pytest.fixture(autouse=True)(run_around_tests) 9 | 10 | 11 | def get_tweets() -> List[st.UserTweetRaw]: 12 | collect_tweet_output = st.CollectorTweetOutput() 13 | task = st.SearchTweetsTask(all_words="#covid19", tweets_limit=100) 14 | st.TweetSearchRunner(task, [collect_tweet_output]).run() 15 | return collect_tweet_output.get_raw_list() 16 | 17 | 18 | def get_users() -> List[st.User]: 19 | collect_user_output = st.CollectorUserOutput() 20 | task = st.GetUsersTask(list(set([tweet.user_name for tweet in get_tweets()]))[:10]) 21 | st.GetUsersRunner(task, [collect_user_output]).run() 22 | return collect_user_output.get_scrapped_users() 23 | 24 | 25 | def test_tweet_json_lines_serialization(): 26 | jl_filename = get_temp_test_file_name('jl') 27 | tweets = get_tweets() 28 | st.export_tweets_to_json_lines(tweets, jl_filename) 29 | imported_tweets = st.read_tweets_from_json_lines_file(jl_filename) 30 | two_lists_assert_equal(imported_tweets, tweets) 31 | 32 | 33 | def test_tweet_csv_serialization(): 34 | csv_filename = get_temp_test_file_name('csv') 35 | tweets = get_tweets() 36 | 
st.export_tweets_to_csv(tweets, csv_filename) 37 | imported_tweets = st.read_tweets_from_csv_file(csv_filename) 38 | two_lists_assert_equal(imported_tweets, tweets) 39 | 40 | 41 | def test_user_json_lines_serialization(): 42 | jl_filename = get_temp_test_file_name('jl') 43 | users = get_users() 44 | st.export_users_to_json_lines(users, jl_filename) 45 | imported_tweets = st.read_users_from_json_lines_file(jl_filename) 46 | two_lists_assert_equal(imported_tweets, users) 47 | 48 | 49 | def test_user_csv_serialization(): 50 | csv_filename = get_temp_test_file_name('csv') 51 | users = get_users() 52 | st.export_users_to_csv(users, csv_filename) 53 | imported_tweets = st.read_users_from_csv_file(csv_filename) 54 | two_lists_assert_equal(users, imported_tweets) 55 | -------------------------------------------------------------------------------- /tests/integration/get_tweet_by_id_test.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | import stweet as st 4 | from stweet import RequestsWebClient 5 | from stweet.http_request import RequestDetails, RequestResponse 6 | from stweet.http_request.interceptor.logging_requests_web_client_interceptor import \ 7 | LoggingRequestsWebClientInterceptor 8 | from stweet.twitter_api.twitter_auth_web_client_interceptor import TwitterAuthWebClientInterceptor 9 | 10 | _TWITTER_JSON_NO_TWEETS = '{"globalObjects":{"tweets":{},"users":{},"moments":{},"cards":{},"places":{}' \ 11 | ',"media":{},"broadcasts":{},"topics":{},"lists":{}},"timeline":{"id":"search' \ 12 | '-6749090958448035293","instructions":[{"addEntries":{"entries":[{"entryId":"' \ 13 | 'sq-cursor-top","sortIndex":"999999999","content":{"operation":{"cursor":{"va' \ 14 | 'lue":"refresh:thGAVUV0VFVBYBFgESNQAVACUAERXsiHoVgIl6GAdERUZBVUxUFQAVABUBFQAV' \ 15 | 'AAA=","cursorType":"Top"}}}},{"entryId":"sq-cursor-bottom","sortIndex":"0","' \ 16 | 'content":{"operation":{"cursor":{"value":"scroll:thGAVUV0VFVBYBFgESNQAVACUAE' \ 17 | 'RXsiHoVgIl6GAdERUZBVUxUFQAVABUBFQAVAAA=","cursorType":"Bottom"}}}}]}}]}}' 18 | 19 | 20 | class CustomAdapter(RequestsWebClient): 21 | 22 | def __init__(self, override: List[Tuple[str, RequestResponse]]): 23 | super().__init__() 24 | self.override = override 25 | 26 | def run_request(self, params: RequestDetails) -> RequestResponse: 27 | filtered = [it for it in self.override if it[0] == params.url] 28 | if len(filtered) > 0: 29 | return filtered[0][1] 30 | else: 31 | return super().run_request(params) 32 | 33 | 34 | def test_get_tweets_by_ids(): 35 | tweets_ids = ['1337071849772093442', '1337067073051238400'] 36 | task = st.TweetsByIdTask(tweets_ids) 37 | collect_output = st.CollectorTweetOutput() 38 | result = st.TweetsByIdRunner(task, [collect_output], 39 | web_client=RequestsWebClient( 40 | interceptors=[LoggingRequestsWebClientInterceptor(), 41 | TwitterAuthWebClientInterceptor()])).run() 42 | scrapped_tweets_ids = [it.id_str for it in collect_output.get_raw_list()] 43 | assert result.downloaded_count == 1 44 | assert len(collect_output.get_raw_list()) == 1 45 | assert len(result.tweet_ids_not_scrapped) == 1 46 | 47 | 48 | def test_get_not_existing_tweet(): 49 | tweets_ids = ['1337071849772093442'] 50 | task = st.TweetsByIdTask(tweets_ids) 51 | collect_output = st.CollectorTweetOutput() 52 | result = st.TweetsByIdRunner( 53 | task, 54 | [collect_output], 55 | web_client=CustomAdapter( 56 | [('https://cdn.syndication.twimg.com/tweet', RequestResponse(404, ''))]) 57 | ).run() 58 | assert 
result.downloaded_count == 0 59 | assert len(result.tweet_ids_not_scrapped) == 1 60 | 61 | 62 | def test_get_not_existing_tweet_in_twitter(): 63 | tweets_ids = ['1337071849772093442'] 64 | task = st.TweetsByIdTask(tweets_ids) 65 | collect_output = st.CollectorTweetOutput() 66 | result = st.TweetsByIdRunner( 67 | task, 68 | [collect_output], 69 | web_client=CustomAdapter( 70 | [('https://api.twitter.com/2/search/adaptive.json', 71 | RequestResponse(200, _TWITTER_JSON_NO_TWEETS))] 72 | ) 73 | ).run() 74 | assert result.downloaded_count == 0 75 | assert len(result.tweet_ids_not_scrapped) == 1 76 | -------------------------------------------------------------------------------- /tests/integration/get_user_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | 3 | _usernames = ['ProtasiewiczJ', 'donaldtuskEPP'] 4 | 5 | 6 | def test_get_user(): 7 | task = st.GetUsersTask(_usernames) 8 | task_result = st.GetUsersRunner(task, [st.PrintUserOutput()]).run() 9 | assert len(_usernames) == task_result.users_count 10 | -------------------------------------------------------------------------------- /tests/integration/import_older_version_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | 3 | _RESOURCES_PATH = 'tests/resources' 4 | 5 | 6 | def test_tweets_csv_import_v1_1_2(): 7 | csv_filename = f'{_RESOURCES_PATH}/tweets_v1.1.2.csv' 8 | tweets_from_csv = st.read_tweets_from_csv_file(csv_filename) 9 | assert len(tweets_from_csv) == 9 10 | 11 | 12 | def test_tweets_json_import_v1_1_2(): 13 | jl_filename = f'{_RESOURCES_PATH}/tweets_v1.1.2.jl' 14 | tweets_from_jl = st.read_tweets_from_json_lines_file(jl_filename) 15 | assert len(tweets_from_jl) == 9 16 | 17 | 18 | def test_user_csv_import_v1_3_0(): 19 | csv_filename = f'{_RESOURCES_PATH}/users_v1.3.0.csv' 20 | users = st.read_users_from_csv_file(csv_filename) 21 | assert len(users) == 2 22 | 23 | 24 | def test_user_json_import_v1_3_0(): 25 | jl_filename = f'{_RESOURCES_PATH}/users_v1.3.0.jl' 26 | users = st.read_users_from_json_lines_file(jl_filename) 27 | assert len(users) == 2 28 | -------------------------------------------------------------------------------- /tests/integration/interceptor_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from io import StringIO 3 | 4 | import stweet as st 5 | from stweet.http_request import HttpMethod 6 | from stweet.http_request.interceptor.logging_requests_web_client_interceptor import LoggingRequestsWebClientInterceptor 7 | from stweet.http_request.interceptor.params_response_log_web_client_interceptor import \ 8 | ParamsResponseLogWebClientInterceptor 9 | from stweet.twitter_api.twitter_api_requests import TwitterApiRequests 10 | 11 | 12 | def get_example_request_details() -> st.http_request.RequestDetails: 13 | return st.http_request.RequestDetails( 14 | http_method=HttpMethod.GET, 15 | url='https://api.github.com/events', 16 | params=dict({}), 17 | headers=dict({}), 18 | timeout=200 19 | ) 20 | 21 | 22 | def start_redirect_output() -> StringIO: 23 | captured_output = StringIO() 24 | sys.stdout = captured_output 25 | sys.stderr = captured_output 26 | return captured_output 27 | 28 | 29 | def stop_redirect_output(): 30 | sys.stdout = sys.__stdout__ 31 | sys.stderr = sys.__stderr__ 32 | 33 | 34 | def test_logging_requests_web_client_interceptor(): 35 | captured_output = 
start_redirect_output() 36 | request = TwitterApiRequests().get_guest_token_request_details() 37 | st.RequestsWebClient(interceptors=[LoggingRequestsWebClientInterceptor()]).run_request(request) 38 | stop_redirect_output() 39 | content = captured_output.getvalue() 40 | assert "send: b'POST /1.1/guest/activate.json HTTP/1.1" in content 41 | 42 | 43 | def test_params_response_log_web_client_interceptor(): 44 | captured_output = start_redirect_output() 45 | st.RequestsWebClient(interceptors=[ParamsResponseLogWebClientInterceptor()]).run_request( 46 | TwitterApiRequests().get_guest_token_request_details()) 47 | stop_redirect_output() 48 | content = captured_output.getvalue() 49 | assert "RequestDetails(" in content 50 | assert "RequestResponse(" in content 51 | -------------------------------------------------------------------------------- /tests/integration/large_iterator_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import stweet as st 4 | from tests.test_util import get_temp_test_file_name, two_lists_assert_equal, get_tweets_to_tweet_output_test 5 | from tests.test_util import get_users_to_tweet_output_test 6 | 7 | 8 | def test_user_json_lines_read_iterator(): 9 | file_name = get_temp_test_file_name('jl') 10 | collector = st.CollectorUserOutput() 11 | get_users_to_tweet_output_test([collector, st.JsonLineFileUserOutput(file_name)]) 12 | iterator = st.UserJsonLineFileIterator(file_name, 2) 13 | list_from_iterator = [] 14 | iterator.open() 15 | while True: 16 | try: 17 | list_from_iterator.extend(next(iterator)) 18 | except StopIteration: 19 | break 20 | iterator.close() 21 | two_lists_assert_equal(list_from_iterator, collector.get_scrapped_users()) 22 | 23 | 24 | def test_user_csv_read_iterator(): 25 | file_name = get_temp_test_file_name('csv') 26 | collector = st.CollectorUserOutput() 27 | get_users_to_tweet_output_test([collector, st.CsvUserOutput(file_name)]) 28 | iterator = st.UserCsvFileIterator(file_name, 4) 29 | list_from_iterator = [] 30 | iterator.open() 31 | while True: 32 | try: 33 | list_from_iterator.extend(next(iterator)) 34 | except StopIteration: 35 | break 36 | two_lists_assert_equal(list_from_iterator, collector.get_scrapped_users()) 37 | 38 | 39 | def test_tweet_json_lines_read_iterator(): 40 | file_name = get_temp_test_file_name('jl') 41 | collector = st.CollectorTweetOutput() 42 | get_tweets_to_tweet_output_test([collector, st.JsonLineFileTweetOutput(file_name)]) 43 | iterator = st.TweetJsonLineFileIterator(file_name, 4) 44 | list_from_iterator = [] 45 | iterator.open() 46 | while True: 47 | try: 48 | list_from_iterator.extend(next(iterator)) 49 | except StopIteration: 50 | break 51 | iterator.close() 52 | two_lists_assert_equal(list_from_iterator, collector.get_raw_list()) 53 | 54 | 55 | def test_tweet_csv_read_iterator(): 56 | file_name = get_temp_test_file_name('csv') 57 | collector = st.CollectorTweetOutput() 58 | get_tweets_to_tweet_output_test([collector, st.CsvTweetOutput(file_name)]) 59 | iterator = st.TweetCsvFileIterator(file_name, 4) 60 | list_from_iterator = [] 61 | iterator.open() 62 | while True: 63 | try: 64 | list_from_iterator.extend(next(iterator)) 65 | except StopIteration: 66 | break 67 | two_lists_assert_equal(list_from_iterator, collector.get_raw_list()) 68 | -------------------------------------------------------------------------------- /tests/integration/parse_media_test.py: -------------------------------------------------------------------------------- 1 | import stweet as 
st 2 | 3 | 4 | def test_scrap_tweet_with_single_media(): 5 | tweets_ids = ['1357358278746005508'] 6 | collector = st.CollectorTweetOutput() 7 | st.TweetsByIdRunner(st.TweetsByIdTask(tweets_ids), [collector]).run() 8 | tweets = collector.get_raw_list() 9 | assert len(tweets) == 1 10 | assert len(tweets[0].media) == 1 11 | 12 | 13 | def test_scrap_tweet_with_double_media(): 14 | tweets_ids = ['1115978039534297088'] 15 | collector = st.CollectorTweetOutput() 16 | st.TweetsByIdRunner(st.TweetsByIdTask(tweets_ids), [collector]).run() 17 | tweets = collector.get_raw_list() 18 | assert len(tweets) == 1 19 | assert len(tweets[0].media) == 2 20 | -------------------------------------------------------------------------------- /tests/integration/print_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from io import StringIO 3 | 4 | import stweet as st 5 | from tests.test_util import get_tweets_to_tweet_output_test, get_users_to_tweet_output_test 6 | from tests.tweet_output_export_call_counter import TweetOutputExportCallCounter 7 | from tests.tweet_output_tweets_counter import TweetOutputTweetsCounter 8 | 9 | 10 | def test_print_all_tweet_output(): 11 | captured_output = StringIO() 12 | sys.stdout = captured_output 13 | tweets_collector = st.CollectorTweetOutput() 14 | get_tweets_to_tweet_output_test([ 15 | st.PrintTweetOutput(), 16 | tweets_collector 17 | ]) 18 | sys.stdout = sys.__stdout__ 19 | assert captured_output.getvalue().count('Tweet(') == len(tweets_collector.get_raw_list()) 20 | 21 | 22 | def test_print_all_user_output(): 23 | captured_output = StringIO() 24 | sys.stdout = captured_output 25 | users_collector = st.CollectorUserOutput() 26 | get_users_to_tweet_output_test([ 27 | st.PrintUserOutput(), 28 | users_collector 29 | ]) 30 | sys.stdout = sys.__stdout__ 31 | assert captured_output.getvalue().count('User(') == len(users_collector.get_scrapped_users()) 32 | 33 | 34 | def test_print_batch_single_tweet_tweet_output(): 35 | captured_output = StringIO() 36 | sys.stdout = captured_output 37 | tweet_output_counter = TweetOutputExportCallCounter() 38 | get_tweets_to_tweet_output_test([ 39 | st.PrintFirstInRequestTweetOutput(), 40 | tweet_output_counter 41 | ]) 42 | sys.stdout = sys.__stdout__ 43 | print_tweet_count = captured_output.getvalue().count('Tweet(') 44 | print_no_tweets_line = captured_output.getvalue().count('PrintFirstInRequestTweetOutput -- no tweets to print') 45 | assert (print_tweet_count + print_no_tweets_line) == tweet_output_counter.get_output_call_count() 46 | 47 | 48 | def test_print_each_n_tweet_tweet_output(): 49 | captured_output = StringIO() 50 | each_n = 7 51 | sys.stdout = captured_output 52 | tweet_output_counter = TweetOutputTweetsCounter() 53 | get_tweets_to_tweet_output_test([ 54 | st.PrintEveryNTweetOutput(each_n), 55 | tweet_output_counter 56 | ]) 57 | sys.stdout = sys.__stdout__ 58 | print_tweet_count = captured_output.getvalue().count('Tweet(') 59 | assert print_tweet_count == int(tweet_output_counter.get_output_call_count() / each_n) 60 | 61 | 62 | def test_print_each_n_tweet_user_output(): 63 | captured_output = StringIO() 64 | each_n = 2 65 | sys.stdout = captured_output 66 | scrap_result = get_users_to_tweet_output_test([st.PrintEveryNUserOutput(each_n)]) 67 | sys.stdout = sys.__stdout__ 68 | print_tweet_count = captured_output.getvalue().count('User(') 69 | assert print_tweet_count == int(scrap_result.users_count / each_n) 70 | 
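The print tests above drive several built-in TweetOutput implementations (PrintTweetOutput, PrintEveryNTweetOutput, PrintFirstInRequestTweetOutput) alongside the counting outputs defined under tests/. A custom output only has to subclass st.TweetOutput and implement export_tweets, which the runner calls once per scrapped batch. A minimal sketch, assuming only the st.TweetOutput interface and the UserTweetRaw.id_str field exercised by these tests (the class name FirstTweetIdOutput is illustrative):

from typing import List, Optional

import stweet as st


class FirstTweetIdOutput(st.TweetOutput):
    """Example output: remembers the id of the first scrapped tweet."""

    first_id: Optional[str]

    def __init__(self):
        self.first_id = None

    def export_tweets(self, tweets: List[st.UserTweetRaw]):
        # The runner calls export_tweets once per batch; keep only the first id.
        if self.first_id is None and len(tweets) > 0:
            self.first_id = tweets[0].id_str
        return

Such an output can be passed in the tweet_outputs list next to CollectorTweetOutput, exactly like the counter outputs used in the tests above.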
-------------------------------------------------------------------------------- /tests/integration/proxy_client_requests_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | from stweet.twitter_api.twitter_auth_web_client_interceptor import TwitterAuthWebClientInterceptor 3 | 4 | 5 | def test_using_proxy_client(): 6 | task = st.SearchTweetsTask( 7 | all_words='#covid19', 8 | tweets_limit=200 9 | ) 10 | proxy_client = st.RequestsWebClient( 11 | proxy=st.RequestsWebClientProxyConfig( 12 | http_proxy='http://localhost:3128', 13 | https_proxy='http://localhost:3128' 14 | ), 15 | interceptors=[TwitterAuthWebClientInterceptor()] 16 | ) 17 | tweets_collector = st.CollectorTweetOutput() 18 | result = st.TweetSearchRunner( 19 | search_tweets_task=task, 20 | tweet_outputs=[tweets_collector], 21 | web_client=proxy_client 22 | ).run() 23 | scrapped_tweets = tweets_collector.get_raw_list() 24 | assert isinstance(result, st.SearchTweetsResult) 25 | assert len(scrapped_tweets) == task.tweets_limit 26 | -------------------------------------------------------------------------------- /tests/integration/reply_filter_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | from tests.test_util import tweet_list_assert_condition 3 | 4 | 5 | def test_search_as_reply(): 6 | search_tweets_task = st.SearchTweetsTask( 7 | all_words='#covid19', 8 | tweets_limit=500, 9 | replies_filter=st.RepliesFilter.ONLY_REPLIES 10 | ) 11 | tweets_collector = st.CollectorTweetOutput() 12 | st.TweetSearchRunner( 13 | search_tweets_task=search_tweets_task, 14 | tweet_outputs=[tweets_collector] 15 | ).run() 16 | tweet_list_assert_condition( 17 | tweets_collector.get_raw_list(), 18 | lambda tweet: len(tweet.in_reply_to_status_id_str + tweet.in_reply_to_user_id_str) > 0 19 | ) 20 | 21 | 22 | def test_search_as_not_reply(): 23 | search_tweets_task = st.SearchTweetsTask( 24 | all_words='#covid19', 25 | tweets_limit=500, 26 | replies_filter=st.RepliesFilter.ONLY_ORIGINAL 27 | ) 28 | tweets_collector = st.CollectorTweetOutput() 29 | st.TweetSearchRunner( 30 | search_tweets_task=search_tweets_task, 31 | tweet_outputs=[tweets_collector] 32 | ).run() 33 | tweet_list_assert_condition( 34 | tweets_collector.get_raw_list(), 35 | lambda tweet: len(tweet.in_reply_to_status_id_str + tweet.in_reply_to_user_id_str) == 0 36 | ) 37 | -------------------------------------------------------------------------------- /tests/integration/search_in_language_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | from tests.test_util import tweet_list_assert_condition 3 | 4 | 5 | def _run_search_test_covid_tweets_in_language(language: st.Language): 6 | search_tweets_task = st.SearchTweetsTask( 7 | all_words='#covid19', 8 | tweets_limit=100, 9 | language=language 10 | ) 11 | tweets_collector = st.CollectorTweetOutput() 12 | st.TweetSearchRunner( 13 | search_tweets_task=search_tweets_task, 14 | tweet_outputs=[tweets_collector] 15 | ).run() 16 | tweet_list_assert_condition( 17 | tweets_collector.get_raw_list(), 18 | lambda tweet: tweet.lang == language.short_value 19 | ) 20 | 21 | 22 | def test_search_tweets_in_english(): 23 | _run_search_test_covid_tweets_in_language(st.Language.ENGLISH) 24 | 25 | 26 | def test_search_tweets_in_polish(): 27 | _run_search_test_covid_tweets_in_language(st.Language.POLISH) 28 | 29 | 30 | def test_search_tweets_in_german(): 31 | 
_run_search_test_covid_tweets_in_language(st.Language.GERMAN) 32 | -------------------------------------------------------------------------------- /tests/integration/search_return_objest_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | 3 | 4 | def test_return_tweets_objects(): 5 | phrase = '#koronawirus' 6 | search_tweets_task = st.SearchTweetsTask( 7 | all_words=phrase, 8 | tweets_limit=200 9 | ) 10 | tweets_collector = st.CollectorTweetOutput() 11 | result = st.TweetSearchRunner( 12 | search_tweets_task=search_tweets_task, 13 | tweet_outputs=[tweets_collector] 14 | ).run() 15 | scrapped_tweets = tweets_collector.get_raw_list() 16 | assert isinstance(result, st.SearchTweetsResult) 17 | assert result.downloaded_count == len(scrapped_tweets) 18 | assert result.downloaded_count > 0 19 | assert any(phrase in it.full_text for it in scrapped_tweets) 20 | -------------------------------------------------------------------------------- /tests/integration/serialization_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import stweet as st 4 | from tests.test_util import get_temp_test_file_name, get_tweets_to_tweet_output_test, \ 5 | two_lists_assert_equal 6 | 7 | 8 | def test_csv_serialization(): 9 | csv_filename = get_temp_test_file_name('csv') 10 | tweets_collector = st.CollectorTweetOutput() 11 | get_tweets_to_tweet_output_test([ 12 | st.CsvTweetOutput(csv_filename), 13 | tweets_collector 14 | ]) 15 | tweets_from_csv = st.read_tweets_from_csv_file(csv_filename) 16 | two_lists_assert_equal(tweets_from_csv, tweets_collector.get_raw_list()) 17 | 18 | 19 | def test_file_json_lines_serialization(): 20 | jl_filename = get_temp_test_file_name('jl') 21 | tweets_collector = st.CollectorTweetOutput() 22 | get_tweets_to_tweet_output_test([ 23 | st.JsonLineFileTweetOutput(jl_filename), 24 | tweets_collector 25 | ]) 26 | tweets_from_jl = st.read_tweets_from_json_lines_file(jl_filename) 27 | two_lists_assert_equal(tweets_from_jl, tweets_collector.get_raw_list()) 28 | -------------------------------------------------------------------------------- /tests/integration/time_period_test.py: -------------------------------------------------------------------------------- 1 | from arrow import Arrow 2 | 3 | import stweet as st 4 | from tests.test_util import tweet_list_assert_condition 5 | 6 | 7 | def _run_test_between_dates(since: Arrow, until: Arrow): 8 | search_tweets_task = st.SearchTweetsTask( 9 | any_word="#koronawirus #covid19", 10 | since=since, 11 | until=until 12 | ) 13 | tweets_collector = st.CollectorTweetOutput() 14 | st.TweetSearchRunner( 15 | search_tweets_task=search_tweets_task, 16 | tweet_outputs=[tweets_collector] 17 | ).run() 18 | tweet_list_assert_condition( 19 | tweets_collector.get_raw_list(), 20 | lambda tweet: since <= tweet.created_at <= until 21 | ) 22 | 23 | 24 | def test_for_utc_timezone(): 25 | _run_test_between_dates( 26 | since=Arrow(year=2020, month=6, day=11, hour=7), 27 | until=Arrow(year=2020, month=6, day=11, hour=8) 28 | ) 29 | 30 | 31 | def test_for_polish_timezone(): 32 | tz = 'Europe/Warsaw' 33 | _run_test_between_dates( 34 | since=Arrow(year=2020, month=6, day=11, hour=7, tzinfo=tz), 35 | until=Arrow(year=2020, month=6, day=11, hour=8, tzinfo=tz) 36 | ) 37 | -------------------------------------------------------------------------------- /tests/integration/tweets_count_test.py: 
-------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import stweet as st 4 | 5 | 6 | def _scrap_tweets_with_count_assert(count: int): 7 | phrase = '#covid19' 8 | search_tweets_task = st.SearchTweetsTask( 9 | all_words=phrase, 10 | tweets_limit=count 11 | ) 12 | tweets_collector = st.CollectorTweetOutput() 13 | st.TweetSearchRunner( 14 | search_tweets_task=search_tweets_task, 15 | tweet_outputs=[tweets_collector] 16 | ).run() 17 | assert len(tweets_collector.get_raw_list()) == count 18 | 19 | 20 | def test_scrap_small_count_of_tweets(): 21 | _scrap_tweets_with_count_assert(10) 22 | 23 | 24 | def test_scrap_medium_count_of_tweets(): 25 | _scrap_tweets_with_count_assert(100) 26 | 27 | 28 | def test_scrap_big_count_of_tweets(): 29 | _scrap_tweets_with_count_assert(299) 30 | -------------------------------------------------------------------------------- /tests/integration/username_search_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | from tests.test_util import to_base_text, tweet_list_assert_condition 3 | 4 | 5 | def test_search_to_username(): 6 | username = 'realDonaldTrump' 7 | search_tweets_task = st.SearchTweetsTask( 8 | to_username=username, 9 | tweets_limit=100 10 | ) 11 | tweets_collector = st.CollectorTweetOutput() 12 | st.TweetSearchRunner( 13 | search_tweets_task=search_tweets_task, 14 | tweet_outputs=[tweets_collector] 15 | ).run() 16 | tweet_list_assert_condition( 17 | tweets_collector.get_raw_list(), 18 | lambda tweet: to_base_text(username) in to_base_text(tweet.full_text) 19 | ) 20 | 21 | 22 | def test_return_tweets_from_user(): 23 | username = 'realDonaldTrump' 24 | search_tweets_task = st.SearchTweetsTask( 25 | from_username=username, 26 | tweets_limit=100 27 | ) 28 | tweets_collector = st.CollectorTweetOutput() 29 | st.TweetSearchRunner( 30 | search_tweets_task=search_tweets_task, 31 | tweet_outputs=[tweets_collector] 32 | ).run() 33 | tweet_list_assert_condition( 34 | tweets_collector.get_raw_list(), 35 | lambda tweet: tweet.user_name == username 36 | ) 37 | -------------------------------------------------------------------------------- /tests/integration/word_search_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | from tests.test_util import to_base_text, tweet_list_assert_condition 3 | 4 | 5 | def test_search_by_hashtag(): 6 | phrase = '#koronawirus' 7 | search_tweets_task = st.SearchTweetsTask( 8 | all_words=phrase, 9 | tweets_limit=200 10 | ) 11 | tweets_collector = st.CollectorTweetOutput() 12 | st.TweetSearchRunner( 13 | search_tweets_task=search_tweets_task, 14 | tweet_outputs=[tweets_collector] 15 | ).run() 16 | scrapped_tweets = tweets_collector.get_raw_list() 17 | assert any(phrase in it.full_text for it in scrapped_tweets) 18 | 19 | 20 | def test_exact_words(): 21 | exact_phrase = 'duda kaczyński kempa' 22 | search_tweets_task = st.SearchTweetsTask( 23 | exact_words=exact_phrase 24 | ) 25 | tweets_collector = st.CollectorTweetOutput() 26 | st.TweetSearchRunner( 27 | search_tweets_task=search_tweets_task, 28 | tweet_outputs=[tweets_collector] 29 | ).run() 30 | tweet_list_assert_condition( 31 | tweets_collector.get_raw_list(), 32 | lambda tweet: to_base_text(exact_phrase) in to_base_text(tweet.full_text) 33 | ) 34 | 35 | 36 | def contains_any_word(words: str, value: str) -> bool: 37 | return any([to_base_text(word) in 
to_base_text(value) for word in words.split()]) is True 38 | 39 | 40 | def test_any_word(): 41 | any_phrase = 'kaczynski tusk' 42 | search_tweets_task = st.SearchTweetsTask( 43 | any_word=any_phrase, 44 | tweets_limit=100 45 | ) 46 | tweets_collector = st.CollectorTweetOutput() 47 | st.TweetSearchRunner( 48 | search_tweets_task=search_tweets_task, 49 | tweet_outputs=[tweets_collector] 50 | ).run() 51 | 52 | tweet_list_assert_condition( 53 | tweets_collector.get_raw_list(), 54 | lambda tweet: contains_any_word(any_phrase, tweet.full_text) or contains_any_word( 55 | any_phrase, tweet.user_full_name) or contains_any_word(any_phrase, tweet.user_name) 56 | ) 57 | -------------------------------------------------------------------------------- /tests/mock_web_client.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List, Dict 2 | 3 | import stweet as st 4 | from stweet import WebClient 5 | from stweet.http_request import RequestDetails, RequestResponse 6 | 7 | 8 | class MockWebClient(st.WebClient): 9 | responses: Optional[Dict[str, RequestResponse]] 10 | default_response: Optional[RequestResponse] 11 | 12 | def __init__( 13 | self, 14 | interceptors: Optional[List[WebClient.WebClientInterceptor]] = None, 15 | default_response: Optional[RequestResponse] = None, 16 | responses: Optional[Dict[str, RequestResponse]] = None 17 | ): 18 | super().__init__(interceptors) 19 | self.responses = responses 20 | self.default_response = default_response 21 | 22 | def run_clear_request(self, params: RequestDetails) -> RequestResponse: 23 | if self.responses is not None and params.url in self.responses.keys(): 24 | return self.responses[params.url] 25 | elif self.default_response is not None: 26 | return self.default_response 27 | else: 28 | raise Exception('no value to return') 29 | -------------------------------------------------------------------------------- /tests/resources/tweets_v1.1.2.csv: -------------------------------------------------------------------------------- 1 | created_at,id_str,conversation_id_str,full_text,lang,favorited,retweeted,retweet_count,favorite_count,reply_count,quote_count,quoted_status_id_str,quoted_status_short_url,quoted_status_expand_url,user_id_str,user_name,user_full_name,user_verified,in_reply_to_status_id_str,in_reply_to_user_id_str,hashtags,mentions,urls 2 | 2021-02-05T00:54:28+00:00,1357492753215729666,1357492753215729666,"It no longer matters. The decision to accept casualties rather than accede to simple acts of social generosity has been made. Hundreds of thousands dead due to bumbling, both willful and unconscious, has been shrugged off by everyone other than, one presumes, the dead. #COVID19",en,False,False,0,0,0,0,1357490037773004802,,,3231804466,robert__gibbons,Robert Gibbons,False,,,#COVID19,, 3 | 2021-02-05T00:54:27+00:00,1357492749516292098,1357492749516292098,"Here in our hearts 4 | Oh the answer is there 5 | If we only would look there inside them 6 | We can make it better, we can make it better #WhatIf #WorldCancerDay #COVID19",en,False,False,0,0,0,0,1357489296844992512,,,1007294353818226694,bigdan071288,Daniel,False,,,"#WhatIf , #WorldCancerDay , #COVID19",, 7 | 2021-02-05T00:54:26+00:00,1357492745514942466,1357492745514942466,"Hoy más que nunca, ciudadanos, ciudadanas y autoridades de la @AlcaldiaMHmx nos unimos con un solo propósito; reducir el numero de contagios por #COVID19. No es tiempo de fiestas ni reuniones, continuamos en #SemáforoRojo 🚦 por contingencia sanitaria. 
8 | 9 | #MHelCorazónDeLaCapital https://t.co/VRRcVdXjsx",es,False,False,0,0,0,0,,,,1065021849430618112,MHSUrbanos,Servicios Urbanos,False,,,"#COVID19 , #SemáforoRojo , #MHelCorazónDeLaCapital",AlcaldiaMHmx, 10 | 2021-02-05T00:54:25+00:00,1357492742943764480,1357492742943764480,発見遅れたCOCOA不具合、厚生労働省「実機テストせず」:#朝日新聞デジタル https://t.co/ng0nDNqw2M #新型コロナウイルス #COVID19,ja,False,False,0,0,0,0,,,,113366981,otaka_thursday,おたか 🍥,False,,,"#朝日新聞デジタル , #新型コロナウイルス , #COVID19",,https://t.co/ng0nDNqw2M 11 | 2021-02-05T00:54:14+00:00,1357492697901264897,1357492697901264897,"that moment when your patient decides to (stupidly) go to Turks and Caicos and returns with COVID (omg) and uses up precious resources, time, PPE, & ED personnel to take care of her while exposing a slew of other patients and staff in the ER. #COVID19 #canyounot #COVIDIOT https://t.co/Icy9EydgLg",en,False,False,0,0,0,0,,,,2462950457,paper_canyon,paper canyon,False,,,"#COVID19 , #canyounot , #COVIDIOT",, 12 | 2021-02-05T00:54:09+00:00,1357492674731913216,1357492674731913216,“hasta un 40% de las personas que fueron internadas por complicaciones asociadas al Covid-19 tuvieron secuelas en la función de sus pulmones”. Hay que mantener los cuidados y vacunarse cuando nos toque #COVID19,es,False,False,0,0,0,0,1357358481939050496,,,139287395,jota_leonr,José Julio León,False,,,#COVID19,, 13 | 2021-02-05T00:54:08+00:00,1357492671217086464,1357492671217086464,"Feeling sorry for corporations during the #covid19 #pandemic? 14 | 15 | YOU MIGHT WANT TO EDUCATE YOURSELF on how American companies are treating front line employees during a pandemic? #MustRead",en,False,False,0,0,0,0,1357483359677550592,,,2316413918,GregCurtin,-v|v- 🍁 🇺🇸,False,,,"#covid19 , #pandemic , #MustRead",, 16 | 2021-02-05T00:54:07+00:00,1357492667064623104,1357492667064623104,"Yesterday hot topics: 17 | #lka (16.88%) 18 | #Srilanka (13.71%) 19 | #IndependenceDaySL (5.68%) 20 | #IndependenceDay (3.84%) 21 | #adaderanasinhala (2.75%) 22 | #slnews (2.75%) 23 | #Covid19 (2.09%) 24 | #IndependenceSL (1.58%) 25 | #73rdIndependenceDay (1.17%) 26 | #COVID19SL (1.08%)",en,False,False,0,0,0,0,,,,1343032398238208002,yuganOffcial,Yugan Narmathan 🇱🇰,False,,,"#lka , #Srilanka , #IndependenceDaySL , #IndependenceDay , #adaderanasinhala , #slnews , #Covid19 , #IndependenceSL , #73rdIndependenceDay , #COVID19SL",, 27 | 2021-02-05T00:54:06+00:00,1357492664363532288,1357492664363532288,#COVID19 #Impfung #Impfpflicht https://t.co/h2175ku7Zp https://t.co/UCJMOTj2Rj,und,False,False,0,0,0,0,,,,394643993,Caputschi,Pit Caputschi,False,,,"#COVID19 , #Impfung , #Impfpflicht",,https://t.co/h2175ku7Zp 28 | -------------------------------------------------------------------------------- /tests/resources/tweets_v1.1.2.jl: -------------------------------------------------------------------------------- 1 | {"created_at": "2021-02-05T00:54:28+00:00", "id_str": "1357492753215729666", "conversation_id_str": "1357492753215729666", "full_text": "It no longer matters. The decision to accept casualties rather than accede to simple acts of social generosity has been made. Hundreds of thousands dead due to bumbling, both willful and unconscious, has been shrugged off by everyone other than, one presumes, the dead. 
#COVID19", "lang": "en", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "1357490037773004802", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "3231804466", "user_name": "robert__gibbons", "user_full_name": "Robert Gibbons", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#COVID19"], "mentions": [], "urls": []} 2 | {"created_at": "2021-02-05T00:54:27+00:00", "id_str": "1357492749516292098", "conversation_id_str": "1357492749516292098", "full_text": "Here in our hearts\nOh the answer is there\nIf we only would look there inside them\nWe can make it better, we can make it better #WhatIf #WorldCancerDay #COVID19", "lang": "en", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "1357489296844992512", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "1007294353818226694", "user_name": "bigdan071288", "user_full_name": "Daniel", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#WhatIf", "#WorldCancerDay", "#COVID19"], "mentions": [], "urls": []} 3 | {"created_at": "2021-02-05T00:54:26+00:00", "id_str": "1357492745514942466", "conversation_id_str": "1357492745514942466", "full_text": "Hoy m\u00e1s que nunca, ciudadanos, ciudadanas y autoridades de la @AlcaldiaMHmx nos unimos con un solo prop\u00f3sito; reducir el numero de contagios por #COVID19. No es tiempo de fiestas ni reuniones, continuamos en #Sem\u00e1foroRojo \ud83d\udea6 por contingencia sanitaria.\n\n#MHelCoraz\u00f3nDeLaCapital https://t.co/VRRcVdXjsx", "lang": "es", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "1065021849430618112", "user_name": "MHSUrbanos", "user_full_name": "Servicios Urbanos", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#COVID19", "#Sem\u00e1foroRojo", "#MHelCoraz\u00f3nDeLaCapital"], "mentions": ["AlcaldiaMHmx"], "urls": []} 4 | {"created_at": "2021-02-05T00:54:25+00:00", "id_str": "1357492742943764480", "conversation_id_str": "1357492742943764480", "full_text": "\u767a\u898b\u9045\u308c\u305fCOCOA\u4e0d\u5177\u5408\u3001\u539a\u751f\u52b4\u50cd\u7701\u300c\u5b9f\u6a5f\u30c6\u30b9\u30c8\u305b\u305a\u300d\uff1a#\u671d\u65e5\u65b0\u805e\u30c7\u30b8\u30bf\u30eb https://t.co/ng0nDNqw2M #\u65b0\u578b\u30b3\u30ed\u30ca\u30a6\u30a4\u30eb\u30b9 #COVID19", "lang": "ja", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "113366981", "user_name": "otaka_thursday", "user_full_name": "\u304a\u305f\u304b \ud83c\udf65", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#\u671d\u65e5\u65b0\u805e\u30c7\u30b8\u30bf\u30eb", "#\u65b0\u578b\u30b3\u30ed\u30ca\u30a6\u30a4\u30eb\u30b9", "#COVID19"], "mentions": [], "urls": ["https://t.co/ng0nDNqw2M"]} 5 | {"created_at": "2021-02-05T00:54:14+00:00", "id_str": "1357492697901264897", "conversation_id_str": "1357492697901264897", "full_text": "that moment when your patient decides to (stupidly) 
go to Turks and Caicos and returns with COVID (omg) and uses up precious resources, time, PPE, & ED personnel to take care of her while exposing a slew of other patients and staff in the ER. #COVID19 #canyounot #COVIDIOT https://t.co/Icy9EydgLg", "lang": "en", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "2462950457", "user_name": "paper_canyon", "user_full_name": "paper canyon", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#COVID19", "#canyounot", "#COVIDIOT"], "mentions": [], "urls": []} 6 | {"created_at": "2021-02-05T00:54:09+00:00", "id_str": "1357492674731913216", "conversation_id_str": "1357492674731913216", "full_text": "\u201chasta un 40% de las personas que fueron internadas\u00a0por complicaciones asociadas al Covid-19 tuvieron secuelas en la funci\u00f3n de sus pulmones\u201d. Hay que mantener los cuidados y vacunarse cuando nos toque #COVID19", "lang": "es", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "1357358481939050496", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "139287395", "user_name": "jota_leonr", "user_full_name": "Jos\u00e9 Julio Le\u00f3n", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#COVID19"], "mentions": [], "urls": []} 7 | {"created_at": "2021-02-05T00:54:08+00:00", "id_str": "1357492671217086464", "conversation_id_str": "1357492671217086464", "full_text": "Feeling sorry for corporations during the #covid19 #pandemic?\n \nYOU MIGHT WANT TO EDUCATE YOURSELF on how American companies are treating front line employees during a pandemic? 
#MustRead", "lang": "en", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "1357483359677550592", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "2316413918", "user_name": "GregCurtin", "user_full_name": "-v|v- \ud83c\udf41 \ud83c\uddfa\ud83c\uddf8", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#covid19", "#pandemic", "#MustRead"], "mentions": [], "urls": []} 8 | {"created_at": "2021-02-05T00:54:07+00:00", "id_str": "1357492667064623104", "conversation_id_str": "1357492667064623104", "full_text": "Yesterday hot topics:\n#lka (16.88%)\n#Srilanka (13.71%)\n#IndependenceDaySL (5.68%)\n#IndependenceDay (3.84%)\n#adaderanasinhala (2.75%)\n#slnews (2.75%)\n#Covid19 (2.09%)\n#IndependenceSL (1.58%)\n#73rdIndependenceDay (1.17%)\n#COVID19SL (1.08%)", "lang": "en", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "1343032398238208002", "user_name": "yuganOffcial", "user_full_name": "Yugan Narmathan \ud83c\uddf1\ud83c\uddf0", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#lka", "#Srilanka", "#IndependenceDaySL", "#IndependenceDay", "#adaderanasinhala", "#slnews", "#Covid19", "#IndependenceSL", "#73rdIndependenceDay", "#COVID19SL"], "mentions": [], "urls": []} 9 | {"created_at": "2021-02-05T00:54:06+00:00", "id_str": "1357492664363532288", "conversation_id_str": "1357492664363532288", "full_text": "#COVID19 #Impfung #Impfpflicht https://t.co/h2175ku7Zp https://t.co/UCJMOTj2Rj", "lang": "und", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "394643993", "user_name": "Caputschi", "user_full_name": "Pit Caputschi", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#COVID19", "#Impfung", "#Impfpflicht"], "mentions": [], "urls": ["https://t.co/h2175ku7Zp"]} 10 | -------------------------------------------------------------------------------- /tests/resources/users_v1.3.0.csv: -------------------------------------------------------------------------------- 1 | created_at,id_str,rest_id_str,default_profile,default_profile_image,description,favourites_count,followers_count,friends_count,has_custom_timelines,listed_count,location,media_count,name,pinned_tweet_ids_str,profile_banner_url,profile_banner_url,profile_image_url_https,protected,screen_name,statuses_count,verified 2 | 2012-06-27T15:42:43+00:00,VXNlcjo2MjAxODI4NzU=,620182875,True,False,"Posel z Wrocław; PSL-UED;b.Wiceprzewodniczący PE https://t.co/ndbunIxXbL Partn.Wschodnie/MP for Wroclaw,PL;fmr EuroParl Vice-President dealing w EastPartnership",1066,1066,653,False,200,,910,Jacek Protasiewicz,789746720410308608,https://pbs.twimg.com/profile_banners/620182875/1562084177,https://pbs.twimg.com/profile_banners/620182875/1562084177,https://pbs.twimg.com/profile_images/1173463288723759104/zmngswpE_normal.jpg,False,ProtasiewiczJ,11851,True 3 | 2018-01-28T13:50:09+00:00,VXNlcjo5NTc2MTE3NTg0OTUxNjY0NjQ=,957611758495166464,True,False,Młoda Lewica | Aktywistka społeczna | Feministka,619,619,86,False,0,"Tarnów, 
Polska",74,Klaudia🏳️‍🌈,1186730782225829888,https://pbs.twimg.com/profile_banners/957611758495166464/1586938851,https://pbs.twimg.com/profile_banners/957611758495166464/1586938851,https://pbs.twimg.com/profile_images/1250338276046557187/wAil8yYf_normal.jpg,False,clavdiie,195,False 4 | -------------------------------------------------------------------------------- /tests/resources/users_v1.3.0.jl: -------------------------------------------------------------------------------- 1 | {"created_at": "2012-06-27T15:42:43+00:00", "id_str": "VXNlcjo2MjAxODI4NzU=", "rest_id_str": "620182875", "default_profile": true, "default_profile_image": false, "description": "Posel z Wroc\u0142aw; PSL-UED;b.Wiceprzewodnicz\u0105cy PE https://t.co/ndbunIxXbL Partn.Wschodnie/MP for Wroclaw,PL;fmr EuroParl Vice-President dealing w EastPartnership", "favourites_count": 1066, "followers_count": 1066, "friends_count": 653, "has_custom_timelines": false, "listed_count": 200, "location": "", "media_count": 910, "name": "Jacek Protasiewicz", "pinned_tweet_ids_str": ["789746720410308608"], "profile_banner_url": "https://pbs.twimg.com/profile_banners/620182875/1562084177", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1173463288723759104/zmngswpE_normal.jpg", "protected": false, "screen_name": "ProtasiewiczJ", "statuses_count": 11851, "verified": true} 2 | {"created_at": "2018-01-28T13:50:09+00:00", "id_str": "VXNlcjo5NTc2MTE3NTg0OTUxNjY0NjQ=", "rest_id_str": "957611758495166464", "default_profile": true, "default_profile_image": false, "description": "M\u0142oda Lewica | Aktywistka spo\u0142eczna | Feministka", "favourites_count": 619, "followers_count": 619, "friends_count": 86, "has_custom_timelines": false, "listed_count": 0, "location": "Tarn\u00f3w, Polska", "media_count": 74, "name": "Klaudia\ud83c\udff3\ufe0f\u200d\ud83c\udf08", "pinned_tweet_ids_str": ["1186730782225829888"], "profile_banner_url": "https://pbs.twimg.com/profile_banners/957611758495166464/1586938851", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1250338276046557187/wAil8yYf_normal.jpg", "protected": false, "screen_name": "clavdiie", "statuses_count": 195, "verified": false} 3 | -------------------------------------------------------------------------------- /tests/test_file_manager.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from tests.test_util import remove_all_temp_files 4 | 5 | 6 | @pytest.fixture(autouse=True, scope="session") 7 | def run_around_tests(): 8 | yield 9 | remove_all_temp_files() 10 | -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import string 4 | import unicodedata 5 | import uuid 6 | from typing import List, Callable 7 | 8 | import stweet as st 9 | 10 | _temp_file_prefix = 'test_temp_file_' 11 | 12 | 13 | def get_temp_test_file_name(file_extension_without_dot: str) -> str: 14 | return '{}{}.{}'.format(_temp_file_prefix, _get_uuid_str(), file_extension_without_dot) 15 | 16 | 17 | def _get_uuid_str() -> str: 18 | return str(uuid.uuid4()).replace('-', '') 19 | 20 | 21 | def remove_all_temp_files(): 22 | files_to_remove = glob.glob("{}*".format(_temp_file_prefix)) 23 | for filePath in files_to_remove: 24 | os.remove(filePath) 25 | return 26 | 27 | 28 | def _remove_accented_chars(text) -> str: 29 | new_text = unicodedata.normalize('NFKD', text).encode('ascii', 
'ignore').decode('utf-8', 'ignore') 30 | return new_text 31 | 32 | 33 | def to_base_text(value: str) -> str: 34 | table = str.maketrans(dict.fromkeys(string.punctuation)) 35 | to_return = _remove_accented_chars(value.translate(table).lower()) 36 | return to_return 37 | 38 | 39 | def get_tweets_to_tweet_output_test(tweet_output: List[st.TweetOutput]): 40 | phrase = '#koronawirus' 41 | search_tweets_task = st.SearchTweetsTask( 42 | all_words=phrase, 43 | tweets_limit=200 44 | ) 45 | st.TweetSearchRunner( 46 | search_tweets_task=search_tweets_task, 47 | tweet_outputs=tweet_output 48 | ).run() 49 | 50 | 51 | def get_users_to_tweet_output_test(user_outputs: List[st.UserOutput]) -> st.GetUsersResult: 52 | users = ['RealDonaldTrump', 'ProtasiewiczJ', 'donaldtuskEPP', 'RealDonaldTrump', 'ProtasiewiczJ', 'donaldtuskEPP'] 53 | get_users_task = st.GetUsersTask(users) 54 | return st.GetUsersRunner( 55 | get_user_task=get_users_task, 56 | user_outputs=user_outputs 57 | ).run() 58 | 59 | 60 | def tweet_list_assert_condition(tweets: List[st.UserTweetRaw], condition: Callable[[st.UserTweetRaw], bool]): 61 | for tweet in tweets: 62 | if not condition(tweet): 63 | print(f'--- {tweet}') 64 | assert all([ 65 | condition(tweet) 66 | for tweet in tweets 67 | ]) is True 68 | 69 | 70 | def two_lists_assert_equal(tweets_1: List, tweets_2: List): 71 | assert len(tweets_1) == len(tweets_2) 72 | for index in range(len(tweets_1)): 73 | if tweets_1[index] != tweets_2[index]: 74 | print('-----') 75 | print(tweets_1[index]) 76 | print(tweets_2[index]) 77 | print('--') 78 | assert tweets_1 == tweets_2 79 | -------------------------------------------------------------------------------- /tests/tweet_output_export_call_counter.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import stweet as st 4 | 5 | 6 | class TweetOutputExportCallCounter(st.TweetOutput): 7 | counter: int 8 | 9 | def __init__(self): 10 | self.counter = 0 11 | 12 | def export_tweets(self, tweets: List[st.UserTweetRaw]): 13 | self.counter += 1 14 | return 15 | 16 | def get_output_call_count(self) -> int: 17 | return self.counter 18 | -------------------------------------------------------------------------------- /tests/tweet_output_tweets_counter.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import stweet as st 4 | 5 | 6 | class TweetOutputTweetsCounter(st.TweetOutput): 7 | counter: int 8 | 9 | def __init__(self): 10 | self.counter = 0 11 | 12 | def export_tweets(self, tweets: List[st.UserTweetRaw]): 13 | self.counter += len(tweets) 14 | return 15 | 16 | def get_output_call_count(self) -> int: 17 | return self.counter 18 | -------------------------------------------------------------------------------- /tests/unit/language_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | 3 | 4 | def test_unique_language_shortcut(): 5 | assert len(st.Language) == len(set([it.short_value for it in st.Language])) 6 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = pep8,docstyle,tests&cov 3 | skipsdist = True 4 | 5 | [testenv:pep8] 6 | deps = 7 | flake8 8 | basepython = python3 9 | commands = 10 | flake8 {posargs} 11 | 12 | [testenv:docstyle] 13 | deps = 14 | pydocstyle 15 | basepython = python3 
16 | commands = 17 | pydocstyle --verbose {posargs} 18 | 19 | [testenv:tests&cov] 20 | deps = 21 | pytest 22 | pytest-cov 23 | -rrequirements.txt 24 | basepython = python3 25 | setenv = 26 | PYTHONPATH={toxinidir}/ 27 | COV_CORE_CONFIG={toxinidir}/.coveragerc 28 | commands = 29 | docker-compose -f {toxinidir}/test-services-docker-compose.yml up -d 30 | pytest -s tests --cov-fail-under=100 --cov-report=xml --cov-report=term --cov=stweet 31 | docker-compose -f {toxinidir}/test-services-docker-compose.yml stop 32 | docker-compose -f {toxinidir}/test-services-docker-compose.yml rm -f 33 | 34 | [flake8] 35 | # W503 - is said to be incompatible with current PEP8, however flake8 is 36 | # not updated to handle it 37 | # W504 skipped because it is overeager and unnecessary 38 | ignore = W503,W504 39 | per-file-ignores = __init__.py:F401 40 | show-source = True 41 | exclude = .git,.venv,.tox,dist,doc,*egg,build,venv,tests,tmp 42 | import-order-style = pep8 43 | max-line-length = 120 44 | 45 | 46 | [pydocstyle] 47 | # D104 Missing docstring in public package 48 | # D203 1 blank line required before class docstring 49 | # D213 Multi-line docstring summary should start at the second line 50 | # D214 Section is over-indented 51 | # D215 Section underline is over-indented 52 | # D401 First line should be in imperative mood; try rephrasing 53 | # D405 Section name should be properly capitalized 54 | # D406 Section name should end with a newline 55 | # D407 Missing dashed underline after section 56 | # D408 Section underline should be in the line following the section’s name 57 | # D409 Section underline should match the length of its name 58 | # D410 Missing blank line after section 59 | # D411 Missing blank line before section 60 | ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411 61 | match-dir = ^(?!\.tox|venv|tests|tmp).* 62 | match = ^(?!setup).*\.py 63 | --------------------------------------------------------------------------------
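Taken together, the integration tests above show the library's typical end-to-end flow: build a task, attach one or more outputs, and run a runner. A minimal usage sketch, assuming only the public API exercised in the tests (st.SearchTweetsTask, st.CollectorTweetOutput, st.CsvTweetOutput, st.TweetSearchRunner); the output file name is illustrative:

import stweet as st

# Describe what to scrape: tweets containing the hashtag, capped at 100.
search_tweets_task = st.SearchTweetsTask(
    all_words='#covid19',
    tweets_limit=100
)

# Outputs receive every scrapped batch; several can be combined in one run.
tweets_collector = st.CollectorTweetOutput()
csv_output = st.CsvTweetOutput('covid19_tweets.csv')

result = st.TweetSearchRunner(
    search_tweets_task=search_tweets_task,
    tweet_outputs=[tweets_collector, csv_output]
).run()

print(f'downloaded {result.downloaded_count} tweets')
print(f'kept {len(tweets_collector.get_raw_list())} tweets in memory')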