├── .coveragerc ├── .github └── workflows │ ├── python-package.yml │ └── python-publish.yml ├── .gitignore ├── LICENSE ├── README.md ├── poetry.lock ├── pyproject.toml ├── stweet ├── __init__.py ├── auth │ ├── __init__.py │ ├── auth_token_provider.py │ ├── fail_strategy │ │ ├── __init__.py │ │ ├── auth_fail_strategy.py │ │ ├── tor_ip_change_auth_fail_strategy.py │ │ └── wait_auth_fail_strategy.py │ └── simple_auth_token_provider.py ├── exceptions │ ├── __init__.py │ ├── refresh_token_exception.py │ ├── scrap_batch_bad_response.py │ ├── too_many_requests_exception.py │ └── user_suspended_exception.py ├── get_user_runner │ ├── __init__.py │ ├── get_users_context.py │ ├── get_users_result.py │ ├── get_users_runner.py │ ├── get_users_task.py │ └── user_parser.py ├── http_request │ ├── __init__.py │ ├── http_method.py │ ├── interceptor │ │ ├── __init__.py │ │ ├── logging_requests_web_client_interceptor.py │ │ └── params_response_log_web_client_interceptor.py │ ├── request_details.py │ ├── request_response.py │ ├── requests │ │ ├── __init__.py │ │ ├── requests_web_client.py │ │ └── requests_web_client_proxy_config.py │ └── web_client.py ├── model │ ├── __init__.py │ ├── cursor.py │ ├── language.py │ ├── raw_data.py │ ├── tweet_raw.py │ ├── user_raw.py │ └── user_tweet_raw.py ├── raw_output │ ├── __init__.py │ ├── collector_raw_output.py │ ├── json_line_file_raw_output.py │ ├── print_every_n_raw_output.py │ ├── print_first_in_batch_raw_output.py │ ├── print_raw_output.py │ └── raw_data_output.py ├── search_runner │ ├── __init__.py │ ├── replies_filter.py │ ├── search_run_context.py │ ├── search_runner.py │ ├── search_tweets_result.py │ ├── search_tweets_task.py │ └── tweet_raw_parser.py ├── tweets_by_ids_runner │ ├── __init__.py │ ├── tweet_raw_parser.py │ ├── tweets_by_id_context.py │ ├── tweets_by_id_result.py │ ├── tweets_by_id_runner.py │ └── tweets_by_id_task.py └── twitter_api │ ├── __init__.py │ ├── default_twitter_web_client_provider.py │ ├── twitter_api_requests.py │ └── twitter_auth_web_client_interceptor.py ├── test-services-docker-compose.yml ├── tests ├── __init__.py ├── integration │ ├── all_languages_test.py │ ├── exception_test.py │ ├── export_import_test.py │ ├── get_tweet_by_id_test.py │ ├── get_user_test.py │ ├── import_older_version_test.py │ ├── interceptor_test.py │ ├── large_iterator_test.py │ ├── parse_media_test.py │ ├── print_test.py │ ├── proxy_client_requests_test.py │ ├── reply_filter_test.py │ ├── search_in_language_test.py │ ├── search_return_objest_test.py │ ├── serialization_test.py │ ├── time_period_test.py │ ├── tweets_count_test.py │ ├── username_search_test.py │ └── word_search_test.py ├── mock_web_client.py ├── resources │ ├── tweets_v1.1.2.csv │ ├── tweets_v1.1.2.jl │ ├── users_v1.3.0.csv │ └── users_v1.3.0.jl ├── test_file_manager.py ├── test_util.py ├── tweet_output_export_call_counter.py ├── tweet_output_tweets_counter.py └── unit │ └── language_test.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = stweet/search_runner/parse/base_tweet_parser.py -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 
|
4 | name: Python package
5 |
6 | on:
7 |   pull_request:
8 |     branches: [ master, develop ]
9 |     paths-ignore:
10 |       - 'README.md'
11 |       - 'docs/**'
12 |   schedule:
13 |     - cron: '0 0 * * *'
14 |
15 | jobs:
16 |   build:
17 |
18 |     runs-on: ubuntu-22.04
19 |     strategy:
20 |       matrix:
21 |         python-version: [ 3.7, 3.8, 3.9 ]
22 |
23 |     steps:
24 |       - uses: actions/checkout@v2
25 |
26 |       - name: Set up Python ${{ matrix.python-version }}
27 |         uses: actions/setup-python@v2
28 |         with:
29 |           python-version: ${{ matrix.python-version }}
30 |
31 |       - name: Install poetry
32 |         uses: abatilo/actions-poetry@v2.0.0
33 |         with:
34 |           poetry-version: 1.3.1
35 |
36 |       - name: Install deps
37 |         run: poetry install -vv
38 |
39 | # - name: Run tox
40 | #   run: tox -v
41 | #
42 | # - name: Upload coverage to Codecov
43 | #   uses: codecov/codecov-action@v1
44 | #   with:
45 | #     token: ${{ secrets.CODECOV_TOKEN }}
46 | #     file: ./coverage.xml
47 | #     flags: unittests
48 |
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow builds the package with Poetry and publishes it to PyPI on pushes to master
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Python package & deploy
5 |
6 | on:
7 |   push:
8 |     branches: [ master ]
9 |
10 | jobs:
11 |   deploy:
12 |
13 |     runs-on: ubuntu-latest
14 |     strategy:
15 |       matrix:
16 |         python-version: [ 3.8 ]
17 |         poetry-version: [ 1.3.1 ]
18 |
19 |     steps:
20 |       - uses: actions/checkout@v2
21 |
22 |       - name: Set up Python ${{ matrix.python-version }}
23 |         uses: actions/setup-python@v2
24 |         with:
25 |           python-version: ${{ matrix.python-version }}
26 |
27 |       - name: Install poetry
28 |         uses: abatilo/actions-poetry@v2.0.0
29 |         with:
30 |           poetry-version: ${{ matrix.poetry-version }}
31 |
32 |       - name: Install deps
33 |         run: poetry install -vv
34 |
35 | # - name: Run tox
36 | #   run: tox -v
37 | #
38 | # - name: Upload coverage to Codecov
39 | #   uses: codecov/codecov-action@v1
40 | #   with:
41 | #     token: ${{ secrets.CODECOV_TOKEN }}
42 | #     file: ./coverage.xml
43 | #     flags: unittests
44 |
45 |       - name: Build and publish
46 |         run: |
47 |           poetry build
48 |           poetry publish --username ${{ secrets.PYPI_USERNAME }} --password ${{ secrets.PYPI_PASSWORD }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 |
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | share/python-wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | cover/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | .pybuilder/ 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | # For a library or package, you might want to ignore these files since the code is 89 | # intended to run in multiple environments; otherwise, check them in: 90 | # .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 100 | __pypackages__/ 101 | 102 | # Celery stuff 103 | celerybeat-schedule 104 | celerybeat.pid 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # Environments 110 | .env 111 | .venv 112 | env/ 113 | venv/ 114 | ENV/ 115 | env.bak/ 116 | venv.bak/ 117 | 118 | # Spyder project settings 119 | .spyderproject 120 | .spyproject 121 | 122 | # Rope project settings 123 | .ropeproject 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | .dmypy.json 131 | dmypy.json 132 | 133 | # Pyre type checker 134 | .pyre/ 135 | 136 | # pytype static type analyzer 137 | .pytype/ 138 | 139 | # Cython debug symbols 140 | cython_debug/ 141 | 142 | tmp/test_run.py 143 | clean.sh 144 | tmp 145 | sample_tweets.json 146 | .pypirc 147 | 148 | 149 | .DS_Store 150 | stweet_run.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Marcin Wątroba 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # stweet
2 |
3 | [![Open Source Love](https://badges.frapsoft.com/os/v2/open-source.svg?v=103)](https://github.com/ellerbrock/open-source-badges/)
4 | ![Python package](https://github.com/markowanga/stweet/workflows/Python%20package/badge.svg?branch=master)
5 | [![PyPI version](https://badge.fury.io/py/stweet.svg)](https://badge.fury.io/py/stweet)
6 | [![MIT Licence](https://badges.frapsoft.com/os/mit/mit.svg?v=103)](https://opensource.org/licenses/mit-license.php)
7 |
8 | A modern, fast Python library for scraping tweets and users from the unofficial Twitter API.
9 |
10 | This tool helps you scrape tweets by search phrase, tweets by id, and users by username. It uses
11 | the same unofficial Twitter API that the website uses.
12 |
13 | ## Inspiration for the creation of the library
14 |
15 | I have used twint to scrape tweets, but it has many errors and does not work reliably. Its code
16 | was not simple to understand. All tasks share one config, so the user has to know the exact
17 | parameters. The last important thing is that the API can change – Twitter owns the API, and
18 | changes depend on it alone. It is annoying when something does not work and users must report bugs
19 | as issues.
20 |
21 | ## Main advantages of the library
22 |
23 | - **Simple code** – the code is not only mine; every user can contribute to the library
24 | - **Domain objects and interfaces** – the main parts of the functionality can be replaced (e.g. the
25 |   web request layer); the library ships a simple default implementation, so you can extend it
26 |   without any problems or forks
27 | - ~~**100% coverage with integration tests** – this can catch API changes; the tests run
28 |   every week, and when a task fails we can easily find the source of the change~~ – not in
29 |   version 2.0
30 | - **Custom tweet and user outputs** – output is part of the interface; if you want to save tweets and
31 |   users in a custom format, it takes only a brief moment
32 |
33 | ## Installation
34 |
35 | ```shell script
36 | pip install -U stweet
37 | ```
38 |
39 | ## Donate
40 |
41 | If you want to thank me for the project, you can sponsor me by sending some crypto 😁:
42 |
43 | | Coin     | Wallet address                             |
44 | |----------|--------------------------------------------|
45 | | Bitcoin  | 3EajE9DbLvEmBHLRzjDfG86LyZB4jzsZyg         |
46 | | Ethereum | 0xE43d8C2c7a9af286bc2fc0568e2812151AF9b1FD |
47 |
48 | ## Basic usage
49 |
50 | To make a simple request, a scraping **task** must be prepared. The task is then processed by a
51 | **runner**.
52 |
53 | ```python
54 | import stweet as st
55 |
56 |
57 | def try_search():
58 |     search_tweets_task = st.SearchTweetsTask(all_words='#covid19')
59 |     output_jl_tweets = st.JsonLineFileRawOutput('output_raw_search_tweets.jl')
60 |     output_jl_users = st.JsonLineFileRawOutput('output_raw_search_users.jl')
61 |     output_print = st.PrintRawOutput()
62 |     st.TweetSearchRunner(search_tweets_task=search_tweets_task,
63 |                          tweet_raw_data_outputs=[output_print, output_jl_tweets],
64 |                          user_raw_data_outputs=[output_print, output_jl_users]).run()
65 |
66 |
67 | def try_user_scrap():
68 |     user_task = st.GetUsersTask(['iga_swiatek'])
69 |     output_json = st.JsonLineFileRawOutput('output_raw_user.jl')
70 |     output_print = st.PrintRawOutput()
71 |     st.GetUsersRunner(get_user_task=user_task, raw_data_outputs=[output_print, output_json]).run()
72 |
73 |
74 | def try_tweet_by_id_scrap():
75 |     id_task = st.TweetsByIdTask('1447348840164564994')
76 |     output_json = st.JsonLineFileRawOutput('output_raw_id.jl')
77 |     output_print = st.PrintRawOutput()
78 |     st.TweetsByIdRunner(tweets_by_id_task=id_task,
79 |                         raw_data_outputs=[output_print, output_json]).run()
80 |
81 |
82 | if __name__ == '__main__':
83 |     try_search()
84 |     try_user_scrap()
85 |     try_tweet_by_id_scrap()
86 | ```
87 |
88 | The example above shows that only a few lines of code are required to scrape tweets.
89 |
90 | ## Export format
91 |
92 | Stweet uses the same API as the website, so there is no documentation of the response format. Responses
93 | are saved raw, and the end user must parse them on their own. A parser may be added in the future.
94 |
95 | Scraped data can be exported in different ways using the `RawDataOutput` abstract class. A list of
96 | these outputs can be passed to every runner, so it is possible to export in several ways at once.
97 |
98 | Currently, stweet has implemented:
99 |
100 | - **CollectorRawOutput** – saves data in memory and returns it as a list of objects
101 | - **JsonLineFileRawOutput** – exports data as JSON lines
102 | - **PrintEveryNRawOutput** – prints every N-th item
103 | - **PrintFirstInBatchRawOutput** – prints the first item in each batch
104 | - **PrintRawOutput** – prints all items (not recommended for large scraping jobs)
105 |
106 | ## Using tor proxy
107 |
108 | The library is integrated with [tor-python-easy](https://github.com/markowanga/tor-python-easy).
109 | It allows using a Tor proxy with an exposed control port, so the IP can be changed whenever needed.
110 |
111 | If you want to use the Tor proxy client, you need to prepare a custom web client and use it in the runner.
112 |
113 | You also need a running Tor proxy – you can run it on your local OS, or you can use this
114 | [docker-compose](https://github.com/markowanga/tor-python-easy/blob/main/docker-compose.yml).
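
If you choose the docker-compose option, starting the proxy can look like this (a minimal sketch, assuming Docker is
installed and the linked `docker-compose.yml` has been downloaded to the working directory):

```shell script
# Sketch: start the Tor proxy in the background using the downloaded compose file.
# That setup exposes the SOCKS port (9050) and control port (9051) used in the snippet below.
docker-compose up -d
```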
115 |
116 | The code snippet below shows how to use the proxy:
117 |
118 | ```python
119 | import stweet as st
120 |
121 | if __name__ == '__main__':
122 |     web_client = st.DefaultTwitterWebClientProvider.get_web_client_preconfigured_for_tor_proxy(
123 |         socks_proxy_url='socks5://localhost:9050',
124 |         control_host='localhost',
125 |         control_port=9051,
126 |         control_password='test1234'
127 |     )
128 |
129 |     search_tweets_task = st.SearchTweetsTask(all_words='#covid19')
130 |     output_jl_tweets = st.JsonLineFileRawOutput('output_raw_search_tweets.jl')
131 |     output_jl_users = st.JsonLineFileRawOutput('output_raw_search_users.jl')
132 |     output_print = st.PrintRawOutput()
133 |     st.TweetSearchRunner(search_tweets_task=search_tweets_task,
134 |                          tweet_raw_data_outputs=[output_print, output_jl_tweets],
135 |                          user_raw_data_outputs=[output_print, output_jl_users],
136 |                          web_client=web_client).run()
137 | ```
138 |
139 | ## Dividing the scraping period is recommended
140 |
141 | For guest clients Twitter blocks extensive pagination – sometimes only about 3 pages can be fetched
142 | for a single query. To avoid this limitation, divide the scraping period into smaller parts.
143 |
144 | In 2023 Twitter also blocked API time ranges given as timestamps – only the YYYY-MM-DD format is
145 | accepted, so in arrow you can only pass dates without hours.
146 |
147 | ## Twint inspiration
148 |
149 | A small part of the library uses code from [twint](https://github.com/twintproject/twint). Twint was
150 | also the main inspiration for creating stweet.
151 |
--------------------------------------------------------------------------------
/poetry.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Poetry and should not be changed by hand.
2 |
3 | [[package]]
4 | name = "arrow"
5 | version = "1.2.3"
6 | description = "Better dates & times for Python"
7 | category = "main"
8 | optional = false
9 | python-versions = ">=3.6"
10 | files = [
11 |     {file = "arrow-1.2.3-py3-none-any.whl", hash = "sha256:5a49ab92e3b7b71d96cd6bfcc4df14efefc9dfa96ea19045815914a6ab6b1fe2"},
12 |     {file = "arrow-1.2.3.tar.gz", hash = "sha256:3934b30ca1b9f292376d9db15b19446088d12ec58629bc3f0da28fd55fb633a1"},
13 | ]
14 |
15 | [package.dependencies]
16 | python-dateutil = ">=2.7.0"
17 |
18 | [[package]]
19 | name = "atomicwrites"
20 | version = "1.4.1"
21 | description = "Atomic file writes."
22 | category = "dev" 23 | optional = false 24 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 25 | files = [ 26 | {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, 27 | ] 28 | 29 | [[package]] 30 | name = "attrs" 31 | version = "22.2.0" 32 | description = "Classes Without Boilerplate" 33 | category = "dev" 34 | optional = false 35 | python-versions = ">=3.6" 36 | files = [ 37 | {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"}, 38 | {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"}, 39 | ] 40 | 41 | [package.extras] 42 | cov = ["attrs[tests]", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] 43 | dev = ["attrs[docs,tests]"] 44 | docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope.interface"] 45 | tests = ["attrs[tests-no-zope]", "zope.interface"] 46 | tests-no-zope = ["cloudpickle", "cloudpickle", "hypothesis", "hypothesis", "mypy (>=0.971,<0.990)", "mypy (>=0.971,<0.990)", "pympler", "pympler", "pytest (>=4.3.0)", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-mypy-plugins", "pytest-xdist[psutil]", "pytest-xdist[psutil]"] 47 | 48 | [[package]] 49 | name = "certifi" 50 | version = "2022.12.7" 51 | description = "Python package for providing Mozilla's CA Bundle." 52 | category = "main" 53 | optional = false 54 | python-versions = ">=3.6" 55 | files = [ 56 | {file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"}, 57 | {file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"}, 58 | ] 59 | 60 | [[package]] 61 | name = "charset-normalizer" 62 | version = "3.0.1" 63 | description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
64 | category = "main" 65 | optional = false 66 | python-versions = "*" 67 | files = [ 68 | {file = "charset-normalizer-3.0.1.tar.gz", hash = "sha256:ebea339af930f8ca5d7a699b921106c6e29c617fe9606fa7baa043c1cdae326f"}, 69 | {file = "charset_normalizer-3.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88600c72ef7587fe1708fd242b385b6ed4b8904976d5da0893e31df8b3480cb6"}, 70 | {file = "charset_normalizer-3.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c75ffc45f25324e68ab238cb4b5c0a38cd1c3d7f1fb1f72b5541de469e2247db"}, 71 | {file = "charset_normalizer-3.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db72b07027db150f468fbada4d85b3b2729a3db39178abf5c543b784c1254539"}, 72 | {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62595ab75873d50d57323a91dd03e6966eb79c41fa834b7a1661ed043b2d404d"}, 73 | {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ff6f3db31555657f3163b15a6b7c6938d08df7adbfc9dd13d9d19edad678f1e8"}, 74 | {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:772b87914ff1152b92a197ef4ea40efe27a378606c39446ded52c8f80f79702e"}, 75 | {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70990b9c51340e4044cfc394a81f614f3f90d41397104d226f21e66de668730d"}, 76 | {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:292d5e8ba896bbfd6334b096e34bffb56161c81408d6d036a7dfa6929cff8783"}, 77 | {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:2edb64ee7bf1ed524a1da60cdcd2e1f6e2b4f66ef7c077680739f1641f62f555"}, 78 | {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:31a9ddf4718d10ae04d9b18801bd776693487cbb57d74cc3458a7673f6f34639"}, 79 | {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:44ba614de5361b3e5278e1241fda3dc1838deed864b50a10d7ce92983797fa76"}, 80 | {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:12db3b2c533c23ab812c2b25934f60383361f8a376ae272665f8e48b88e8e1c6"}, 81 | {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c512accbd6ff0270939b9ac214b84fb5ada5f0409c44298361b2f5e13f9aed9e"}, 82 | {file = "charset_normalizer-3.0.1-cp310-cp310-win32.whl", hash = "sha256:502218f52498a36d6bf5ea77081844017bf7982cdbe521ad85e64cabee1b608b"}, 83 | {file = "charset_normalizer-3.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:601f36512f9e28f029d9481bdaf8e89e5148ac5d89cffd3b05cd533eeb423b59"}, 84 | {file = "charset_normalizer-3.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0298eafff88c99982a4cf66ba2efa1128e4ddaca0b05eec4c456bbc7db691d8d"}, 85 | {file = "charset_normalizer-3.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a8d0fc946c784ff7f7c3742310cc8a57c5c6dc31631269876a88b809dbeff3d3"}, 86 | {file = "charset_normalizer-3.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:87701167f2a5c930b403e9756fab1d31d4d4da52856143b609e30a1ce7160f3c"}, 87 | {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e76c0f23218b8f46c4d87018ca2e441535aed3632ca134b10239dfb6dadd6b"}, 88 | {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:0c0a590235ccd933d9892c627dec5bc7511ce6ad6c1011fdf5b11363022746c1"}, 89 | {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c7fe7afa480e3e82eed58e0ca89f751cd14d767638e2550c77a92a9e749c317"}, 90 | {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79909e27e8e4fcc9db4addea88aa63f6423ebb171db091fb4373e3312cb6d603"}, 91 | {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ac7b6a045b814cf0c47f3623d21ebd88b3e8cf216a14790b455ea7ff0135d18"}, 92 | {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:72966d1b297c741541ca8cf1223ff262a6febe52481af742036a0b296e35fa5a"}, 93 | {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f9d0c5c045a3ca9bedfc35dca8526798eb91a07aa7a2c0fee134c6c6f321cbd7"}, 94 | {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:5995f0164fa7df59db4746112fec3f49c461dd6b31b841873443bdb077c13cfc"}, 95 | {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4a8fcf28c05c1f6d7e177a9a46a1c52798bfe2ad80681d275b10dcf317deaf0b"}, 96 | {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:761e8904c07ad053d285670f36dd94e1b6ab7f16ce62b9805c475b7aa1cffde6"}, 97 | {file = "charset_normalizer-3.0.1-cp311-cp311-win32.whl", hash = "sha256:71140351489970dfe5e60fc621ada3e0f41104a5eddaca47a7acb3c1b851d6d3"}, 98 | {file = "charset_normalizer-3.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:9ab77acb98eba3fd2a85cd160851816bfce6871d944d885febf012713f06659c"}, 99 | {file = "charset_normalizer-3.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:84c3990934bae40ea69a82034912ffe5a62c60bbf6ec5bc9691419641d7d5c9a"}, 100 | {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74292fc76c905c0ef095fe11e188a32ebd03bc38f3f3e9bcb85e4e6db177b7ea"}, 101 | {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c95a03c79bbe30eec3ec2b7f076074f4281526724c8685a42872974ef4d36b72"}, 102 | {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f4c39b0e3eac288fedc2b43055cfc2ca7a60362d0e5e87a637beac5d801ef478"}, 103 | {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df2c707231459e8a4028eabcd3cfc827befd635b3ef72eada84ab13b52e1574d"}, 104 | {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93ad6d87ac18e2a90b0fe89df7c65263b9a99a0eb98f0a3d2e079f12a0735837"}, 105 | {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:59e5686dd847347e55dffcc191a96622f016bc0ad89105e24c14e0d6305acbc6"}, 106 | {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:cd6056167405314a4dc3c173943f11249fa0f1b204f8b51ed4bde1a9cd1834dc"}, 107 | {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:083c8d17153ecb403e5e1eb76a7ef4babfc2c48d58899c98fcaa04833e7a2f9a"}, 108 | {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:f5057856d21e7586765171eac8b9fc3f7d44ef39425f85dbcccb13b3ebea806c"}, 109 | {file = 
"charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:7eb33a30d75562222b64f569c642ff3dc6689e09adda43a082208397f016c39a"}, 110 | {file = "charset_normalizer-3.0.1-cp36-cp36m-win32.whl", hash = "sha256:95dea361dd73757c6f1c0a1480ac499952c16ac83f7f5f4f84f0658a01b8ef41"}, 111 | {file = "charset_normalizer-3.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:eaa379fcd227ca235d04152ca6704c7cb55564116f8bc52545ff357628e10602"}, 112 | {file = "charset_normalizer-3.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3e45867f1f2ab0711d60c6c71746ac53537f1684baa699f4f668d4c6f6ce8e14"}, 113 | {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cadaeaba78750d58d3cc6ac4d1fd867da6fc73c88156b7a3212a3cd4819d679d"}, 114 | {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:911d8a40b2bef5b8bbae2e36a0b103f142ac53557ab421dc16ac4aafee6f53dc"}, 115 | {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:503e65837c71b875ecdd733877d852adbc465bd82c768a067badd953bf1bc5a3"}, 116 | {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a60332922359f920193b1d4826953c507a877b523b2395ad7bc716ddd386d866"}, 117 | {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16a8663d6e281208d78806dbe14ee9903715361cf81f6d4309944e4d1e59ac5b"}, 118 | {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:a16418ecf1329f71df119e8a65f3aa68004a3f9383821edcb20f0702934d8087"}, 119 | {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:9d9153257a3f70d5f69edf2325357251ed20f772b12e593f3b3377b5f78e7ef8"}, 120 | {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:02a51034802cbf38db3f89c66fb5d2ec57e6fe7ef2f4a44d070a593c3688667b"}, 121 | {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:2e396d70bc4ef5325b72b593a72c8979999aa52fb8bcf03f701c1b03e1166918"}, 122 | {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:11b53acf2411c3b09e6af37e4b9005cba376c872503c8f28218c7243582df45d"}, 123 | {file = "charset_normalizer-3.0.1-cp37-cp37m-win32.whl", hash = "sha256:0bf2dae5291758b6f84cf923bfaa285632816007db0330002fa1de38bfcb7154"}, 124 | {file = "charset_normalizer-3.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:2c03cc56021a4bd59be889c2b9257dae13bf55041a3372d3295416f86b295fb5"}, 125 | {file = "charset_normalizer-3.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:024e606be3ed92216e2b6952ed859d86b4cfa52cd5bc5f050e7dc28f9b43ec42"}, 126 | {file = "charset_normalizer-3.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4b0d02d7102dd0f997580b51edc4cebcf2ab6397a7edf89f1c73b586c614272c"}, 127 | {file = "charset_normalizer-3.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:358a7c4cb8ba9b46c453b1dd8d9e431452d5249072e4f56cfda3149f6ab1405e"}, 128 | {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81d6741ab457d14fdedc215516665050f3822d3e56508921cc7239f8c8e66a58"}, 129 | {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8b8af03d2e37866d023ad0ddea594edefc31e827fee64f8de5611a1dbc373174"}, 130 | {file = 
"charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9cf4e8ad252f7c38dd1f676b46514f92dc0ebeb0db5552f5f403509705e24753"}, 131 | {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e696f0dd336161fca9adbb846875d40752e6eba585843c768935ba5c9960722b"}, 132 | {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c22d3fe05ce11d3671297dc8973267daa0f938b93ec716e12e0f6dee81591dc1"}, 133 | {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:109487860ef6a328f3eec66f2bf78b0b72400280d8f8ea05f69c51644ba6521a"}, 134 | {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:37f8febc8ec50c14f3ec9637505f28e58d4f66752207ea177c1d67df25da5aed"}, 135 | {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:f97e83fa6c25693c7a35de154681fcc257c1c41b38beb0304b9c4d2d9e164479"}, 136 | {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a152f5f33d64a6be73f1d30c9cc82dfc73cec6477ec268e7c6e4c7d23c2d2291"}, 137 | {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:39049da0ffb96c8cbb65cbf5c5f3ca3168990adf3551bd1dee10c48fce8ae820"}, 138 | {file = "charset_normalizer-3.0.1-cp38-cp38-win32.whl", hash = "sha256:4457ea6774b5611f4bed5eaa5df55f70abde42364d498c5134b7ef4c6958e20e"}, 139 | {file = "charset_normalizer-3.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:e62164b50f84e20601c1ff8eb55620d2ad25fb81b59e3cd776a1902527a788af"}, 140 | {file = "charset_normalizer-3.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8eade758719add78ec36dc13201483f8e9b5d940329285edcd5f70c0a9edbd7f"}, 141 | {file = "charset_normalizer-3.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8499ca8f4502af841f68135133d8258f7b32a53a1d594aa98cc52013fff55678"}, 142 | {file = "charset_normalizer-3.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3fc1c4a2ffd64890aebdb3f97e1278b0cc72579a08ca4de8cd2c04799a3a22be"}, 143 | {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00d3ffdaafe92a5dc603cb9bd5111aaa36dfa187c8285c543be562e61b755f6b"}, 144 | {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2ac1b08635a8cd4e0cbeaf6f5e922085908d48eb05d44c5ae9eabab148512ca"}, 145 | {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f6f45710b4459401609ebebdbcfb34515da4fc2aa886f95107f556ac69a9147e"}, 146 | {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ae1de54a77dc0d6d5fcf623290af4266412a7c4be0b1ff7444394f03f5c54e3"}, 147 | {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b590df687e3c5ee0deef9fc8c547d81986d9a1b56073d82de008744452d6541"}, 148 | {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab5de034a886f616a5668aa5d098af2b5385ed70142090e2a31bcbd0af0fdb3d"}, 149 | {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9cb3032517f1627cc012dbc80a8ec976ae76d93ea2b5feaa9d2a5b8882597579"}, 150 | {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:608862a7bf6957f2333fc54ab4399e405baad0163dc9f8d99cb236816db169d4"}, 
151 | {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0f438ae3532723fb6ead77e7c604be7c8374094ef4ee2c5e03a3a17f1fca256c"}, 152 | {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:356541bf4381fa35856dafa6a965916e54bed415ad8a24ee6de6e37deccf2786"}, 153 | {file = "charset_normalizer-3.0.1-cp39-cp39-win32.whl", hash = "sha256:39cf9ed17fe3b1bc81f33c9ceb6ce67683ee7526e65fde1447c772afc54a1bb8"}, 154 | {file = "charset_normalizer-3.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:0a11e971ed097d24c534c037d298ad32c6ce81a45736d31e0ff0ad37ab437d59"}, 155 | {file = "charset_normalizer-3.0.1-py3-none-any.whl", hash = "sha256:7e189e2e1d3ed2f4aebabd2d5b0f931e883676e51c7624826e0a4e5fe8a0bf24"}, 156 | ] 157 | 158 | [[package]] 159 | name = "colorama" 160 | version = "0.4.6" 161 | description = "Cross-platform colored terminal text." 162 | category = "dev" 163 | optional = false 164 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" 165 | files = [ 166 | {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, 167 | {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, 168 | ] 169 | 170 | [[package]] 171 | name = "idna" 172 | version = "3.4" 173 | description = "Internationalized Domain Names in Applications (IDNA)" 174 | category = "main" 175 | optional = false 176 | python-versions = ">=3.5" 177 | files = [ 178 | {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, 179 | {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, 180 | ] 181 | 182 | [[package]] 183 | name = "iniconfig" 184 | version = "2.0.0" 185 | description = "brain-dead simple config-ini parsing" 186 | category = "dev" 187 | optional = false 188 | python-versions = ">=3.7" 189 | files = [ 190 | {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, 191 | {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, 192 | ] 193 | 194 | [[package]] 195 | name = "isort" 196 | version = "5.12.0" 197 | description = "A Python utility / library to sort Python imports." 
198 | category = "dev" 199 | optional = false 200 | python-versions = ">=3.8.0" 201 | files = [ 202 | {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"}, 203 | {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"}, 204 | ] 205 | 206 | [package.extras] 207 | colors = ["colorama (>=0.4.3)"] 208 | pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"] 209 | plugins = ["setuptools"] 210 | requirements-deprecated-finder = ["pip-api", "pipreqs"] 211 | 212 | [[package]] 213 | name = "numpy" 214 | version = "1.24.2" 215 | description = "Fundamental package for array computing in Python" 216 | category = "main" 217 | optional = false 218 | python-versions = ">=3.8" 219 | files = [ 220 | {file = "numpy-1.24.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eef70b4fc1e872ebddc38cddacc87c19a3709c0e3e5d20bf3954c147b1dd941d"}, 221 | {file = "numpy-1.24.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e8d2859428712785e8a8b7d2b3ef0a1d1565892367b32f915c4a4df44d0e64f5"}, 222 | {file = "numpy-1.24.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6524630f71631be2dabe0c541e7675db82651eb998496bbe16bc4f77f0772253"}, 223 | {file = "numpy-1.24.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a51725a815a6188c662fb66fb32077709a9ca38053f0274640293a14fdd22978"}, 224 | {file = "numpy-1.24.2-cp310-cp310-win32.whl", hash = "sha256:2620e8592136e073bd12ee4536149380695fbe9ebeae845b81237f986479ffc9"}, 225 | {file = "numpy-1.24.2-cp310-cp310-win_amd64.whl", hash = "sha256:97cf27e51fa078078c649a51d7ade3c92d9e709ba2bfb97493007103c741f1d0"}, 226 | {file = "numpy-1.24.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7de8fdde0003f4294655aa5d5f0a89c26b9f22c0a58790c38fae1ed392d44a5a"}, 227 | {file = "numpy-1.24.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4173bde9fa2a005c2c6e2ea8ac1618e2ed2c1c6ec8a7657237854d42094123a0"}, 228 | {file = "numpy-1.24.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cecaed30dc14123020f77b03601559fff3e6cd0c048f8b5289f4eeabb0eb281"}, 229 | {file = "numpy-1.24.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a23f8440561a633204a67fb44617ce2a299beecf3295f0d13c495518908e910"}, 230 | {file = "numpy-1.24.2-cp311-cp311-win32.whl", hash = "sha256:e428c4fbfa085f947b536706a2fc349245d7baa8334f0c5723c56a10595f9b95"}, 231 | {file = "numpy-1.24.2-cp311-cp311-win_amd64.whl", hash = "sha256:557d42778a6869c2162deb40ad82612645e21d79e11c1dc62c6e82a2220ffb04"}, 232 | {file = "numpy-1.24.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d0a2db9d20117bf523dde15858398e7c0858aadca7c0f088ac0d6edd360e9ad2"}, 233 | {file = "numpy-1.24.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c72a6b2f4af1adfe193f7beb91ddf708ff867a3f977ef2ec53c0ffb8283ab9f5"}, 234 | {file = "numpy-1.24.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c29e6bd0ec49a44d7690ecb623a8eac5ab8a923bce0bea6293953992edf3a76a"}, 235 | {file = "numpy-1.24.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2eabd64ddb96a1239791da78fa5f4e1693ae2dadc82a76bc76a14cbb2b966e96"}, 236 | {file = "numpy-1.24.2-cp38-cp38-win32.whl", hash = "sha256:e3ab5d32784e843fc0dd3ab6dcafc67ef806e6b6828dc6af2f689be0eb4d781d"}, 237 | {file = "numpy-1.24.2-cp38-cp38-win_amd64.whl", hash = 
"sha256:76807b4063f0002c8532cfeac47a3068a69561e9c8715efdad3c642eb27c0756"}, 238 | {file = "numpy-1.24.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4199e7cfc307a778f72d293372736223e39ec9ac096ff0a2e64853b866a8e18a"}, 239 | {file = "numpy-1.24.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:adbdce121896fd3a17a77ab0b0b5eedf05a9834a18699db6829a64e1dfccca7f"}, 240 | {file = "numpy-1.24.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:889b2cc88b837d86eda1b17008ebeb679d82875022200c6e8e4ce6cf549b7acb"}, 241 | {file = "numpy-1.24.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f64bb98ac59b3ea3bf74b02f13836eb2e24e48e0ab0145bbda646295769bd780"}, 242 | {file = "numpy-1.24.2-cp39-cp39-win32.whl", hash = "sha256:63e45511ee4d9d976637d11e6c9864eae50e12dc9598f531c035265991910468"}, 243 | {file = "numpy-1.24.2-cp39-cp39-win_amd64.whl", hash = "sha256:a77d3e1163a7770164404607b7ba3967fb49b24782a6ef85d9b5f54126cc39e5"}, 244 | {file = "numpy-1.24.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:92011118955724465fb6853def593cf397b4a1367495e0b59a7e69d40c4eb71d"}, 245 | {file = "numpy-1.24.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9006288bcf4895917d02583cf3411f98631275bc67cce355a7f39f8c14338fa"}, 246 | {file = "numpy-1.24.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:150947adbdfeceec4e5926d956a06865c1c690f2fd902efede4ca6fe2e657c3f"}, 247 | {file = "numpy-1.24.2.tar.gz", hash = "sha256:003a9f530e880cb2cd177cba1af7220b9aa42def9c4afc2a2fc3ee6be7eb2b22"}, 248 | ] 249 | 250 | [[package]] 251 | name = "packaging" 252 | version = "23.0" 253 | description = "Core utilities for Python packages" 254 | category = "dev" 255 | optional = false 256 | python-versions = ">=3.7" 257 | files = [ 258 | {file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"}, 259 | {file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"}, 260 | ] 261 | 262 | [[package]] 263 | name = "pandas" 264 | version = "1.5.3" 265 | description = "Powerful data structures for data analysis, time series, and statistics" 266 | category = "main" 267 | optional = false 268 | python-versions = ">=3.8" 269 | files = [ 270 | {file = "pandas-1.5.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3749077d86e3a2f0ed51367f30bf5b82e131cc0f14260c4d3e499186fccc4406"}, 271 | {file = "pandas-1.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:972d8a45395f2a2d26733eb8d0f629b2f90bebe8e8eddbb8829b180c09639572"}, 272 | {file = "pandas-1.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:50869a35cbb0f2e0cd5ec04b191e7b12ed688874bd05dd777c19b28cbea90996"}, 273 | {file = "pandas-1.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3ac844a0fe00bfaeb2c9b51ab1424e5c8744f89860b138434a363b1f620f354"}, 274 | {file = "pandas-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a0a56cef15fd1586726dace5616db75ebcfec9179a3a55e78f72c5639fa2a23"}, 275 | {file = "pandas-1.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:478ff646ca42b20376e4ed3fa2e8d7341e8a63105586efe54fa2508ee087f328"}, 276 | {file = "pandas-1.5.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6973549c01ca91ec96199e940495219c887ea815b2083722821f1d7abfa2b4dc"}, 277 | {file = "pandas-1.5.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:c39a8da13cede5adcd3be1182883aea1c925476f4e84b2807a46e2775306305d"}, 278 | {file = "pandas-1.5.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f76d097d12c82a535fda9dfe5e8dd4127952b45fea9b0276cb30cca5ea313fbc"}, 279 | {file = "pandas-1.5.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e474390e60ed609cec869b0da796ad94f420bb057d86784191eefc62b65819ae"}, 280 | {file = "pandas-1.5.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f2b952406a1588ad4cad5b3f55f520e82e902388a6d5a4a91baa8d38d23c7f6"}, 281 | {file = "pandas-1.5.3-cp311-cp311-win_amd64.whl", hash = "sha256:bc4c368f42b551bf72fac35c5128963a171b40dce866fb066540eeaf46faa003"}, 282 | {file = "pandas-1.5.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:14e45300521902689a81f3f41386dc86f19b8ba8dd5ac5a3c7010ef8d2932813"}, 283 | {file = "pandas-1.5.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9842b6f4b8479e41968eced654487258ed81df7d1c9b7b870ceea24ed9459b31"}, 284 | {file = "pandas-1.5.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:26d9c71772c7afb9d5046e6e9cf42d83dd147b5cf5bcb9d97252077118543792"}, 285 | {file = "pandas-1.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fbcb19d6fceb9e946b3e23258757c7b225ba450990d9ed63ccceeb8cae609f7"}, 286 | {file = "pandas-1.5.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:565fa34a5434d38e9d250af3c12ff931abaf88050551d9fbcdfafca50d62babf"}, 287 | {file = "pandas-1.5.3-cp38-cp38-win32.whl", hash = "sha256:87bd9c03da1ac870a6d2c8902a0e1fd4267ca00f13bc494c9e5a9020920e1d51"}, 288 | {file = "pandas-1.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:41179ce559943d83a9b4bbacb736b04c928b095b5f25dd2b7389eda08f46f373"}, 289 | {file = "pandas-1.5.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c74a62747864ed568f5a82a49a23a8d7fe171d0c69038b38cedf0976831296fa"}, 290 | {file = "pandas-1.5.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c4c00e0b0597c8e4f59e8d461f797e5d70b4d025880516a8261b2817c47759ee"}, 291 | {file = "pandas-1.5.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a50d9a4336a9621cab7b8eb3fb11adb82de58f9b91d84c2cd526576b881a0c5a"}, 292 | {file = "pandas-1.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd05f7783b3274aa206a1af06f0ceed3f9b412cf665b7247eacd83be41cf7bf0"}, 293 | {file = "pandas-1.5.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f69c4029613de47816b1bb30ff5ac778686688751a5e9c99ad8c7031f6508e5"}, 294 | {file = "pandas-1.5.3-cp39-cp39-win32.whl", hash = "sha256:7cec0bee9f294e5de5bbfc14d0573f65526071029d036b753ee6507d2a21480a"}, 295 | {file = "pandas-1.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:dfd681c5dc216037e0b0a2c821f5ed99ba9f03ebcf119c7dac0e9a7b960b9ec9"}, 296 | {file = "pandas-1.5.3.tar.gz", hash = "sha256:74a3fd7e5a7ec052f183273dc7b0acd3a863edf7520f5d3a1765c04ffdb3b0b1"}, 297 | ] 298 | 299 | [package.dependencies] 300 | numpy = [ 301 | {version = ">=1.20.3", markers = "python_version < \"3.10\""}, 302 | {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, 303 | {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, 304 | ] 305 | python-dateutil = ">=2.8.1" 306 | pytz = ">=2020.1" 307 | 308 | [package.extras] 309 | test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] 310 | 311 | [[package]] 312 | name = "pluggy" 313 | version = "1.0.0" 314 | description = "plugin and hook calling mechanisms for python" 315 | category = "dev" 316 | optional 
= false 317 | python-versions = ">=3.6" 318 | files = [ 319 | {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, 320 | {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, 321 | ] 322 | 323 | [package.extras] 324 | dev = ["pre-commit", "tox"] 325 | testing = ["pytest", "pytest-benchmark"] 326 | 327 | [[package]] 328 | name = "py" 329 | version = "1.11.0" 330 | description = "library with cross-python path, ini-parsing, io, code, log facilities" 331 | category = "dev" 332 | optional = false 333 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 334 | files = [ 335 | {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, 336 | {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, 337 | ] 338 | 339 | [[package]] 340 | name = "pysocks" 341 | version = "1.7.1" 342 | description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." 343 | category = "main" 344 | optional = false 345 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 346 | files = [ 347 | {file = "PySocks-1.7.1-py27-none-any.whl", hash = "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299"}, 348 | {file = "PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5"}, 349 | {file = "PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"}, 350 | ] 351 | 352 | [[package]] 353 | name = "pytest" 354 | version = "6.2.5" 355 | description = "pytest: simple powerful testing with Python" 356 | category = "dev" 357 | optional = false 358 | python-versions = ">=3.6" 359 | files = [ 360 | {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"}, 361 | {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, 362 | ] 363 | 364 | [package.dependencies] 365 | atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} 366 | attrs = ">=19.2.0" 367 | colorama = {version = "*", markers = "sys_platform == \"win32\""} 368 | iniconfig = "*" 369 | packaging = "*" 370 | pluggy = ">=0.12,<2.0" 371 | py = ">=1.8.2" 372 | toml = "*" 373 | 374 | [package.extras] 375 | testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] 376 | 377 | [[package]] 378 | name = "python-dateutil" 379 | version = "2.8.2" 380 | description = "Extensions to the standard Python datetime module" 381 | category = "main" 382 | optional = false 383 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" 384 | files = [ 385 | {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, 386 | {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, 387 | ] 388 | 389 | [package.dependencies] 390 | six = ">=1.5" 391 | 392 | [[package]] 393 | name = "pytz" 394 | version = "2022.7.1" 395 | description = "World timezone definitions, modern and historical" 396 | category = "main" 397 | optional = false 398 | python-versions = "*" 399 | files = [ 400 | {file = "pytz-2022.7.1-py2.py3-none-any.whl", hash = 
"sha256:78f4f37d8198e0627c5f1143240bb0206b8691d8d7ac6d78fee88b78733f8c4a"}, 401 | {file = "pytz-2022.7.1.tar.gz", hash = "sha256:01a0681c4b9684a28304615eba55d1ab31ae00bf68ec157ec3708a8182dbbcd0"}, 402 | ] 403 | 404 | [[package]] 405 | name = "requests" 406 | version = "2.28.2" 407 | description = "Python HTTP for Humans." 408 | category = "main" 409 | optional = false 410 | python-versions = ">=3.7, <4" 411 | files = [ 412 | {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"}, 413 | {file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"}, 414 | ] 415 | 416 | [package.dependencies] 417 | certifi = ">=2017.4.17" 418 | charset-normalizer = ">=2,<4" 419 | idna = ">=2.5,<4" 420 | urllib3 = ">=1.21.1,<1.27" 421 | 422 | [package.extras] 423 | socks = ["PySocks (>=1.5.6,!=1.5.7)"] 424 | use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] 425 | 426 | [[package]] 427 | name = "six" 428 | version = "1.16.0" 429 | description = "Python 2 and 3 compatibility utilities" 430 | category = "main" 431 | optional = false 432 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 433 | files = [ 434 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, 435 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, 436 | ] 437 | 438 | [[package]] 439 | name = "toml" 440 | version = "0.10.2" 441 | description = "Python Library for Tom's Obvious, Minimal Language" 442 | category = "dev" 443 | optional = false 444 | python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" 445 | files = [ 446 | {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, 447 | {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, 448 | ] 449 | 450 | [[package]] 451 | name = "tor-python-easy" 452 | version = "0.1.5" 453 | description = "Simple library to manage tor proxy and IP changes" 454 | category = "main" 455 | optional = false 456 | python-versions = ">=3.8,<4.0" 457 | files = [ 458 | {file = "tor-python-easy-0.1.5.tar.gz", hash = "sha256:e4b0618b9a0bc8e3415cf68274af5f7759b96f37060a34377845624e9f1ab01d"}, 459 | {file = "tor_python_easy-0.1.5-py3-none-any.whl", hash = "sha256:c154fa30e301fa8cf3b0f7563ec55bbc6abd54a57fccf6a96357ee3529b11aad"}, 460 | ] 461 | 462 | [package.dependencies] 463 | PySocks = ">=1.7.1,<2.0.0" 464 | requests = ">=2.26.0,<3.0.0" 465 | 466 | [[package]] 467 | name = "urllib3" 468 | version = "1.26.14" 469 | description = "HTTP library with thread-safe connection pooling, file post, and more." 
470 | category = "main" 471 | optional = false 472 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" 473 | files = [ 474 | {file = "urllib3-1.26.14-py2.py3-none-any.whl", hash = "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1"}, 475 | {file = "urllib3-1.26.14.tar.gz", hash = "sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72"}, 476 | ] 477 | 478 | [package.extras] 479 | brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] 480 | secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] 481 | socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] 482 | 483 | [metadata] 484 | lock-version = "2.0" 485 | python-versions = "^3.8" 486 | content-hash = "66fdf491f1724d2864e6451ef720f3f48a95cb86bfffbc50acad33b5a0b3eff8" 487 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "stweet" 3 | version = "2.1.1" 4 | description = "Package to scrap tweets" 5 | authors = ["Marcin Wątroba "] 6 | license = "MIT" 7 | readme = "README.md" 8 | packages = [ 9 | { include = "stweet" } 10 | ] 11 | 12 | [tool.poetry.dependencies] 13 | python = "^3.8" 14 | requests = "^2.26.0" 15 | pandas = "^1.3.3" 16 | arrow = "^1.2.0" 17 | tor-python-easy = "^0.1.2" 18 | 19 | [tool.poetry.dev-dependencies] 20 | pytest = "^6.2.5" 21 | 22 | [tool.poetry.group.dev.dependencies] 23 | isort = "^5.12.0" 24 | 25 | [build-system] 26 | requires = ["poetry-core"] 27 | build-backend = "poetry.core.masonry.api" 28 | -------------------------------------------------------------------------------- /stweet/__init__.py: -------------------------------------------------------------------------------- 1 | from .get_user_runner import GetUsersResult, GetUsersRunner, GetUsersTask 2 | from .http_request import (RequestsWebClient, RequestsWebClientProxyConfig, 3 | WebClient) 4 | from .model import Language, UserTweetRaw 5 | from .raw_output import (CollectorRawOutput, JsonLineFileRawOutput, 6 | PrintEveryNRawOutput, PrintFirstInBatchRawOutput, 7 | PrintRawOutput) 8 | from .search_runner import (RepliesFilter, SearchTweetsResult, 9 | SearchTweetsTask, TweetSearchRunner) 10 | from .tweets_by_ids_runner import (TweetsByIdResult, TweetsByIdRunner, 11 | TweetsByIdTask) 12 | from .twitter_api import DefaultTwitterWebClientProvider 13 | -------------------------------------------------------------------------------- /stweet/auth/__init__.py: -------------------------------------------------------------------------------- 1 | from .auth_token_provider import AuthTokenProvider 2 | from .simple_auth_token_provider import SimpleAuthTokenProvider 3 | -------------------------------------------------------------------------------- /stweet/auth/auth_token_provider.py: -------------------------------------------------------------------------------- 1 | """Abstract class for get guest auth token.""" 2 | from abc import abstractmethod 3 | 4 | from ..http_request.web_client import WebClient 5 | 6 | 7 | class AuthTokenProvider: 8 | """Abstract class for get guest auth token.""" 9 | 10 | @abstractmethod 11 | def get_new_token(self, web_client: WebClient) -> str: 12 | """Method returns new token.""" 13 | -------------------------------------------------------------------------------- /stweet/auth/fail_strategy/__init__.py: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markowanga/stweet/fe34e98254dc7646bde6e083b5f6f745a0ee8cb6/stweet/auth/fail_strategy/__init__.py
--------------------------------------------------------------------------------
/stweet/auth/fail_strategy/auth_fail_strategy.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 |
4 | class AuthFailStrategy(ABC):
5 |
6 | @abstractmethod
7 | def run_strategy(self) -> None:
8 | pass
9 |
--------------------------------------------------------------------------------
/stweet/auth/fail_strategy/tor_ip_change_auth_fail_strategy.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | from tor_python_easy.tor_control_port_client import TorControlPortClient
4 |
5 | from stweet.auth.fail_strategy.auth_fail_strategy import AuthFailStrategy
6 |
7 |
8 | class TorIpChangeAuthFailStrategy(AuthFailStrategy):
9 | tor_control_port_client: TorControlPortClient
10 |
11 | def __init__(self, tor_control_port_client: TorControlPortClient):
12 | self.tor_control_port_client = tor_control_port_client
13 |
14 | def run_strategy(self) -> None:
15 | time.sleep(5)
16 | self.tor_control_port_client.change_connection_ip()
17 |
--------------------------------------------------------------------------------
/stweet/auth/fail_strategy/wait_auth_fail_strategy.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | from stweet.auth.fail_strategy.auth_fail_strategy import AuthFailStrategy
4 |
5 |
6 | class WaitAuthFailStrategy(AuthFailStrategy):
7 | ms_wait: int
8 |
9 | def __init__(self, ms_wait: int):
10 | self.ms_wait = ms_wait
11 |
12 | def run_strategy(self) -> None:
13 | time.sleep(self.ms_wait * 1.0 / 1000)
14 |
--------------------------------------------------------------------------------
/stweet/auth/simple_auth_token_provider.py:
--------------------------------------------------------------------------------
1 | """Utility to process the access token for the Twitter API."""
2 |
3 | import json
4 | import time
5 | from json import JSONDecodeError
6 | from typing import Callable, Optional
7 |
8 | from ..exceptions import RefreshTokenException
9 | from ..exceptions.too_many_requests_exception import TooManyRequestsException
10 | from ..http_request import WebClient
11 | from ..twitter_api.twitter_api_requests import TwitterApiRequests
12 | from .auth_token_provider import AuthTokenProvider
13 | from .fail_strategy.auth_fail_strategy import AuthFailStrategy
14 | from .fail_strategy.wait_auth_fail_strategy import WaitAuthFailStrategy
15 |
16 | _TIMEOUT = 20
17 | _URL = 'https://api.twitter.com/1.1/guest/activate.json'
18 |
19 |
20 | def _run_retrying_for_string(
21 | stop_max_ms: int,
22 | on_except_function: Callable[[], None],
23 | catch_predicate: Callable[[Exception], bool],
24 | call_function: Callable[[], str]
25 | ) -> str:
26 | def current_milli_time():
27 | return round(time.time() * 1000)
28 |
29 | first_error_time = -1
30 | result = None
31 | while result is None:
32 | try:
33 | result = call_function()
34 | except Exception as e:
35 | if first_error_time == -1:
36 | first_error_time = current_milli_time()
37 | time_from_first_error = current_milli_time() - first_error_time
38 | is_time_over = time_from_first_error > stop_max_ms
39 | if not catch_predicate(e) or is_time_over:
40 | raise e
41 | on_except_function()
42 | return
result
43 |
44 |
45 | class SimpleAuthTokenProvider(AuthTokenProvider):
46 | """Class to manage the Twitter token API."""
47 |
48 | auth_fail_strategy: AuthFailStrategy
49 | stop_max_delay_on_too_many_requests_exception: int
50 |
51 | def __init__(
52 | self,
53 | auth_fail_strategy: Optional[AuthFailStrategy] = None,
54 | stop_max_delay_on_too_many_requests_exception: int = 40 * 60 * 1000
55 | ):
56 | """Constructor of SimpleAuthTokenProvider, can override the default retry time."""
57 | self.auth_fail_strategy = auth_fail_strategy
58 | if self.auth_fail_strategy is None:
59 | self.auth_fail_strategy = WaitAuthFailStrategy(60 * 1000)
60 | self.stop_max_delay_on_too_many_requests_exception = \
61 | stop_max_delay_on_too_many_requests_exception
62 |
63 | def _request_for_response_body(self, web_client: WebClient):
64 | """Method adapted from Twint."""
65 | token_request_details = TwitterApiRequests().get_guest_token_request_details()
66 | token_response = web_client.run_request(token_request_details)
67 | if token_response.is_success():
68 | return token_response.text
69 | else:
70 | raise RefreshTokenException(f'Error during request for token -- {token_response}')
71 |
72 | def get_new_token(self, web_client: WebClient) -> str:
73 | """Method to get a refreshed token. In case of error raises RefreshTokenException."""
74 |
75 | def simple_get_new_token() -> str:
76 | try:
77 | token_html = self._request_for_response_body(web_client)
78 | return json.loads(token_html)['guest_token']
79 | except JSONDecodeError:
80 | raise RefreshTokenException('Error during request for token')
81 | except KeyError:
82 | raise RefreshTokenException('Error during request for token')
83 |
84 | # retry loop implemented manually, as suggested in https://github.com/rholder/retrying/issues/70#issuecomment-313129305
85 | return _run_retrying_for_string(
86 | stop_max_ms=self.stop_max_delay_on_too_many_requests_exception,
87 | on_except_function=self.auth_fail_strategy.run_strategy,
88 | catch_predicate=lambda e: isinstance(e, TooManyRequestsException),
89 | call_function=simple_get_new_token
90 | )
91 |
--------------------------------------------------------------------------------
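The provider above retries the guest-token request until stop_max_delay_on_too_many_requests_exception elapses, running the configured fail strategy between attempts. A minimal sketch of wiring it into a web client with a shorter wait strategy (the values are illustrative, not the library defaults):

import stweet as st
from stweet.auth import SimpleAuthTokenProvider
from stweet.auth.fail_strategy.wait_auth_fail_strategy import WaitAuthFailStrategy
from stweet.twitter_api.twitter_auth_web_client_interceptor import TwitterAuthWebClientInterceptor

# retry token refreshes for at most 5 minutes, waiting 10 seconds between failures
provider = SimpleAuthTokenProvider(
    auth_fail_strategy=WaitAuthFailStrategy(10 * 1000),
    stop_max_delay_on_too_many_requests_exception=5 * 60 * 1000
)
web_client = st.RequestsWebClient(
    interceptors=[TwitterAuthWebClientInterceptor(auth_token_provider=provider)]
)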
/stweet/exceptions/__init__.py:
--------------------------------------------------------------------------------
1 | from .refresh_token_exception import RefreshTokenException
2 | from .scrap_batch_bad_response import ScrapBatchBadResponse
3 |
--------------------------------------------------------------------------------
/stweet/exceptions/refresh_token_exception.py:
--------------------------------------------------------------------------------
1 | """RefreshTokenException definition."""
2 |
3 |
4 | class RefreshTokenException(Exception):
5 | """RefreshTokenException class."""
6 |
7 | def __init__(self, msg):
8 | """Error constructor."""
9 | super().__init__(msg)
10 |
--------------------------------------------------------------------------------
/stweet/exceptions/scrap_batch_bad_response.py:
--------------------------------------------------------------------------------
1 | """ScrapBatchBadResponse definition."""
2 |
3 |
4 | class ScrapBatchBadResponse(Exception):
5 | """ScrapBatchBadResponse class."""
6 |
7 | def __init__(self, msg):
8 | """Error constructor."""
9 | super().__init__(msg)
10 |
--------------------------------------------------------------------------------
/stweet/exceptions/too_many_requests_exception.py:
--------------------------------------------------------------------------------
1 | """TooManyRequestsException class."""
2 |
3 |
4 | class TooManyRequestsException(Exception):
5 | """TooManyRequestsException class."""
6 |
7 | def __init__(self, request_url: str):
8 | """Error constructor."""
9 | super().__init__(f'too many requests to {request_url}')
10 |
--------------------------------------------------------------------------------
/stweet/exceptions/user_suspended_exception.py:
--------------------------------------------------------------------------------
1 | """UserSuspendedException class."""
2 |
3 |
4 | class UserSuspendedException(Exception):
5 | """UserSuspendedException class."""
6 |
7 | def __init__(self):
8 | """Error constructor."""
9 | super().__init__('Username is suspended')
10 |
--------------------------------------------------------------------------------
/stweet/get_user_runner/__init__.py:
--------------------------------------------------------------------------------
1 | from .get_users_context import GetUsersContext
2 | from .get_users_result import GetUsersResult
3 | from .get_users_runner import GetUsersRunner
4 | from .get_users_task import GetUsersTask
5 |
--------------------------------------------------------------------------------
/stweet/get_user_runner/get_users_context.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass, field
2 | from typing import List, Tuple
3 |
4 |
5 | @dataclass
6 | class GetUsersContext:
7 | scrapped_count: int = 0
8 | usernames_with_error: List[Tuple[str, Exception]] = field(default_factory=list)
9 |
10 | def add_one_scrapped_user(self):
11 | self.scrapped_count += 1
12 |
13 | def add_user_with_scrap_error(self, username: str, exception: Exception):
14 | self.usernames_with_error.append((username, exception))
15 |
--------------------------------------------------------------------------------
/stweet/get_user_runner/get_users_result.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import List, Tuple
3 |
4 |
5 | @dataclass
6 | class GetUsersResult:
7 | users_count: int
8 | usernames_with_error: List[Tuple[str, Exception]]
9 |
--------------------------------------------------------------------------------
/stweet/get_user_runner/get_users_runner.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import List, Optional
3 |
4 | from ..http_request import WebClient
5 | from ..model.user_raw import UserRaw
6 | from ..raw_output.raw_data_output import RawDataOutput
7 | from ..twitter_api.default_twitter_web_client_provider import \
8 | DefaultTwitterWebClientProvider
9 | from ..twitter_api.twitter_api_requests import TwitterApiRequests
10 | from .get_users_context import GetUsersContext
11 | from .get_users_result import GetUsersResult
12 | from .get_users_task import GetUsersTask
13 | from .user_parser import parse_user
14 |
15 |
16 | @dataclass
17 | class _TweetByIdBaseInfo:
18 | id: str
19 | username: str
20 | tweet_content: str
21 |
22 |
23 | class GetUsersRunner:
24 |
25 | get_user_context: GetUsersContext
26 | get_user_task: GetUsersTask
27 | raw_data_outputs: List[RawDataOutput]
28 | web_client: WebClient
29 |
30 | def __init__(
31 | self,
32 | get_user_task: GetUsersTask,
33 | raw_data_outputs: List[RawDataOutput],
34 | get_user_context: Optional[GetUsersContext] = None,
35 | web_client: Optional[WebClient] = None
36 | ):
37 | self.get_user_context = GetUsersContext() if get_user_context is None else get_user_context
38
| self.get_user_task = get_user_task 39 | self.raw_data_outputs = raw_data_outputs 40 | self.web_client = web_client if web_client is not None \ 41 | else DefaultTwitterWebClientProvider.get_web_client() 42 | return 43 | 44 | def run(self) -> GetUsersResult: 45 | for username in self.get_user_task.usernames: 46 | self._try_get_user(username) 47 | return GetUsersResult(self.get_user_context.scrapped_count, 48 | self.get_user_context.usernames_with_error) 49 | 50 | def _try_get_user(self, username: str): 51 | try: 52 | request_details = TwitterApiRequests().get_user_details_request_details(username) 53 | user_request_response = self.web_client.run_request(request_details) 54 | full_user = parse_user(user_request_response.text) 55 | self.get_user_context.add_one_scrapped_user() 56 | self._process_user_to_output(full_user) 57 | except Exception as exception: 58 | self.get_user_context.add_user_with_scrap_error(username, exception) 59 | 60 | def _process_user_to_output(self, user_raw: UserRaw): 61 | for user_output in self.raw_data_outputs: 62 | user_output.export_raw_data([user_raw]) 63 | -------------------------------------------------------------------------------- /stweet/get_user_runner/get_users_task.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List 3 | 4 | 5 | @dataclass(frozen=True) 6 | class GetUsersTask: 7 | usernames: List[str] 8 | 9 | def __init__( 10 | self, 11 | usernames: List[str] 12 | ): 13 | object.__setattr__(self, 'usernames', usernames) 14 | return 15 | -------------------------------------------------------------------------------- /stweet/get_user_runner/user_parser.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import arrow 4 | 5 | from stweet.model.user_raw import UserRaw 6 | 7 | 8 | def parse_user(response_content: str) -> UserRaw: 9 | return UserRaw(json.dumps(json.loads(response_content)['data']['user']['result']), arrow.now()) 10 | -------------------------------------------------------------------------------- /stweet/http_request/__init__.py: -------------------------------------------------------------------------------- 1 | from .http_method import HttpMethod 2 | from .request_details import RequestDetails 3 | from .request_response import RequestResponse 4 | from .requests.requests_web_client import (RequestsWebClient, 5 | RequestsWebClientProxyConfig) 6 | from .web_client import WebClient 7 | -------------------------------------------------------------------------------- /stweet/http_request/http_method.py: -------------------------------------------------------------------------------- 1 | """HttpMethod enum class.""" 2 | import enum 3 | 4 | 5 | class HttpMethod(enum.Enum): 6 | """HttpMethod enum class.""" 7 | 8 | GET = 1 9 | POST = 2 10 | -------------------------------------------------------------------------------- /stweet/http_request/interceptor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markowanga/stweet/fe34e98254dc7646bde6e083b5f6f745a0ee8cb6/stweet/http_request/interceptor/__init__.py -------------------------------------------------------------------------------- /stweet/http_request/interceptor/logging_requests_web_client_interceptor.py: -------------------------------------------------------------------------------- 1 | """Class of LoggingRequestsWebClientInterceptor.""" 2 | import logging 3 | from 
http.client import HTTPConnection
4 | from typing import List
5 |
6 | from .. import RequestDetails, RequestResponse, RequestsWebClient, WebClient
7 |
8 |
9 | class LoggingRequestsWebClientInterceptor(WebClient.WebClientInterceptor):
10 | """Class of LoggingRequestsWebClientInterceptor."""
11 |
12 | @staticmethod
13 | def _debug_requests_on():
14 | """Switches on logging of the requests module."""
15 | HTTPConnection.debuglevel = 1
16 |
17 | logging.basicConfig()
18 | logging.getLogger().setLevel(logging.DEBUG)
19 | requests_log = logging.getLogger("requests.packages.urllib3")
20 | requests_log.setLevel(logging.DEBUG)
21 | requests_log.propagate = True
22 |
23 | @staticmethod
24 | def _debug_requests_off():
25 | """Switches off logging of the requests module; may have side effects."""
26 | HTTPConnection.debuglevel = 0
27 |
28 | root_logger = logging.getLogger()
29 | root_logger.setLevel(logging.WARNING)
30 | root_logger.handlers = []
31 | requests_log = logging.getLogger("requests.packages.urllib3")
32 | requests_log.setLevel(logging.NOTSET)
33 | requests_log.propagate = False
34 |
35 | def logs_to_show(self, params: RequestDetails) -> bool:
36 | """Method to decide whether to show logs of the request.
37 |
38 | Method can be overridden so that the logs are filtered, for example by request URL.
39 | """
40 | return True
41 |
42 | def intercept(
43 | self,
44 | requests_details: RequestDetails,
45 | next_interceptors: List[WebClient.WebClientInterceptor],
46 | web_client: RequestsWebClient
47 | ) -> RequestResponse:
48 | """Method shows logs when the predicate is true. Uses a static field, so there may be problems with concurrency."""
49 | is_to_log = self.logs_to_show(requests_details)
50 | if is_to_log:
51 | LoggingRequestsWebClientInterceptor._debug_requests_on()
52 | to_return = self.get_response(requests_details, next_interceptors, web_client)
53 | if is_to_log:
54 | LoggingRequestsWebClientInterceptor._debug_requests_off()
55 | return to_return
56 |
--------------------------------------------------------------------------------
/stweet/http_request/interceptor/params_response_log_web_client_interceptor.py:
--------------------------------------------------------------------------------
1 | """Class of ParamsResponseLogWebClientInterceptor."""
2 | import threading
3 | from typing import List
4 |
5 | from .. import RequestDetails, RequestResponse, RequestsWebClient, WebClient
6 |
7 |
8 | class ParamsResponseLogWebClientInterceptor(WebClient.WebClientInterceptor):
9 | """Class of ParamsResponseLogWebClientInterceptor.
10 |
11 | Interceptor logs the input params and the output response.
12 | """
13 |
14 | _value: int
15 | _lock: threading.Lock
16 |
17 | def __init__(self):
18 | """Constructor of ParamsResponseLogWebClientInterceptor."""
19 | self._value = 0
20 | self._lock = threading.Lock()
21 |
22 | def increment(self) -> int:
23 | """Thread-safe increment. Returns the old value."""
24 | with self._lock:
25 | to_return = self._value
26 | self._value += 1
27 | return to_return
28 |
29 | def logs_to_show(self, params: RequestDetails) -> bool:
30 | """Method to decide whether to show logs of the request.
31 |
32 | Method can be overridden so that the logs are filtered, for example by request URL.
33 | """
34 | return True
35 |
36 | def intercept(
37 | self,
38 | requests_details: RequestDetails,
39 | next_interceptors: List[WebClient.WebClientInterceptor],
40 | web_client: RequestsWebClient
41 | ) -> RequestResponse:
42 | """Method shows logs when the predicate is true. Uses shared state, so there may be problems with concurrency."""
43 | is_to_log = self.logs_to_show(requests_details)
44 | index = self.increment()
45 | if is_to_log:
46 | print(f'{index} -- {requests_details}')
47 | to_return = self.get_response(requests_details, next_interceptors, web_client)
48 | if is_to_log:
49 | print(f'{index} -- {to_return}')
50 | return to_return
51 |
--------------------------------------------------------------------------------
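Both interceptors above are attached through the interceptors argument of a web client; each one wraps the remaining chain, so they run in list order. A sketch combining them with the auth interceptor defined later in this repository:

import stweet as st
from stweet.http_request.interceptor.logging_requests_web_client_interceptor import \
    LoggingRequestsWebClientInterceptor
from stweet.http_request.interceptor.params_response_log_web_client_interceptor import \
    ParamsResponseLogWebClientInterceptor
from stweet.twitter_api.twitter_auth_web_client_interceptor import TwitterAuthWebClientInterceptor

web_client = st.RequestsWebClient(interceptors=[
    ParamsResponseLogWebClientInterceptor(),  # logs request params and responses
    LoggingRequestsWebClientInterceptor(),    # switches on low-level requests logging
    TwitterAuthWebClientInterceptor()         # adds Authorization and guest token headers
])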
/stweet/http_request/request_details.py:
--------------------------------------------------------------------------------
1 | """Class with request details."""
2 |
3 | from dataclasses import dataclass
4 | from typing import Dict
5 |
6 | from .http_method import HttpMethod
7 |
8 |
9 | @dataclass
10 | class RequestDetails:
11 | """Class with request details. Specifies all http request details."""
12 |
13 | http_method: HttpMethod
14 | url: str
15 | headers: Dict[str, str]
16 | params: Dict[str, str]
17 | timeout: int
18 |
--------------------------------------------------------------------------------
/stweet/http_request/request_response.py:
--------------------------------------------------------------------------------
1 | """Class with response details."""
2 |
3 | from dataclasses import dataclass
4 | from typing import Optional
5 |
6 |
7 | @dataclass
8 | class RequestResponse:
9 | """Class with response details. Independent of the web library implementation."""
10 |
11 | status_code: Optional[int]
12 | text: Optional[str]
13 |
14 | def is_429(self) -> bool:
15 | """Method to check whether the response has the too-many-requests (429) status."""
16 | return self.status_code == 429
17 |
18 | def is_success(self) -> bool:
19 | """Method to check whether the response has a success status."""
20 | return self.status_code is not None and self.status_code < 300
21 |
--------------------------------------------------------------------------------
/stweet/http_request/requests/__init__.py:
--------------------------------------------------------------------------------
1 | from .requests_web_client import RequestsWebClient
2 | from .requests_web_client_proxy_config import RequestsWebClientProxyConfig
3 |
--------------------------------------------------------------------------------
/stweet/http_request/requests/requests_web_client.py:
--------------------------------------------------------------------------------
1 | """Requests web client class."""
2 | from __future__ import annotations
3 |
4 | from typing import Dict, List, Optional
5 |
6 | import requests
7 | import requests.adapters
8 | import urllib3
9 | import urllib3.util.ssl_
10 |
11 | from ..request_details import RequestDetails
12 | from ..request_response import RequestResponse
13 | from ..web_client import WebClient
14 | from .requests_web_client_proxy_config import RequestsWebClientProxyConfig
15 |
16 | _CIPHERS = 'TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256:ECDHE-ECDSA-AES128-' \
17 | 'GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:' \
18 | 'ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-RSA-AES128-SHA' \
19 | ':ECDHE-RSA-AES256-SHA:AES128-GCM-SHA256:AES256-GCM-SHA384:AES128-SHA:AES256-SHA'
20 |
21 |
22 | class _TwitterTLSAdapter(requests.adapters.HTTPAdapter):
23 | def init_poolmanager(self, *args, **kwargs):
24 | # FIXME: When urllib3 2.0.0 is out and can be required,
25 | # this should use urllib3.util.create_urllib3_context instead of the private, undocumented ssl_ module.
26 | kwargs['ssl_context'] = urllib3.util.ssl_.create_urllib3_context(ciphers=_CIPHERS)
27 | return super().init_poolmanager(*args, **kwargs)
28 |
29 |
30 | class RequestsWebClient(WebClient):
31 | """Requests web client class. Implementation based on the requests library."""
32 |
33 | proxy: Optional[RequestsWebClientProxyConfig]
34 | verify: bool
35 |
36 | def __init__(
37 | self,
38 | proxy: Optional[RequestsWebClientProxyConfig] = None,
39 | verify: bool = True,
40 | interceptors: Optional[List[WebClient.WebClientInterceptor]] = None
41 | ):
42 | """Constructor of RequestsWebClient."""
43 | interceptors_to_super = interceptors \
44 | if interceptors is not None \
45 | else []
46 | super(RequestsWebClient, self).__init__(interceptors_to_super)
47 | self.proxy = proxy
48 | self.verify = verify
49 |
50 | def run_clear_request(self, params: RequestDetails) -> RequestResponse:
51 | """Main method to run a request using the requests package."""
52 | session = requests.Session()
53 | adapter = _TwitterTLSAdapter()
54 | session.mount('https://twitter.com', adapter)
55 | session.mount('https://api.twitter.com', adapter)
56 | response = session.request(
57 | method=params.http_method.name,
58 | url=params.url,
59 | params=params.params,
60 | headers=params.headers,
61 | timeout=params.timeout,
62 | proxies=self._get_proxy(),
63 | verify=self.verify
64 | )
65 | return RequestResponse(response.status_code, response.text)
66 |
67 | def _get_proxy(self) -> Optional[Dict[str, str]]:
68 | return None if self.proxy is None else dict({
69 | 'http': self.proxy.http_proxy,
70 | 'https': self.proxy.https_proxy,
71 | })
72 |
--------------------------------------------------------------------------------
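RequestsWebClient also accepts the proxy configuration defined in the next file. A sketch routing all traffic through a local HTTP proxy, for example the squid container from test-services-docker-compose.yml; the URL is illustrative:

import stweet as st

proxy = st.RequestsWebClientProxyConfig(
    http_proxy='http://localhost:3128',
    https_proxy='http://localhost:3128'
)
web_client = st.RequestsWebClient(proxy=proxy)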
/stweet/http_request/requests/requests_web_client_proxy_config.py:
--------------------------------------------------------------------------------
1 | """Configuration of proxy to RequestsWebClient."""
2 | from dataclasses import dataclass
3 |
4 |
5 | @dataclass
6 | class RequestsWebClientProxyConfig:
7 | """Configuration class of proxy to RequestsWebClient."""
8 |
9 | http_proxy: str
10 | https_proxy: str
11 |
--------------------------------------------------------------------------------
/stweet/http_request/web_client.py:
--------------------------------------------------------------------------------
1 | """Web client abstract class."""
2 | from __future__ import annotations
3 |
4 | from abc import ABC, abstractmethod
5 | from typing import List, Optional
6 |
7 | from .request_details import RequestDetails
8 | from .request_response import RequestResponse
9 |
10 |
11 | def _run_request_with_interceptors(
12 | requests_details: RequestDetails,
13 | next_interceptors: List[WebClient.WebClientInterceptor],
14 | web_client: WebClient
15 | ) -> RequestResponse:
16 | return next_interceptors[0].intercept(requests_details, next_interceptors[1:], web_client) if len(
17 | next_interceptors) > 0 else web_client.run_clear_request(requests_details)
18 |
19 |
20 | class WebClient:
21 | """Web client abstract class."""
22 |
23 | _interceptors: List[WebClientInterceptor]
24 |
25 | def __init__(self, interceptors: Optional[List[WebClientInterceptor]]):
26 | """Base constructor of class."""
27 | self._interceptors = [] if interceptors is None else interceptors
28 |
29 | def run_request(self, requests_details: RequestDetails) -> RequestResponse:
30 | """Method processes the request, wrapping it with the interceptors."""
31 | return _run_request_with_interceptors(requests_details, self._interceptors, self)
32 |
33 | @abstractmethod
34 | def run_clear_request(self, params: RequestDetails) -> RequestResponse:
35 | """Abstract method to run only the request."""
36 |
37 | class WebClientInterceptor(ABC):
38 | """Abstract class of web client interceptor."""
39 |
40 | @staticmethod
41 | def get_response(
42 | requests_details: RequestDetails,
43 | next_interceptors: List[WebClient.WebClientInterceptor],
44 | web_client: WebClient
45 | ) -> RequestResponse:
46 | """Method processes the request. If any interceptors remain, they wrap the request; otherwise the client runs it."""
47 | return _run_request_with_interceptors(requests_details, next_interceptors, web_client)
48 |
49 | @abstractmethod
50 | def intercept(
51 | self,
52 | requests_details: RequestDetails,
53 | next_interceptors: List[WebClient.WebClientInterceptor],
54 | web_client: WebClient
55 | ) -> RequestResponse:
56 | """Interceptor method of the request.
57 |
58 | Method needs to call WebClientInterceptor.get_response to let the next interceptors
59 | and the client process the request.
60 | """
61 |
--------------------------------------------------------------------------------
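Custom behaviour hooks into this chain by subclassing WebClient.WebClientInterceptor; the intercept method must delegate to get_response so the remaining interceptors and finally run_clear_request still execute. A hypothetical request-counting sketch:

from typing import List

from stweet.http_request import RequestDetails, RequestResponse, WebClient


class CountingInterceptor(WebClient.WebClientInterceptor):
    """Hypothetical interceptor that counts the processed requests."""

    requests_count: int = 0

    def intercept(
            self,
            requests_details: RequestDetails,
            next_interceptors: List[WebClient.WebClientInterceptor],
            web_client: WebClient
    ) -> RequestResponse:
        self.requests_count += 1
        # delegate to the rest of the interceptor chain
        return self.get_response(requests_details, next_interceptors, web_client)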
/stweet/model/__init__.py:
--------------------------------------------------------------------------------
1 | from .language import Language
2 | from .user_tweet_raw import UserTweetRaw
3 |
--------------------------------------------------------------------------------
/stweet/model/cursor.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 |
3 |
4 | @dataclass
5 | class Cursor:
6 | type: str
7 | value: str
8 |
--------------------------------------------------------------------------------
/stweet/model/language.py:
--------------------------------------------------------------------------------
1 | """Domain Language enum class."""
2 |
3 | import enum
4 |
5 |
6 | class Language(enum.Enum):
7 | """Domain Language enum class."""
8 |
9 | def __new__(cls, *args, **kwargs):
10 | """Class __new__ method."""
11 | value = len(cls.__members__) + 1
12 | obj = object.__new__(cls)
13 | obj._value_ = value
14 | return obj
15 |
16 | def __init__(self, short_value):
17 | """Class constructor method."""
18 | self.short_value = short_value
19 |
20 | ENGLISH = 'en'
21 | ARABIC = 'ar'
22 | BASQUE = 'eu'
23 | BENGALI = 'bn'
24 | BULGARIAN = 'bg'
25 | TRADITIONAL_CHINESE = 'zh-tw'
26 | SIMPLIFIED_CHINESE = 'zh-cn'
27 | CROATIAN = 'hr'
28 | CZECH = 'cs'
29 | DANISH = 'da'
30 | FINNISH = 'fi'
31 | FRENCH = 'fr'
32 | GREEK = 'el'
33 | GUJARATI = 'gu'
34 | HEBREW = 'iw'
35 | HINDI = 'hi'
36 | SPANISH = 'es'
37 | INDONESIAN = 'in'
38 | JAPANESE = 'ja'
39 | KANNADA = 'kn'
40 | CATALAN = 'ca'
41 | KOREAN = 'ko'
42 | MARATHI = 'mr'
43 | DUTCH = 'nl'
44 | GERMAN = 'de'
45 | NORWEGIAN = 'no'
46 | PERSIAN = 'fa'
47 | POLISH = 'pl'
48 | PORTUGUESE = 'pt'
49 | RUSSIAN = 'ru'
50 | ROMANIAN = 'ro'
51 | SERBIAN = 'sr'
52 | SLOVAK = 'sk'
53 | SWEDISH = 'sv'
54 | THAI = 'th'
55 | TAMIL = 'ta'
56 | TURKISH = 'tr'
57 | UKRAINIAN = 'uk'
58 | URDU = 'ur'
59 | HUNGARIAN = 'hu'
60 | VIETNAMESE = 'vi'
61 | ITALIAN = 'it'
62 |
--------------------------------------------------------------------------------
/stweet/model/raw_data.py:
--------------------------------------------------------------------------------
1 | import json
2 | from abc import ABC
3 |
4 | from arrow import Arrow
5 |
6 |
7 | class RawData(ABC):
8 | object_type: str
9 |
download_datetime: Arrow 10 | raw_value: str 11 | 12 | def __init__(self, object_type: str, raw_value: str, download_datetime: Arrow): 13 | self.raw_value = raw_value 14 | self.object_type = object_type 15 | self.download_datetime = download_datetime 16 | 17 | def to_json_line(self) -> str: 18 | return json.dumps({ 19 | 'object_type': self.object_type, 20 | 'download_datetime': self.download_datetime.isoformat(), 21 | 'raw_value': json.loads(self.raw_value) 22 | }) 23 | -------------------------------------------------------------------------------- /stweet/model/tweet_raw.py: -------------------------------------------------------------------------------- 1 | from arrow import Arrow 2 | 3 | from stweet.model.raw_data import RawData 4 | 5 | 6 | class TweetRaw(RawData): 7 | def __init__(self, raw_value: str, download_datetime: Arrow): 8 | super().__init__('TweetRaw', raw_value, download_datetime) 9 | -------------------------------------------------------------------------------- /stweet/model/user_raw.py: -------------------------------------------------------------------------------- 1 | from arrow import Arrow 2 | 3 | from stweet.model.raw_data import RawData 4 | 5 | 6 | class UserRaw(RawData): 7 | def __init__(self, raw_value: str, download_datetime: Arrow): 8 | super().__init__('UserRaw', raw_value, download_datetime) 9 | -------------------------------------------------------------------------------- /stweet/model/user_tweet_raw.py: -------------------------------------------------------------------------------- 1 | from arrow import Arrow 2 | 3 | from stweet.model.raw_data import RawData 4 | 5 | 6 | class UserTweetRaw(RawData): 7 | def __init__(self, raw_value: str, download_datetime: Arrow): 8 | super().__init__('UserTweetRaw', raw_value, download_datetime) 9 | -------------------------------------------------------------------------------- /stweet/raw_output/__init__.py: -------------------------------------------------------------------------------- 1 | from .collector_raw_output import CollectorRawOutput 2 | from .json_line_file_raw_output import JsonLineFileRawOutput 3 | from .print_every_n_raw_output import PrintEveryNRawOutput 4 | from .print_first_in_batch_raw_output import PrintFirstInBatchRawOutput 5 | from .print_raw_output import PrintRawOutput 6 | -------------------------------------------------------------------------------- /stweet/raw_output/collector_raw_output.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from ..model.raw_data import RawData 4 | from .raw_data_output import RawDataOutput 5 | 6 | 7 | class CollectorRawOutput(RawDataOutput): 8 | _raw_data_list: List[RawData] 9 | 10 | def __init__(self): 11 | self._raw_data_list = [] 12 | 13 | def export_raw_data(self, raw_data_list: List[RawData]): 14 | self._raw_data_list.extend(raw_data_list) 15 | return 16 | 17 | def get_raw_list(self) -> List[RawData]: 18 | return self._raw_data_list 19 | -------------------------------------------------------------------------------- /stweet/raw_output/json_line_file_raw_output.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from ..model.raw_data import RawData 4 | from .raw_data_output import RawDataOutput 5 | 6 | 7 | class JsonLineFileRawOutput(RawDataOutput): 8 | file_name: str 9 | 10 | def __init__(self, file_name: str): 11 | self.file_name = file_name 12 | 13 | def export_raw_data(self, raw_data_list: List[RawData]): 14 | with 
open(self.file_name, 'a') as file:
15 | for raw in raw_data_list:
16 | file.write(f'{raw.to_json_line()}\n')
17 | return
18 |
--------------------------------------------------------------------------------
/stweet/raw_output/print_every_n_raw_output.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from ..model.raw_data import RawData
4 | from .raw_data_output import RawDataOutput
5 |
6 |
7 | class PrintEveryNRawOutput(RawDataOutput):
8 | each_n: int
9 | _counter: int = 0
10 |
11 | def __init__(self, each_n: int):
12 | self.each_n = each_n
13 |
14 | def export_raw_data(self, raw_data_list: List[RawData]):
15 | for it in raw_data_list:
16 | self._counter += 1
17 | if self._counter % self.each_n == 0:
18 | print(self._counter, it.to_json_line())
19 | return
20 |
--------------------------------------------------------------------------------
/stweet/raw_output/print_first_in_batch_raw_output.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from ..model.raw_data import RawData
4 | from .raw_data_output import RawDataOutput
5 |
6 |
7 | class PrintFirstInBatchRawOutput(RawDataOutput):
8 |
9 | def export_raw_data(self, raw_data_list: List[RawData]):
10 | message = str(raw_data_list[0].to_json_line()) if len(
11 | raw_data_list) > 0 else 'PrintFirstInBatchRawOutput -- no tweets to print'
12 | print(message)
13 | return
14 |
--------------------------------------------------------------------------------
/stweet/raw_output/print_raw_output.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from ..model.raw_data import RawData
4 | from .raw_data_output import RawDataOutput
5 |
6 |
7 | class PrintRawOutput(RawDataOutput):
8 |
9 | def export_raw_data(self, raw_data_list: List[RawData]):
10 | for it in raw_data_list:
11 | print(it.to_json_line())
12 | return
13 |
--------------------------------------------------------------------------------
/stweet/raw_output/raw_data_output.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from typing import List
3 |
4 | from ..model.raw_data import RawData
5 |
6 |
7 | class RawDataOutput(ABC):
8 |
9 | @abstractmethod
10 | def export_raw_data(self, raw_data_list: List[RawData]):
11 | pass
12 |
--------------------------------------------------------------------------------
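Every runner in the package writes through the RawDataOutput abstraction above, so a new sink only has to implement export_raw_data. A hypothetical counting sketch:

from typing import List

from stweet.model.raw_data import RawData
from stweet.raw_output.raw_data_output import RawDataOutput


class CountingRawOutput(RawDataOutput):
    """Hypothetical output that only counts the exported items."""

    counter: int = 0

    def export_raw_data(self, raw_data_list: List[RawData]):
        self.counter += len(raw_data_list)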
/stweet/search_runner/__init__.py:
--------------------------------------------------------------------------------
1 | from .replies_filter import RepliesFilter
2 | from .search_run_context import SearchRunContext
3 | from .search_runner import TweetSearchRunner
4 | from .search_tweets_result import SearchTweetsResult
5 | from .search_tweets_task import SearchTweetsTask
6 |
--------------------------------------------------------------------------------
/stweet/search_runner/replies_filter.py:
--------------------------------------------------------------------------------
1 | """Domain RepliesFilter enum class."""
2 |
3 | import enum
4 |
5 |
6 | class RepliesFilter(enum.Enum):
7 | """Domain RepliesFilter enum class."""
8 |
9 | ONLY_REPLIES = 1
10 | ONLY_ORIGINAL = 2
11 |
--------------------------------------------------------------------------------
/stweet/search_runner/search_run_context.py:
--------------------------------------------------------------------------------
1 | """Domain SearchRunContext class."""
2 |
3 | from dataclasses import dataclass
4 | from typing import Optional
5 |
6 | from ..model.cursor import Cursor
7 |
8 |
9 | @dataclass
10 | class SearchRunContext:
11 | """Domain SearchRunContext class."""
12 |
13 | cursor: Optional[Cursor]
14 | last_tweets_download_count: int
15 | all_download_tweets_count: int
16 |
17 | def __init__(
18 | self,
19 | cursor: Optional[Cursor] = None,
20 | guest_auth_token: Optional[str] = None,
21 | last_tweets_download_count: int = 0,
22 | all_download_tweets: int = 0
23 | ):
24 | """Class constructor."""
25 | self.cursor = cursor
26 | self.guest_auth_token = guest_auth_token
27 | self.last_tweets_download_count = last_tweets_download_count
28 | self.all_download_tweets_count = all_download_tweets
29 | return
30 |
31 | def add_downloaded_tweets_count(self, new_downloaded_tweets_count: int):
32 | """Method to update the downloaded tweets count."""
33 | self.all_download_tweets_count += new_downloaded_tweets_count
34 | self.last_tweets_download_count = new_downloaded_tweets_count
35 |
--------------------------------------------------------------------------------
/stweet/search_runner/search_runner.py:
--------------------------------------------------------------------------------
1 | """Runner to process task to search tweets."""
2 | import json
3 | from typing import List, Optional
4 |
5 | from ..exceptions.scrap_batch_bad_response import ScrapBatchBadResponse
6 | from ..http_request.request_details import RequestDetails
7 | from ..http_request.web_client import WebClient
8 | from ..model.tweet_raw import TweetRaw
9 | from ..model.user_raw import UserRaw
10 | from ..raw_output.raw_data_output import RawDataOutput
11 | from ..twitter_api.default_twitter_web_client_provider import \
12 | DefaultTwitterWebClientProvider
13 | from ..twitter_api.twitter_api_requests import TwitterApiRequests
14 | from .search_run_context import SearchRunContext
15 | from .search_tweets_result import SearchTweetsResult
16 | from .search_tweets_task import SearchTweetsTask
17 | from .tweet_raw_parser import get_scroll_cursor, parse_tweets, parse_users
18 |
19 |
20 | class TweetSearchRunner:
21 | """Runner class to process task to search tweets."""
22 |
23 | search_run_context: SearchRunContext
24 | search_tweets_task: SearchTweetsTask
25 | tweet_raw_data_outputs: List[RawDataOutput]
26 | user_raw_data_outputs: List[RawDataOutput]
27 | web_client: WebClient
28 |
29 | def __init__(
30 | self,
31 | search_tweets_task: SearchTweetsTask,
32 | tweet_raw_data_outputs: List[RawDataOutput],
33 | user_raw_data_outputs: List[RawDataOutput],
34 | search_run_context: Optional[SearchRunContext] = None,
35 | web_client: Optional[WebClient] = None
36 | ):
37 | """Constructor to create object."""
38 | self.search_run_context = SearchRunContext() if search_run_context is None \
39 | else search_run_context
40 | self.search_tweets_task = search_tweets_task
41 | self.tweet_raw_data_outputs = tweet_raw_data_outputs
42 | self.user_raw_data_outputs = user_raw_data_outputs
43 | self.web_client = web_client \
44 | if web_client is not None \
45 | else DefaultTwitterWebClientProvider.get_web_client()
46 | return
47 |
48 | def run(self) -> SearchTweetsResult:
49 | """Main search_runner method."""
50 | while not self._is_end_of_scrapping():
51 | self._execute_next_tweets_request()
52 | return SearchTweetsResult(self.search_run_context.all_download_tweets_count)
53 |
54 | def _is_end_of_scrapping(self) -> bool:
55 | ctx = self.search_run_context
56 | last_scrap_zero =
ctx.last_tweets_download_count == 0 57 | is_cursor = ctx.cursor is not None 58 | return (last_scrap_zero and is_cursor) or (not last_scrap_zero and not is_cursor) 59 | 60 | def _execute_next_tweets_request(self): 61 | request_params = self._get_next_request_details() 62 | response = self.web_client.run_request(request_params) 63 | if response.is_success(): 64 | tweets = parse_tweets(response.text) 65 | users = parse_users(response.text) 66 | cursor = get_scroll_cursor(json.loads(response.text)['timeline']['instructions']) 67 | self.search_run_context.add_downloaded_tweets_count(len(tweets)) 68 | self.search_run_context.cursor = cursor 69 | self._process_new_results_to_output(tweets, users) 70 | else: 71 | raise ScrapBatchBadResponse(response) 72 | return 73 | 74 | def _get_next_request_details(self) -> RequestDetails: 75 | return TwitterApiRequests().get_search_tweet_request_details_new_api( 76 | self.search_run_context.all_download_tweets_count, 77 | self.search_run_context.cursor, 78 | self.search_tweets_task.tweets_limit, 79 | self.search_tweets_task.get_full_search_query() 80 | ) 81 | 82 | def _process_new_results_to_output(self, tweets: List[TweetRaw], users: List[UserRaw]): 83 | for raw_data_output in self.tweet_raw_data_outputs: 84 | raw_data_output.export_raw_data(tweets) 85 | for raw_data_output in self.user_raw_data_outputs: 86 | raw_data_output.export_raw_data(users) 87 | return 88 | -------------------------------------------------------------------------------- /stweet/search_runner/search_tweets_result.py: -------------------------------------------------------------------------------- 1 | """Class with result of TweetSearchRunner task.""" 2 | 3 | from dataclasses import dataclass 4 | 5 | 6 | @dataclass 7 | class SearchTweetsResult: 8 | """Class with result of TweetSearchRunner task.""" 9 | 10 | downloaded_count: int 11 | -------------------------------------------------------------------------------- /stweet/search_runner/search_tweets_task.py: -------------------------------------------------------------------------------- 1 | """Domain SearchTweetsTask class.""" 2 | 3 | from dataclasses import dataclass 4 | from typing import Optional 5 | 6 | from arrow import Arrow 7 | 8 | from ..model.language import Language 9 | from .replies_filter import RepliesFilter 10 | 11 | 12 | def _format_date(arrow_time: Arrow) -> str: 13 | return arrow_time.date().isoformat() 14 | 15 | 16 | @dataclass(frozen=True) 17 | class SearchTweetsTask: 18 | """Domain SearchTweetsTask class.""" 19 | 20 | all_words: Optional[str] 21 | exact_words: Optional[str] 22 | any_word: Optional[str] 23 | from_username: Optional[str] 24 | to_username: Optional[str] 25 | since: Optional[Arrow] 26 | until: Optional[Arrow] 27 | language: Optional[Language] 28 | tweets_limit: Optional[int] 29 | replies_filter: Optional[RepliesFilter] 30 | 31 | def __init__( 32 | self, 33 | all_words: Optional[str] = None, 34 | exact_words: Optional[str] = None, 35 | any_word: Optional[str] = None, 36 | from_username: Optional[str] = None, 37 | to_username: Optional[str] = None, 38 | since: Optional[Arrow] = None, 39 | until: Optional[Arrow] = None, 40 | language: Optional[Language] = None, 41 | tweets_limit: Optional[int] = None, 42 | replies_filter: Optional[RepliesFilter] = None 43 | ): 44 | """Class constructor.""" 45 | object.__setattr__(self, 'all_words', all_words) 46 | object.__setattr__(self, 'exact_words', exact_words) 47 | object.__setattr__(self, 'any_word', any_word) 48 | object.__setattr__(self, 'from_username', 
from_username)
49 | object.__setattr__(self, 'to_username', to_username)
50 | object.__setattr__(self, 'since', since)
51 | object.__setattr__(self, 'until', until)
52 | object.__setattr__(self, 'language', language)
53 | object.__setattr__(self, 'tweets_limit', tweets_limit)
54 | object.__setattr__(self, 'replies_filter', replies_filter)
55 | return
56 |
57 | def get_full_search_query(self) -> str:
58 | """Method to return the full search query."""
59 | query = ''
60 | if self.all_words is not None:
61 | query += self.all_words
62 | if self.exact_words is not None:
63 | query += f' "{self.exact_words}"'
64 | if self.any_word is not None:
65 | query += f' ({" OR ".join(self.any_word.split(" "))})'
66 | if self.language is not None:
67 | query += f' lang:{self.language.short_value}'
68 | if self.from_username:
69 | query += f' from:{self.from_username}'
70 | if self.since is not None:
71 | query += f" since:{_format_date(self.since)}"
72 | if self.until is not None:
73 | query += f" until:{_format_date(self.until)}"
74 | if self.to_username:
75 | query += f" to:{self.to_username}"
76 | if self.replies_filter is not None:
77 | if self.replies_filter == RepliesFilter.ONLY_REPLIES:
78 | query += " filter:replies"
79 | elif self.replies_filter == RepliesFilter.ONLY_ORIGINAL:
80 | query += " -filter:replies"
81 | return query
82 |
--------------------------------------------------------------------------------
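get_full_search_query assembles the task fields above into a single Twitter search string. A quick sketch of the expected result for a sample task (the dates and words are illustrative):

import arrow
import stweet as st

task = st.SearchTweetsTask(
    all_words='#covid19',
    language=st.Language.ENGLISH,
    since=arrow.get('2021-01-01'),
    until=arrow.get('2021-02-01')
)
# prints: #covid19 lang:en since:2021-01-01 until:2021-02-01
print(task.get_full_search_query())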
/stweet/search_runner/tweet_raw_parser.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import Any, List, Optional
3 |
4 | import arrow
5 |
6 | from ..model.cursor import Cursor
7 | from ..model.tweet_raw import TweetRaw
8 | from ..model.user_raw import UserRaw
9 |
10 |
11 | def get_scroll_cursor(instructions: List[Any]) -> Optional[Cursor]:
12 | entries = [
13 | [entry for entry in instruction['addEntries']['entries']]
14 | for instruction in instructions if 'addEntries' in instruction
15 | ]
16 | entries = [item for sublist in entries for item in sublist]
17 | replace_entries = [
18 | instruction['replaceEntry']['entry']
19 | for instruction in instructions if 'replaceEntry' in instruction
20 | ]
21 | entries.extend(replace_entries)
22 | bottom_entries = [it for it in entries if it['entryId'] == 'cursor-bottom-0']
23 | bottom_entry = None if len(bottom_entries) == 0 else bottom_entries[0]
24 | if bottom_entry is not None:
25 | cursor_raw = bottom_entry['content']['operation']['cursor']
26 | return Cursor(cursor_raw['cursorType'], cursor_raw['value'])
27 | else:
28 | return None
29 |
30 |
31 | def parse_users(response: str) -> List[UserRaw]:
32 | users_dict = json.loads(response)['globalObjects']['users']
33 | return [
34 | UserRaw(json.dumps(users_dict[it]), arrow.now())
35 | for it in users_dict.keys()
36 | ]
37 |
38 |
39 | def parse_tweets(response: str) -> List[TweetRaw]:
40 | tweets_dict = json.loads(response)['globalObjects']['tweets']
41 | return [
42 | TweetRaw(json.dumps(tweets_dict[it]), arrow.now())
43 | for it in tweets_dict.keys()
44 | ]
45 |
--------------------------------------------------------------------------------
/stweet/tweets_by_ids_runner/__init__.py:
--------------------------------------------------------------------------------
1 | from .tweets_by_id_result import TweetsByIdResult
2 | from .tweets_by_id_runner import TweetsByIdRunner
3 | from .tweets_by_id_task import TweetsByIdTask
4 |
--------------------------------------------------------------------------------
/stweet/tweets_by_ids_runner/tweet_raw_parser.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import List, Union
3 |
4 | import arrow
5 |
6 | from ..model.cursor import Cursor
7 | from ..model.user_tweet_raw import UserTweetRaw
8 |
9 |
10 | def _parse_tweets_entry_content(entry_content) -> Union[None, Cursor, UserTweetRaw]:
11 | if entry_content['entryType'] == 'TimelineTimelineItem':
12 | item_content = entry_content['itemContent']
13 | item_content_type = item_content['itemType']
14 | if item_content_type == 'TimelineTimelineCursor':
15 | return Cursor(item_content['cursorType'], item_content['value'])
16 | elif item_content_type == 'TimelineTweet':
17 | return UserTweetRaw(json.dumps(item_content['tweet_results']['result']), arrow.now())
18 | elif entry_content['entryType'] == 'TimelineTimelineModule':
19 | item = entry_content['items'][0]
20 | if item['item']['itemContent']['itemType'] == 'TimelineTweet':
21 | return UserTweetRaw(json.dumps(item['item']['itemContent']['tweet_results']['result']),
22 | arrow.now())
23 | else:
24 | return None
25 |
26 |
27 | def get_all_tweets_from_json(json_str: str) -> List[Union[UserTweetRaw, Cursor]]:
28 | response_obj = json.loads(json_str)
29 | instructions = response_obj['data']['threaded_conversation_with_injections']['instructions']
30 | tweet_instruction = [it for it in instructions if it['type'] == 'TimelineAddEntries'][0]
31 | entries = tweet_instruction['entries']
32 | to_return = [_parse_tweets_entry_content(it['content']) for it in entries]
33 | return [it for it in to_return if it is not None]
34 |
--------------------------------------------------------------------------------
/stweet/tweets_by_ids_runner/tweets_by_id_context.py:
--------------------------------------------------------------------------------
1 | """Domain TweetsByIdsContext class."""
2 |
3 | from dataclasses import dataclass
4 | from typing import Optional
5 |
6 | from ..model.cursor import Cursor
7 |
8 |
9 | @dataclass
10 | class TweetsByIdContext:
11 | """Domain TweetsByIdsContext class."""
12 |
13 | all_download_tweets_count: int
14 | requests_count: int
15 | cursor: Optional[Cursor]
16 |
17 | def __init__(
18 | self,
19 | all_download_tweets: int = 0,
20 | cursor: Optional[Cursor] = None,
21 | requests_count: int = 0
22 | ):
23 | """Class constructor."""
24 | self.all_download_tweets_count = all_download_tweets
25 | self.cursor = cursor
26 | self.requests_count = requests_count
27 | return
28 |
29 | def add_downloaded_tweets_count_in_request(self, new_tweets_count: int):
30 | """Add downloaded tweets to the context counter."""
31 | self.all_download_tweets_count += new_tweets_count
32 | self.requests_count += 1
33 |
--------------------------------------------------------------------------------
/stweet/tweets_by_ids_runner/tweets_by_id_result.py:
--------------------------------------------------------------------------------
1 | """Class with the result of a TweetsByIdRunner task."""
2 |
3 | from dataclasses import dataclass
4 | from typing import List
5 |
6 |
7 | @dataclass
8 | class TweetsByIdResult:
9 | """Class with the result of a TweetsByIdRunner task."""
10 |
11 | downloaded_count: int
12 |
--------------------------------------------------------------------------------
/stweet/tweets_by_ids_runner/tweets_by_id_runner.py:
--------------------------------------------------------------------------------
1 | """Runner to get tweets by ids."""
2 | import json
3 | from typing import List, Optional
4 |
5 | from
..exceptions import ScrapBatchBadResponse 6 | from ..http_request import RequestDetails, RequestResponse, WebClient 7 | from ..model import UserTweetRaw 8 | from ..model.cursor import Cursor 9 | from ..raw_output.raw_data_output import RawDataOutput 10 | from ..twitter_api.default_twitter_web_client_provider import \ 11 | DefaultTwitterWebClientProvider 12 | from ..twitter_api.twitter_api_requests import TwitterApiRequests 13 | from .tweet_raw_parser import get_all_tweets_from_json 14 | from .tweets_by_id_context import TweetsByIdContext 15 | from .tweets_by_id_result import TweetsByIdResult 16 | from .tweets_by_id_task import TweetsByIdTask 17 | 18 | _NOT_FOUND_MESSAGE = '_Missing: No status found with that ID.' 19 | 20 | 21 | class TweetsByIdRunner: 22 | tweets_by_id_context: TweetsByIdContext 23 | tweets_by_ids_task: TweetsByIdTask 24 | raw_data_outputs: List[RawDataOutput] 25 | web_client: WebClient 26 | 27 | def __init__( 28 | self, 29 | tweets_by_id_task: TweetsByIdTask, 30 | raw_data_outputs: List[RawDataOutput], 31 | tweets_by_ids_context: Optional[TweetsByIdContext] = None, 32 | web_client: Optional[WebClient] = None, 33 | ): 34 | self.tweets_by_id_context = TweetsByIdContext() if tweets_by_ids_context is None \ 35 | else tweets_by_ids_context 36 | self.tweets_by_ids_task = tweets_by_id_task 37 | self.raw_data_outputs = raw_data_outputs 38 | self.web_client = web_client if web_client is not None \ 39 | else DefaultTwitterWebClientProvider.get_web_client() 40 | return 41 | 42 | def run(self) -> TweetsByIdResult: 43 | """Main search_runner method.""" 44 | while not self._is_end_of_scrapping(): 45 | self._execute_next_tweets_request() 46 | return TweetsByIdResult(self.tweets_by_id_context.all_download_tweets_count) 47 | 48 | def _is_end_of_scrapping(self) -> bool: 49 | ctx = self.tweets_by_id_context 50 | is_cursor = ctx.cursor is not None 51 | was_any_call = ctx.requests_count > 0 52 | return was_any_call and not is_cursor 53 | 54 | @staticmethod 55 | def response_with_not_found(request_response: RequestResponse) -> bool: 56 | parsed = json.loads(request_response.text) 57 | if 'errors' not in parsed: 58 | return False 59 | errors = parsed['errors'] 60 | filtered_errors = [it for it in errors if _NOT_FOUND_MESSAGE == it['message']] 61 | return len(filtered_errors) > 0 62 | 63 | def _execute_next_tweets_request(self): 64 | request_params = self._get_next_request_details() 65 | response = self.web_client.run_request(request_params) 66 | if response.is_success(): 67 | if self.response_with_not_found(response): 68 | self.tweets_by_id_context.add_downloaded_tweets_count_in_request(0) 69 | self.tweets_by_id_context.cursor = None 70 | else: 71 | parsed_list = get_all_tweets_from_json(response.text) 72 | cursors = [it for it in parsed_list if isinstance(it, Cursor)] 73 | cursor = cursors[0] if len(cursors) > 0 else None 74 | user_tweet_raw = [it for it in parsed_list if isinstance(it, UserTweetRaw)] 75 | self.tweets_by_id_context.add_downloaded_tweets_count_in_request(len(user_tweet_raw)) 76 | self.tweets_by_id_context.cursor = cursor 77 | self._process_new_tweets_to_output(user_tweet_raw) 78 | else: 79 | raise ScrapBatchBadResponse(response) 80 | return 81 | 82 | def _process_new_tweets_to_output(self, raw_data_list: List[UserTweetRaw]): 83 | for raw_output in self.raw_data_outputs: 84 | raw_output.export_raw_data(raw_data_list) 85 | return 86 | 87 | def _get_next_request_details(self) -> RequestDetails: 88 | return TwitterApiRequests().get_tweet_request_by_id( 89 | 
self.tweets_by_ids_task.tweet_id, 90 | self.tweets_by_id_context.cursor 91 | ) 92 | -------------------------------------------------------------------------------- /stweet/tweets_by_ids_runner/tweets_by_id_task.py: -------------------------------------------------------------------------------- 1 | """Domain TweetsByIdsTask class.""" 2 | from dataclasses import dataclass 3 | 4 | 5 | @dataclass(frozen=True) 6 | class TweetsByIdTask: 7 | """Domain TweetsByIdsTask class.""" 8 | 9 | tweet_id: str 10 | 11 | def __init__( 12 | self, 13 | tweet_id: str 14 | ): 15 | """Class constructor.""" 16 | object.__setattr__(self, 'tweet_id', tweet_id) 17 | return 18 | -------------------------------------------------------------------------------- /stweet/twitter_api/__init__.py: -------------------------------------------------------------------------------- 1 | from .default_twitter_web_client_provider import \ 2 | DefaultTwitterWebClientProvider 3 | -------------------------------------------------------------------------------- /stweet/twitter_api/default_twitter_web_client_provider.py: -------------------------------------------------------------------------------- 1 | """DefaultTwitterWebClientProvider class.""" 2 | from tor_python_easy.tor_control_port_client import TorControlPortClient 3 | 4 | from ..auth import SimpleAuthTokenProvider 5 | from ..auth.fail_strategy.tor_ip_change_auth_fail_strategy import \ 6 | TorIpChangeAuthFailStrategy 7 | from ..http_request import (RequestsWebClient, RequestsWebClientProxyConfig, 8 | WebClient) 9 | from .twitter_auth_web_client_interceptor import \ 10 | TwitterAuthWebClientInterceptor 11 | 12 | 13 | class DefaultTwitterWebClientProvider: 14 | 15 | @staticmethod 16 | def get_web_client() -> WebClient: 17 | """Method returns default WebClient.""" 18 | return RequestsWebClient(interceptors=[TwitterAuthWebClientInterceptor()]) 19 | 20 | @staticmethod 21 | def get_web_client_preconfigured_for_tor_proxy( 22 | socks_proxy_url: str, 23 | control_host: str, 24 | control_port: int, 25 | control_password: str 26 | ) -> WebClient: 27 | tor_control_client = TorControlPortClient(control_host, control_port, control_password) 28 | fail_strategy = TorIpChangeAuthFailStrategy(tor_control_client) 29 | auth_token_provider = SimpleAuthTokenProvider(fail_strategy) 30 | return RequestsWebClient( 31 | proxy=RequestsWebClientProxyConfig(socks_proxy_url, socks_proxy_url), 32 | interceptors=[TwitterAuthWebClientInterceptor(auth_token_provider=auth_token_provider)] 33 | ) 34 | -------------------------------------------------------------------------------- /stweet/twitter_api/twitter_api_requests.py: -------------------------------------------------------------------------------- 1 | """Definitions of all api calls.""" 2 | import json 3 | from typing import Optional 4 | 5 | from ..http_request.http_method import HttpMethod 6 | from ..http_request.request_details import RequestDetails 7 | from ..model.cursor import Cursor 8 | 9 | _default_tweets_count_in_batch = 20 10 | 11 | 12 | class TwitterApiRequests: 13 | """Definitions of all api calls.""" 14 | 15 | timeout: int 16 | 17 | def __init__(self, timeout: int = 60): 18 | """Constructor TwitterApiRequests.""" 19 | self.timeout = timeout 20 | 21 | def get_guest_token_request_details(self): 22 | """Method return request details to get guest token.""" 23 | return RequestDetails( 24 | HttpMethod.POST, 25 | 'https://api.twitter.com/1.1/guest/activate.json', 26 | dict(), 27 | dict(), 28 | self.timeout 29 | ) 30 | 31 | def 
get_search_tweet_request_details_new_api(
32 | self,
33 | all_download_tweets_count: int,
34 | cursor: Optional[Cursor],
35 | tweets_limit: Optional[int],
36 | full_search_query: str
37 | ) -> RequestDetails:
38 | count = _default_tweets_count_in_batch \
39 | if tweets_limit is None \
40 | else min(_default_tweets_count_in_batch, tweets_limit - all_download_tweets_count)
41 | params = dict([
42 | ('include_profile_interstitial_type', '1'),
43 | ('include_blocking', '1'),
44 | ('include_blocked_by', '1'),
45 | ('include_followed_by', '1'),
46 | ('include_want_retweets', '1'),
47 | ('include_mute_edge', '1'),
48 | ('include_can_dm', '1'),
49 | ('include_can_media_tag', '1'),
50 | ('skip_status', '1'),
51 | ('cards_platform', 'Web-12'),
52 | ('include_cards', '1'),
53 | ('include_ext_alt_text', 'true'),
54 | ('include_quote_count', 'true'),
55 | ('include_reply_count', '1'),
56 | ('tweet_mode', 'extended'),
57 | ('include_entities', 'true'),
58 | ('include_user_entities', 'true'),
59 | ('include_ext_media_color', 'true'),
60 | ('include_ext_media_availability', 'true'),
61 | ('send_error_codes', 'true'),
62 | ('simple_quoted_tweet', 'true'),
63 | ('q', full_search_query),
64 | ('count', count),
65 | ('query_source', 'typed_query'),
66 | ('pc', '1'),
67 | ('spelling_corrections', '1'),
68 | ('ext', 'mediaStats,highlightedLabel,voiceInfo')
69 | ])
70 | if cursor is not None:
71 | params['cursor'] = cursor.value
72 | return RequestDetails(
73 | HttpMethod.GET,
74 | url='https://twitter.com/i/api/2/search/adaptive.json',
75 | headers=dict(),
76 | params=params,
77 | timeout=self.timeout
78 | )
79 |
80 | def get_user_details_request_details(self, user_screen_name: str) -> RequestDetails:
81 | variable_query = {
82 | "screen_name": user_screen_name,
83 | "withSafetyModeUserFields": True,
84 | "withSuperFollowsUserFields": True
85 | }
86 | _graphql_token = 'cYsDlVss-qimNYmNlb6inw'  # token generated for one request in browser
87 | return RequestDetails(
88 | http_method=HttpMethod.GET,
89 | url=f'https://twitter.com/i/api/graphql/{_graphql_token}/UserByScreenName',
90 | headers=dict(),
91 | params=dict({
92 | 'variables': json.dumps(variable_query)
93 | }),
94 | timeout=self.timeout
95 | )
96 |
97 | def get_tweet_request_by_id(self, tweet_id: str, cursor: Optional[Cursor]) -> RequestDetails:
98 | variable_query = {
99 | "focalTweetId": tweet_id,
100 | "with_rux_injections": True,
101 | "includePromotedContent": True,
102 | "withCommunity": True,
103 | "withTweetQuoteCount": True,
104 | "withBirdwatchNotes": True,
105 | "withSuperFollowsUserFields": True,
106 | "withUserResults": True,
107 | "withBirdwatchPivots": True,
108 | "withReactionsMetadata": True,
109 | "withReactionsPerspective": True,
110 | "withSuperFollowsTweetFields": True,
111 | "withVoice": True
112 | }
113 | if cursor is not None:
114 | variable_query['cursor'] = cursor.value
115 | _graphql_token = 'kUnCMgMYZCR8GyRZz76IQg'
116 | return RequestDetails(
117 | http_method=HttpMethod.GET,
118 | url=f'https://twitter.com/i/api/graphql/{_graphql_token}/TweetDetail',
119 | headers=dict(),
120 | params=dict({
121 | 'variables': json.dumps(variable_query)
122 | }),
123 | timeout=self.timeout
124 | )
125 |
--------------------------------------------------------------------------------
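Each method above only builds a RequestDetails value; nothing is sent until a WebClient executes it, so the requests can be inspected offline. A small sketch (the screen name is illustrative):

from stweet.twitter_api.twitter_api_requests import TwitterApiRequests

details = TwitterApiRequests(timeout=30).get_user_details_request_details('twitter')
# HttpMethod.GET https://twitter.com/i/api/graphql/cYsDlVss-qimNYmNlb6inw/UserByScreenName
print(details.http_method, details.url)
print(details.params['variables'])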
from stweet.auth import AuthTokenProvider, SimpleAuthTokenProvider 6 | from stweet.exceptions.too_many_requests_exception import \ 7 | TooManyRequestsException 8 | from stweet.http_request import (RequestDetails, RequestResponse, 9 | RequestsWebClient, WebClient) 10 | 11 | _AUTH_TOKEN = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4p' \ 12 | 'uTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA' 13 | _MAX_TRIES = 5 14 | 15 | 16 | class TwitterAuthWebClientInterceptor(WebClient.WebClientInterceptor): 17 | """Class of TwitterAuthWebClientInterceptor. 18 | 19 | Interceptor allows simple management of auth requests. 20 | """ 21 | 22 | _current_token: Optional[str] 23 | _auth_token_provider: AuthTokenProvider 24 | _guest_token_lock: Lock 25 | 26 | def __init__( 27 | self, 28 | init_auth_token: Optional[str] = None, 29 | auth_token_provider: Optional[AuthTokenProvider] = None 30 | ): 31 | """Constructor of TwitterAuthWebClientInterceptor.""" 32 | self._current_token = init_auth_token 33 | self._auth_token_provider = auth_token_provider \ 34 | if auth_token_provider is not None \ 35 | else SimpleAuthTokenProvider() 36 | self._guest_token_lock = Lock() 37 | 38 | def _add_auth_token(self, request_details: RequestDetails): 39 | request_details.headers['Authorization'] = _AUTH_TOKEN 40 | 41 | def _add_guest_token(self, request_details: RequestDetails, web_client: WebClient): 42 | if self._current_token is None: 43 | self._call_for_new_auth_request(web_client) 44 | request_details.headers['x-guest-token'] = self._current_token 45 | 46 | def _is_auth_token_to_add(self, request_details: RequestDetails) -> bool: 47 | return 'http://api.twitter.com' in request_details.url \ 48 | or 'https://api.twitter.com' in request_details.url \ 49 | or 'https://twitter.com/i/api' in request_details.url 50 | 51 | def _is_guest_token_to_add(self, request_details: RequestDetails) -> bool: 52 | if 'https://twitter.com/i/api/graphql/' in request_details.url: 53 | return True 54 | is_guest_request = '/1.1/guest/activate.json' in request_details.url 55 | return self._is_auth_token_to_add(request_details) and not is_guest_request 56 | 57 | def _call_for_new_auth_request(self, web_client: WebClient): 58 | old_token = self._current_token 59 | with self._guest_token_lock: 60 | if old_token == self._current_token: 61 | self._current_token = self._auth_token_provider.get_new_token(web_client) 62 | 63 | def intercept( 64 | self, 65 | requests_details: RequestDetails, 66 | next_interceptors: List[WebClient.WebClientInterceptor], 67 | web_client: RequestsWebClient 68 | ) -> RequestResponse: 69 | """Method intercepts the request and manages its auth headers."""
70 | need_guest_token = self._is_guest_token_to_add(requests_details) 71 | if self._is_auth_token_to_add(requests_details): 72 | self._add_auth_token(requests_details) 73 | 74 | if need_guest_token: 75 | self._add_guest_token(requests_details, web_client) 76 | 77 | response: Optional[RequestResponse] = None 78 | tries_counter = 0 79 | 80 | while tries_counter < _MAX_TRIES and (response is None or response.is_429()): 81 | if need_guest_token and response is not None: 82 | self._call_for_new_auth_request(web_client) 83 | self._add_guest_token(requests_details, web_client) 84 | response = self.get_response(requests_details, next_interceptors, web_client) 85 | tries_counter = tries_counter + 1 86 | 87 | if response.is_429(): 88 | raise TooManyRequestsException(requests_details.url) 89 | 90 | return response 91 | -------------------------------------------------------------------------------- /test-services-docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.8" 2 | services: 3 | squid_proxy: 4 | image: docker.io/salrashid123/squidproxy 5 | command: /apps/squid/sbin/squid -NsY -f /apps/squid.conf.forward 6 | ports: 7 | - "3128:3128" 8 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markowanga/stweet/fe34e98254dc7646bde6e083b5f6f745a0ee8cb6/tests/__init__.py -------------------------------------------------------------------------------- /tests/integration/all_languages_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | from tests.test_util import tweet_list_assert_condition 3 | 4 | 5 | def run_test_for_single_language(language: st.Language): 6 | search_tweets_task = st.SearchTweetsTask( 7 | all_words='#covid19', 8 | tweets_limit=10, 9 | language=language 10 | ) 11 | tweets_collector = st.CollectorTweetOutput() 12 | st.TweetSearchRunner( 13 | search_tweets_task=search_tweets_task, 14 | tweet_outputs=[tweets_collector] 15 | ).run() 16 | tweet_list_assert_condition( 17 | tweets_collector.get_raw_list(), 18 | lambda tweet: tweet.lang in language.short_value 19 | ) 20 | 21 | 22 | def test_search_in_all_languages(): 23 | for language in st.Language: 24 | run_test_for_single_language(language) 25 | -------------------------------------------------------------------------------- /tests/integration/exception_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import stweet as st 4 | from stweet import WebClient 5 | from stweet.auth import SimpleAuthTokenProvider 6 | from stweet.exceptions import RefreshTokenException, ScrapBatchBadResponse 7 | from stweet.exceptions.too_many_requests_exception import TooManyRequestsException 8 | from stweet.http_request import RequestResponse 9 | from stweet.twitter_api.twitter_auth_web_client_interceptor import TwitterAuthWebClientInterceptor 10 | from tests.mock_web_client import MockWebClient 11 | 12 | 13 | def get_client_with_default_response(response: RequestResponse = RequestResponse(None, None)) -> WebClient: 14 | return MockWebClient( 15 | default_response=response, 16 | interceptors=[TwitterAuthWebClientInterceptor()] 17 | ) 18 | 19 | 20 | def test_get_simple_auth_token_with_incorrect_response_1(): 21 | with pytest.raises(RefreshTokenException): 22 | 
SimpleAuthTokenProvider().get_new_token(get_client_with_default_response(RequestResponse(400, None))) 23 | 24 | 25 | def test_get_auth_token_with_incorrect_response_2(): 26 | with pytest.raises(TooManyRequestsException): 27 | SimpleAuthTokenProvider(50, 150).get_new_token(get_client_with_default_response(RequestResponse(429, None))) 28 | 29 | 30 | def test_get_auth_token_with_incorrect_response_3(): 31 | with pytest.raises(RefreshTokenException): 32 | SimpleAuthTokenProvider().get_new_token(get_client_with_default_response(RequestResponse(200, '{}'))) 33 | 34 | 35 | def test_get_auth_token_with_incorrect_response_4(): 36 | with pytest.raises(RefreshTokenException): 37 | SimpleAuthTokenProvider().get_new_token(get_client_with_default_response(RequestResponse(200, 'LALA'))) 38 | 39 | 40 | def test_runner_exceptions(): 41 | class TokenExpiryExceptionWebClient(st.WebClient): 42 | 43 | count_dict = dict({ 44 | 'https://api.twitter.com/2/search/adaptive.json': 0, 45 | 'https://api.twitter.com/1.1/guest/activate.json': 0 46 | }) 47 | 48 | def run_clear_request(self, params: st.http_request.RequestDetails) -> st.http_request.RequestResponse: 49 | self.count_dict[params.url] = self.count_dict[params.url] + 1 50 | if params.url == 'https://api.twitter.com/2/search/adaptive.json': 51 | if self.count_dict[params.url] == 1: 52 | return st.http_request.RequestResponse(429, None) 53 | else: 54 | return st.http_request.RequestResponse(400, '') 55 | else: 56 | return st.http_request.RequestResponse(200, '{"guest_token":"1350356785648062465"}') 57 | 58 | with pytest.raises(ScrapBatchBadResponse): 59 | search_tweets_task = st.SearchTweetsTask( 60 | all_words='#koronawirus' 61 | ) 62 | st.TweetSearchRunner( 63 | search_tweets_task=search_tweets_task, 64 | tweet_outputs=[], 65 | web_client=TokenExpiryExceptionWebClient(interceptors=[TwitterAuthWebClientInterceptor()]), 66 | ).run() 67 | 68 | 69 | def test_get_not_existing_user(): 70 | task = st.GetUsersTask(['fcbewkjdsncvjwkfs']) 71 | result = st.GetUsersRunner(task, []).run() 72 | assert result.users_count == 0 73 | -------------------------------------------------------------------------------- /tests/integration/export_import_test.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import pytest 4 | 5 | import stweet as st 6 | from tests.test_util import get_temp_test_file_name, two_lists_assert_equal 7 | 8 | # pytest.fixture(autouse=True)(run_around_tests) 9 | 10 | 11 | def get_tweets() -> List[st.UserTweetRaw]: 12 | collect_tweet_output = st.CollectorTweetOutput() 13 | task = st.SearchTweetsTask(all_words="#covid19", tweets_limit=100) 14 | st.TweetSearchRunner(task, [collect_tweet_output]).run() 15 | return collect_tweet_output.get_raw_list() 16 | 17 | 18 | def get_users() -> List[st.User]: 19 | collect_user_output = st.CollectorUserOutput() 20 | task = st.GetUsersTask(list(set([tweet.user_name for tweet in get_tweets()]))[:10]) 21 | st.GetUsersRunner(task, [collect_user_output]).run() 22 | return collect_user_output.get_scrapped_users() 23 | 24 | 25 | def test_tweet_json_lines_serialization(): 26 | jl_filename = get_temp_test_file_name('jl') 27 | tweets = get_tweets() 28 | st.export_tweets_to_json_lines(tweets, jl_filename) 29 | imported_tweets = st.read_tweets_from_json_lines_file(jl_filename) 30 | two_lists_assert_equal(imported_tweets, tweets) 31 | 32 | 33 | def test_tweet_csv_serialization(): 34 | csv_filename = get_temp_test_file_name('csv') 35 | tweets = get_tweets() 36 | 
st.export_tweets_to_csv(tweets, csv_filename) 37 | imported_tweets = st.read_tweets_from_csv_file(csv_filename) 38 | two_lists_assert_equal(imported_tweets, tweets) 39 | 40 | 41 | def test_user_json_lines_serialization(): 42 | jl_filename = get_temp_test_file_name('jl') 43 | users = get_users() 44 | st.export_users_to_json_lines(users, jl_filename) 45 | imported_tweets = st.read_users_from_json_lines_file(jl_filename) 46 | two_lists_assert_equal(imported_tweets, users) 47 | 48 | 49 | def test_user_csv_serialization(): 50 | csv_filename = get_temp_test_file_name('csv') 51 | users = get_users() 52 | st.export_users_to_csv(users, csv_filename) 53 | imported_tweets = st.read_users_from_csv_file(csv_filename) 54 | two_lists_assert_equal(users, imported_tweets) 55 | -------------------------------------------------------------------------------- /tests/integration/get_tweet_by_id_test.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | import stweet as st 4 | from stweet import RequestsWebClient 5 | from stweet.http_request import RequestDetails, RequestResponse 6 | from stweet.http_request.interceptor.logging_requests_web_client_interceptor import \ 7 | LoggingRequestsWebClientInterceptor 8 | from stweet.twitter_api.twitter_auth_web_client_interceptor import TwitterAuthWebClientInterceptor 9 | 10 | _TWITTER_JSON_NO_TWEETS = '{"globalObjects":{"tweets":{},"users":{},"moments":{},"cards":{},"places":{}' \ 11 | ',"media":{},"broadcasts":{},"topics":{},"lists":{}},"timeline":{"id":"search' \ 12 | '-6749090958448035293","instructions":[{"addEntries":{"entries":[{"entryId":"' \ 13 | 'sq-cursor-top","sortIndex":"999999999","content":{"operation":{"cursor":{"va' \ 14 | 'lue":"refresh:thGAVUV0VFVBYBFgESNQAVACUAERXsiHoVgIl6GAdERUZBVUxUFQAVABUBFQAV' \ 15 | 'AAA=","cursorType":"Top"}}}},{"entryId":"sq-cursor-bottom","sortIndex":"0","' \ 16 | 'content":{"operation":{"cursor":{"value":"scroll:thGAVUV0VFVBYBFgESNQAVACUAE' \ 17 | 'RXsiHoVgIl6GAdERUZBVUxUFQAVABUBFQAVAAA=","cursorType":"Bottom"}}}}]}}]}}' 18 | 19 | 20 | class CustomAdapter(RequestsWebClient): 21 | 22 | def __init__(self, override: List[Tuple[str, RequestResponse]]): 23 | super().__init__() 24 | self.override = override 25 | 26 | def run_request(self, params: RequestDetails) -> RequestResponse: 27 | filtered = [it for it in self.override if it[0] == params.url] 28 | if len(filtered) > 0: 29 | return filtered[0][1] 30 | else: 31 | return super().run_request(params) 32 | 33 | 34 | def test_get_tweets_by_ids(): 35 | tweets_ids = ['1337071849772093442', '1337067073051238400'] 36 | task = st.TweetsByIdTask(tweets_ids) 37 | collect_output = st.CollectorTweetOutput() 38 | result = st.TweetsByIdRunner(task, [collect_output], 39 | web_client=RequestsWebClient( 40 | interceptors=[LoggingRequestsWebClientInterceptor(), 41 | TwitterAuthWebClientInterceptor()])).run() 42 | scrapped_tweets_ids = [it.id_str for it in collect_output.get_raw_list()] 43 | assert result.downloaded_count == 1 44 | assert len(collect_output.get_raw_list()) == 1 45 | assert len(result.tweet_ids_not_scrapped) == 1 46 | 47 | 48 | def test_get_not_existing_tweet(): 49 | tweets_ids = ['1337071849772093442'] 50 | task = st.TweetsByIdTask(tweets_ids) 51 | collect_output = st.CollectorTweetOutput() 52 | result = st.TweetsByIdRunner( 53 | task, 54 | [collect_output], 55 | web_client=CustomAdapter( 56 | [('https://cdn.syndication.twimg.com/tweet', RequestResponse(404, ''))]) 57 | ).run() 58 | assert 
result.downloaded_count == 0 59 | assert len(result.tweet_ids_not_scrapped) == 1 60 | 61 | 62 | def test_get_not_existing_tweet_in_twitter(): 63 | tweets_ids = ['1337071849772093442'] 64 | task = st.TweetsByIdTask(tweets_ids) 65 | collect_output = st.CollectorTweetOutput() 66 | result = st.TweetsByIdRunner( 67 | task, 68 | [collect_output], 69 | web_client=CustomAdapter( 70 | [('https://api.twitter.com/2/search/adaptive.json', 71 | RequestResponse(200, _TWITTER_JSON_NO_TWEETS))] 72 | ) 73 | ).run() 74 | assert result.downloaded_count == 0 75 | assert len(result.tweet_ids_not_scrapped) == 1 76 | -------------------------------------------------------------------------------- /tests/integration/get_user_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | 3 | _usernames = ['ProtasiewiczJ', 'donaldtuskEPP'] 4 | 5 | 6 | def test_get_user(): 7 | task = st.GetUsersTask(_usernames) 8 | task_result = st.GetUsersRunner(task, [st.PrintUserOutput()]).run() 9 | assert len(_usernames) == task_result.users_count 10 | -------------------------------------------------------------------------------- /tests/integration/import_older_version_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | 3 | _RESOURCES_PATH = 'tests/resources' 4 | 5 | 6 | def test_tweets_csv_import_v1_1_2(): 7 | csv_filename = f'{_RESOURCES_PATH}/tweets_v1.1.2.csv' 8 | tweets_from_csv = st.read_tweets_from_csv_file(csv_filename) 9 | assert len(tweets_from_csv) == 9 10 | 11 | 12 | def test_tweets_json_import_v1_1_2(): 13 | jl_filename = f'{_RESOURCES_PATH}/tweets_v1.1.2.jl' 14 | tweets_from_jl = st.read_tweets_from_json_lines_file(jl_filename) 15 | assert len(tweets_from_jl) == 9 16 | 17 | 18 | def test_user_csv_import_v1_3_0(): 19 | csv_filename = f'{_RESOURCES_PATH}/users_v1.3.0.csv' 20 | users = st.read_users_from_csv_file(csv_filename) 21 | assert len(users) == 2 22 | 23 | 24 | def test_user_json_import_v1_3_0(): 25 | jl_filename = f'{_RESOURCES_PATH}/users_v1.3.0.jl' 26 | users = st.read_users_from_json_lines_file(jl_filename) 27 | assert len(users) == 2 28 | -------------------------------------------------------------------------------- /tests/integration/interceptor_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from io import StringIO 3 | 4 | import stweet as st 5 | from stweet.http_request import HttpMethod 6 | from stweet.http_request.interceptor.logging_requests_web_client_interceptor import LoggingRequestsWebClientInterceptor 7 | from stweet.http_request.interceptor.params_response_log_web_client_interceptor import \ 8 | ParamsResponseLogWebClientInterceptor 9 | from stweet.twitter_api.twitter_api_requests import TwitterApiRequests 10 | 11 | 12 | def get_example_request_details() -> st.http_request.RequestDetails: 13 | return st.http_request.RequestDetails( 14 | http_method=HttpMethod.GET, 15 | url='https://api.github.com/events', 16 | params=dict({}), 17 | headers=dict({}), 18 | timeout=200 19 | ) 20 | 21 | 22 | def start_redirect_output() -> StringIO: 23 | captured_output = StringIO() 24 | sys.stdout = captured_output 25 | sys.stderr = captured_output 26 | return captured_output 27 | 28 | 29 | def stop_redirect_output(): 30 | sys.stdout = sys.__stdout__ 31 | sys.stderr = sys.__stderr__ 32 | 33 | 34 | def test_logging_requests_web_client_interceptor(): 35 | captured_output = 
start_redirect_output() 36 | request = TwitterApiRequests().get_guest_token_request_details() 37 | st.RequestsWebClient(interceptors=[LoggingRequestsWebClientInterceptor()]).run_request(request) 38 | stop_redirect_output() 39 | content = captured_output.getvalue() 40 | assert "send: b'POST /1.1/guest/activate.json HTTP/1.1" in content 41 | 42 | 43 | def test_params_response_log_web_client_interceptor(): 44 | captured_output = start_redirect_output() 45 | st.RequestsWebClient(interceptors=[ParamsResponseLogWebClientInterceptor()]).run_request( 46 | TwitterApiRequests().get_guest_token_request_details()) 47 | stop_redirect_output() 48 | content = captured_output.getvalue() 49 | assert "RequestDetails(" in content 50 | assert "RequestResponse(" in content 51 | -------------------------------------------------------------------------------- /tests/integration/large_iterator_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import stweet as st 4 | from tests.test_util import get_temp_test_file_name, two_lists_assert_equal, get_tweets_to_tweet_output_test 5 | from tests.test_util import get_users_to_tweet_output_test 6 | 7 | 8 | def test_user_json_lines_read_iterator(): 9 | file_name = get_temp_test_file_name('jl') 10 | collector = st.CollectorUserOutput() 11 | get_users_to_tweet_output_test([collector, st.JsonLineFileUserOutput(file_name)]) 12 | iterator = st.UserJsonLineFileIterator(file_name, 2) 13 | list_from_iterator = [] 14 | iterator.open() 15 | while True: 16 | try: 17 | list_from_iterator.extend(next(iterator)) 18 | except StopIteration: 19 | break 20 | iterator.close() 21 | two_lists_assert_equal(list_from_iterator, collector.get_scrapped_users()) 22 | 23 | 24 | def test_user_csv_read_iterator(): 25 | file_name = get_temp_test_file_name('csv') 26 | collector = st.CollectorUserOutput() 27 | get_users_to_tweet_output_test([collector, st.CsvUserOutput(file_name)]) 28 | iterator = st.UserCsvFileIterator(file_name, 4) 29 | list_from_iterator = [] 30 | iterator.open() 31 | while True: 32 | try: 33 | list_from_iterator.extend(next(iterator)) 34 | except StopIteration: 35 | break 36 | two_lists_assert_equal(list_from_iterator, collector.get_scrapped_users()) 37 | 38 | 39 | def test_tweet_json_lines_read_iterator(): 40 | file_name = get_temp_test_file_name('jl') 41 | collector = st.CollectorTweetOutput() 42 | get_tweets_to_tweet_output_test([collector, st.JsonLineFileTweetOutput(file_name)]) 43 | iterator = st.TweetJsonLineFileIterator(file_name, 4) 44 | list_from_iterator = [] 45 | iterator.open() 46 | while True: 47 | try: 48 | list_from_iterator.extend(next(iterator)) 49 | except StopIteration: 50 | break 51 | iterator.close() 52 | two_lists_assert_equal(list_from_iterator, collector.get_raw_list()) 53 | 54 | 55 | def test_tweet_csv_read_iterator(): 56 | file_name = get_temp_test_file_name('csv') 57 | collector = st.CollectorTweetOutput() 58 | get_tweets_to_tweet_output_test([collector, st.CsvTweetOutput(file_name)]) 59 | iterator = st.TweetCsvFileIterator(file_name, 4) 60 | list_from_iterator = [] 61 | iterator.open() 62 | while True: 63 | try: 64 | list_from_iterator.extend(next(iterator)) 65 | except StopIteration: 66 | break 67 | two_lists_assert_equal(list_from_iterator, collector.get_raw_list()) 68 | -------------------------------------------------------------------------------- /tests/integration/parse_media_test.py: -------------------------------------------------------------------------------- 1 | import stweet as 
st 2 | 3 | 4 | def test_scrap_tweet_with_single_media(): 5 | tweets_ids = ['1357358278746005508'] 6 | collector = st.CollectorTweetOutput() 7 | st.TweetsByIdRunner(st.TweetsByIdTask(tweets_ids), [collector]).run() 8 | tweets = collector.get_raw_list() 9 | assert len(tweets) == 1 10 | assert len(tweets[0].media) == 1 11 | 12 | 13 | def test_scrap_tweet_with_double_media(): 14 | tweets_ids = ['1115978039534297088'] 15 | collector = st.CollectorTweetOutput() 16 | st.TweetsByIdRunner(st.TweetsByIdTask(tweets_ids), [collector]).run() 17 | tweets = collector.get_raw_list() 18 | assert len(tweets) == 1 19 | assert len(tweets[0].media) == 2 20 | -------------------------------------------------------------------------------- /tests/integration/print_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from io import StringIO 3 | 4 | import stweet as st 5 | from tests.test_util import get_tweets_to_tweet_output_test, get_users_to_tweet_output_test 6 | from tests.tweet_output_export_call_counter import TweetOutputExportCallCounter 7 | from tests.tweet_output_tweets_counter import TweetOutputTweetsCounter 8 | 9 | 10 | def test_print_all_tweet_output(): 11 | captured_output = StringIO() 12 | sys.stdout = captured_output 13 | tweets_collector = st.CollectorTweetOutput() 14 | get_tweets_to_tweet_output_test([ 15 | st.PrintTweetOutput(), 16 | tweets_collector 17 | ]) 18 | sys.stdout = sys.__stdout__ 19 | assert captured_output.getvalue().count('Tweet(') == len(tweets_collector.get_raw_list()) 20 | 21 | 22 | def test_print_all_user_output(): 23 | captured_output = StringIO() 24 | sys.stdout = captured_output 25 | users_collector = st.CollectorUserOutput() 26 | get_users_to_tweet_output_test([ 27 | st.PrintUserOutput(), 28 | users_collector 29 | ]) 30 | sys.stdout = sys.__stdout__ 31 | assert captured_output.getvalue().count('User(') == len(users_collector.get_scrapped_users()) 32 | 33 | 34 | def test_print_batch_single_tweet_tweet_output(): 35 | captured_output = StringIO() 36 | sys.stdout = captured_output 37 | tweet_output_counter = TweetOutputExportCallCounter() 38 | get_tweets_to_tweet_output_test([ 39 | st.PrintFirstInRequestTweetOutput(), 40 | tweet_output_counter 41 | ]) 42 | sys.stdout = sys.__stdout__ 43 | print_tweet_count = captured_output.getvalue().count('Tweet(') 44 | print_no_tweets_line = captured_output.getvalue().count('PrintFirstInRequestTweetOutput -- no tweets to print') 45 | assert (print_tweet_count + print_no_tweets_line) == tweet_output_counter.get_output_call_count() 46 | 47 | 48 | def test_print_each_n_tweet_tweet_output(): 49 | captured_output = StringIO() 50 | each_n = 7 51 | sys.stdout = captured_output 52 | tweet_output_counter = TweetOutputTweetsCounter() 53 | get_tweets_to_tweet_output_test([ 54 | st.PrintEveryNTweetOutput(each_n), 55 | tweet_output_counter 56 | ]) 57 | sys.stdout = sys.__stdout__ 58 | print_tweet_count = captured_output.getvalue().count('Tweet(') 59 | assert print_tweet_count == int(tweet_output_counter.get_output_call_count() / each_n) 60 | 61 | 62 | def test_print_each_n_tweet_user_output(): 63 | captured_output = StringIO() 64 | each_n = 2 65 | sys.stdout = captured_output 66 | scrap_result = get_users_to_tweet_output_test([st.PrintEveryNUserOutput(each_n)]) 67 | sys.stdout = sys.__stdout__ 68 | print_tweet_count = captured_output.getvalue().count('User(') 69 | assert print_tweet_count == int(scrap_result.users_count / each_n) 70 | 
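The print tests above drive several built-in TweetOutput implementations (PrintTweetOutput, PrintEveryNTweetOutput, PrintFirstInRequestTweetOutput) alongside the counting outputs defined under tests/. A custom output only has to subclass st.TweetOutput and implement export_tweets, which the runner calls once per scrapped batch. A minimal sketch, assuming only the st.TweetOutput interface and the UserTweetRaw.id_str field exercised by these tests (the class name FirstTweetIdOutput is illustrative):

from typing import List, Optional

import stweet as st


class FirstTweetIdOutput(st.TweetOutput):
    """Example output: remembers the id of the first scrapped tweet."""

    first_id: Optional[str]

    def __init__(self):
        self.first_id = None

    def export_tweets(self, tweets: List[st.UserTweetRaw]):
        # The runner calls export_tweets once per batch; keep only the first id.
        if self.first_id is None and len(tweets) > 0:
            self.first_id = tweets[0].id_str
        return

Such an output can be passed in the tweet_outputs list next to CollectorTweetOutput, exactly like the counter outputs used in the tests above.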
-------------------------------------------------------------------------------- /tests/integration/proxy_client_requests_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | from stweet.twitter_api.twitter_auth_web_client_interceptor import TwitterAuthWebClientInterceptor 3 | 4 | 5 | def test_using_proxy_client(): 6 | task = st.SearchTweetsTask( 7 | all_words='#covid19', 8 | tweets_limit=200 9 | ) 10 | proxy_client = st.RequestsWebClient( 11 | proxy=st.RequestsWebClientProxyConfig( 12 | http_proxy='http://localhost:3128', 13 | https_proxy='http://localhost:3128' 14 | ), 15 | interceptors=[TwitterAuthWebClientInterceptor()] 16 | ) 17 | tweets_collector = st.CollectorTweetOutput() 18 | result = st.TweetSearchRunner( 19 | search_tweets_task=task, 20 | tweet_outputs=[tweets_collector], 21 | web_client=proxy_client 22 | ).run() 23 | scrapped_tweets = tweets_collector.get_raw_list() 24 | assert isinstance(result, st.SearchTweetsResult) 25 | assert len(scrapped_tweets) == task.tweets_limit 26 | -------------------------------------------------------------------------------- /tests/integration/reply_filter_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | from tests.test_util import tweet_list_assert_condition 3 | 4 | 5 | def test_search_as_reply(): 6 | search_tweets_task = st.SearchTweetsTask( 7 | all_words='#covid19', 8 | tweets_limit=500, 9 | replies_filter=st.RepliesFilter.ONLY_REPLIES 10 | ) 11 | tweets_collector = st.CollectorTweetOutput() 12 | st.TweetSearchRunner( 13 | search_tweets_task=search_tweets_task, 14 | tweet_outputs=[tweets_collector] 15 | ).run() 16 | tweet_list_assert_condition( 17 | tweets_collector.get_raw_list(), 18 | lambda tweet: len(tweet.in_reply_to_status_id_str + tweet.in_reply_to_user_id_str) > 0 19 | ) 20 | 21 | 22 | def test_search_as_not_reply(): 23 | search_tweets_task = st.SearchTweetsTask( 24 | all_words='#covid19', 25 | tweets_limit=500, 26 | replies_filter=st.RepliesFilter.ONLY_ORIGINAL 27 | ) 28 | tweets_collector = st.CollectorTweetOutput() 29 | st.TweetSearchRunner( 30 | search_tweets_task=search_tweets_task, 31 | tweet_outputs=[tweets_collector] 32 | ).run() 33 | tweet_list_assert_condition( 34 | tweets_collector.get_raw_list(), 35 | lambda tweet: len(tweet.in_reply_to_status_id_str + tweet.in_reply_to_user_id_str) == 0 36 | ) 37 | -------------------------------------------------------------------------------- /tests/integration/search_in_language_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | from tests.test_util import tweet_list_assert_condition 3 | 4 | 5 | def _run_search_test_covid_tweets_in_language(language: st.Language): 6 | search_tweets_task = st.SearchTweetsTask( 7 | all_words='#covid19', 8 | tweets_limit=100, 9 | language=language 10 | ) 11 | tweets_collector = st.CollectorTweetOutput() 12 | st.TweetSearchRunner( 13 | search_tweets_task=search_tweets_task, 14 | tweet_outputs=[tweets_collector] 15 | ).run() 16 | tweet_list_assert_condition( 17 | tweets_collector.get_raw_list(), 18 | lambda tweet: tweet.lang == language.short_value 19 | ) 20 | 21 | 22 | def test_search_tweets_in_english(): 23 | _run_search_test_covid_tweets_in_language(st.Language.ENGLISH) 24 | 25 | 26 | def test_search_tweets_in_polish(): 27 | _run_search_test_covid_tweets_in_language(st.Language.POLISH) 28 | 29 | 30 | def test_search_tweets_in_german(): 31 | 
_run_search_test_covid_tweets_in_language(st.Language.GERMAN) 32 | -------------------------------------------------------------------------------- /tests/integration/search_return_objest_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | 3 | 4 | def test_return_tweets_objects(): 5 | phrase = '#koronawirus' 6 | search_tweets_task = st.SearchTweetsTask( 7 | all_words=phrase, 8 | tweets_limit=200 9 | ) 10 | tweets_collector = st.CollectorTweetOutput() 11 | result = st.TweetSearchRunner( 12 | search_tweets_task=search_tweets_task, 13 | tweet_outputs=[tweets_collector] 14 | ).run() 15 | scrapped_tweets = tweets_collector.get_raw_list() 16 | assert isinstance(result, st.SearchTweetsResult) 17 | assert result.downloaded_count == len(scrapped_tweets) 18 | assert result.downloaded_count > 0 19 | assert any(phrase in it.full_text for it in scrapped_tweets) 20 | -------------------------------------------------------------------------------- /tests/integration/serialization_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import stweet as st 4 | from tests.test_util import get_temp_test_file_name, get_tweets_to_tweet_output_test, \ 5 | two_lists_assert_equal 6 | 7 | 8 | def test_csv_serialization(): 9 | csv_filename = get_temp_test_file_name('csv') 10 | tweets_collector = st.CollectorTweetOutput() 11 | get_tweets_to_tweet_output_test([ 12 | st.CsvTweetOutput(csv_filename), 13 | tweets_collector 14 | ]) 15 | tweets_from_csv = st.read_tweets_from_csv_file(csv_filename) 16 | two_lists_assert_equal(tweets_from_csv, tweets_collector.get_raw_list()) 17 | 18 | 19 | def test_file_json_lines_serialization(): 20 | jl_filename = get_temp_test_file_name('jl') 21 | tweets_collector = st.CollectorTweetOutput() 22 | get_tweets_to_tweet_output_test([ 23 | st.JsonLineFileTweetOutput(jl_filename), 24 | tweets_collector 25 | ]) 26 | tweets_from_jl = st.read_tweets_from_json_lines_file(jl_filename) 27 | two_lists_assert_equal(tweets_from_jl, tweets_collector.get_raw_list()) 28 | -------------------------------------------------------------------------------- /tests/integration/time_period_test.py: -------------------------------------------------------------------------------- 1 | from arrow import Arrow 2 | 3 | import stweet as st 4 | from tests.test_util import tweet_list_assert_condition 5 | 6 | 7 | def _run_test_between_dates(since: Arrow, until: Arrow): 8 | search_tweets_task = st.SearchTweetsTask( 9 | any_word="#koronawirus #covid19", 10 | since=since, 11 | until=until 12 | ) 13 | tweets_collector = st.CollectorTweetOutput() 14 | st.TweetSearchRunner( 15 | search_tweets_task=search_tweets_task, 16 | tweet_outputs=[tweets_collector] 17 | ).run() 18 | tweet_list_assert_condition( 19 | tweets_collector.get_raw_list(), 20 | lambda tweet: since <= tweet.created_at <= until 21 | ) 22 | 23 | 24 | def test_for_utc_timezone(): 25 | _run_test_between_dates( 26 | since=Arrow(year=2020, month=6, day=11, hour=7), 27 | until=Arrow(year=2020, month=6, day=11, hour=8) 28 | ) 29 | 30 | 31 | def test_for_polish_timezone(): 32 | tz = 'Europe/Warsaw' 33 | _run_test_between_dates( 34 | since=Arrow(year=2020, month=6, day=11, hour=7, tzinfo=tz), 35 | until=Arrow(year=2020, month=6, day=11, hour=8, tzinfo=tz) 36 | ) 37 | -------------------------------------------------------------------------------- /tests/integration/tweets_count_test.py: 
-------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import stweet as st 4 | 5 | 6 | def _scrap_tweets_with_count_assert(count: int): 7 | phrase = '#covid19' 8 | search_tweets_task = st.SearchTweetsTask( 9 | all_words=phrase, 10 | tweets_limit=count 11 | ) 12 | tweets_collector = st.CollectorTweetOutput() 13 | st.TweetSearchRunner( 14 | search_tweets_task=search_tweets_task, 15 | tweet_outputs=[tweets_collector] 16 | ).run() 17 | assert len(tweets_collector.get_raw_list()) == count 18 | 19 | 20 | def test_scrap_small_count_of_tweets(): 21 | _scrap_tweets_with_count_assert(10) 22 | 23 | 24 | def test_scrap_medium_count_of_tweets(): 25 | _scrap_tweets_with_count_assert(100) 26 | 27 | 28 | def test_scrap_big_count_of_tweets(): 29 | _scrap_tweets_with_count_assert(299) 30 | -------------------------------------------------------------------------------- /tests/integration/username_search_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | from tests.test_util import to_base_text, tweet_list_assert_condition 3 | 4 | 5 | def test_search_to_username(): 6 | username = 'realDonaldTrump' 7 | search_tweets_task = st.SearchTweetsTask( 8 | to_username=username, 9 | tweets_limit=100 10 | ) 11 | tweets_collector = st.CollectorTweetOutput() 12 | st.TweetSearchRunner( 13 | search_tweets_task=search_tweets_task, 14 | tweet_outputs=[tweets_collector] 15 | ).run() 16 | tweet_list_assert_condition( 17 | tweets_collector.get_raw_list(), 18 | lambda tweet: to_base_text(username) in to_base_text(tweet.full_text) 19 | ) 20 | 21 | 22 | def test_return_tweets_from_user(): 23 | username = 'realDonaldTrump' 24 | search_tweets_task = st.SearchTweetsTask( 25 | from_username=username, 26 | tweets_limit=100 27 | ) 28 | tweets_collector = st.CollectorTweetOutput() 29 | st.TweetSearchRunner( 30 | search_tweets_task=search_tweets_task, 31 | tweet_outputs=[tweets_collector] 32 | ).run() 33 | tweet_list_assert_condition( 34 | tweets_collector.get_raw_list(), 35 | lambda tweet: tweet.user_name == username 36 | ) 37 | -------------------------------------------------------------------------------- /tests/integration/word_search_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | from tests.test_util import to_base_text, tweet_list_assert_condition 3 | 4 | 5 | def test_search_by_hashtag(): 6 | phrase = '#koronawirus' 7 | search_tweets_task = st.SearchTweetsTask( 8 | all_words=phrase, 9 | tweets_limit=200 10 | ) 11 | tweets_collector = st.CollectorTweetOutput() 12 | st.TweetSearchRunner( 13 | search_tweets_task=search_tweets_task, 14 | tweet_outputs=[tweets_collector] 15 | ).run() 16 | scrapped_tweets = tweets_collector.get_raw_list() 17 | assert any(phrase in it.full_text for it in scrapped_tweets) 18 | 19 | 20 | def test_exact_words(): 21 | exact_phrase = 'duda kaczyński kempa' 22 | search_tweets_task = st.SearchTweetsTask( 23 | exact_words=exact_phrase 24 | ) 25 | tweets_collector = st.CollectorTweetOutput() 26 | st.TweetSearchRunner( 27 | search_tweets_task=search_tweets_task, 28 | tweet_outputs=[tweets_collector] 29 | ).run() 30 | tweet_list_assert_condition( 31 | tweets_collector.get_raw_list(), 32 | lambda tweet: to_base_text(exact_phrase) in to_base_text(tweet.full_text) 33 | ) 34 | 35 | 36 | def contains_any_word(words: str, value: str) -> bool: 37 | return any([to_base_text(word) in 
to_base_text(value) for word in words.split()]) is True 38 | 39 | 40 | def test_any_word(): 41 | any_phrase = 'kaczynski tusk' 42 | search_tweets_task = st.SearchTweetsTask( 43 | any_word=any_phrase, 44 | tweets_limit=100 45 | ) 46 | tweets_collector = st.CollectorTweetOutput() 47 | st.TweetSearchRunner( 48 | search_tweets_task=search_tweets_task, 49 | tweet_outputs=[tweets_collector] 50 | ).run() 51 | 52 | tweet_list_assert_condition( 53 | tweets_collector.get_raw_list(), 54 | lambda tweet: contains_any_word(any_phrase, tweet.full_text) or contains_any_word( 55 | any_phrase, tweet.user_full_name) or contains_any_word(any_phrase, tweet.user_name) 56 | ) 57 | -------------------------------------------------------------------------------- /tests/mock_web_client.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List, Dict 2 | 3 | import stweet as st 4 | from stweet import WebClient 5 | from stweet.http_request import RequestDetails, RequestResponse 6 | 7 | 8 | class MockWebClient(st.WebClient): 9 | responses: Optional[Dict[str, RequestResponse]] 10 | default_response: Optional[RequestResponse] 11 | 12 | def __init__( 13 | self, 14 | interceptors: Optional[List[WebClient.WebClientInterceptor]] = None, 15 | default_response: Optional[RequestResponse] = None, 16 | responses: Optional[Dict[str, RequestResponse]] = None 17 | ): 18 | super().__init__(interceptors) 19 | self.responses = responses 20 | self.default_response = default_response 21 | 22 | def run_clear_request(self, params: RequestDetails) -> RequestResponse: 23 | if self.responses is not None and params.url in self.responses.keys(): 24 | return self.responses[params.url] 25 | elif self.default_response is not None: 26 | return self.default_response 27 | else: 28 | raise Exception('no value to return') 29 | -------------------------------------------------------------------------------- /tests/resources/tweets_v1.1.2.csv: -------------------------------------------------------------------------------- 1 | created_at,id_str,conversation_id_str,full_text,lang,favorited,retweeted,retweet_count,favorite_count,reply_count,quote_count,quoted_status_id_str,quoted_status_short_url,quoted_status_expand_url,user_id_str,user_name,user_full_name,user_verified,in_reply_to_status_id_str,in_reply_to_user_id_str,hashtags,mentions,urls 2 | 2021-02-05T00:54:28+00:00,1357492753215729666,1357492753215729666,"It no longer matters. The decision to accept casualties rather than accede to simple acts of social generosity has been made. Hundreds of thousands dead due to bumbling, both willful and unconscious, has been shrugged off by everyone other than, one presumes, the dead. #COVID19",en,False,False,0,0,0,0,1357490037773004802,,,3231804466,robert__gibbons,Robert Gibbons,False,,,#COVID19,, 3 | 2021-02-05T00:54:27+00:00,1357492749516292098,1357492749516292098,"Here in our hearts 4 | Oh the answer is there 5 | If we only would look there inside them 6 | We can make it better, we can make it better #WhatIf #WorldCancerDay #COVID19",en,False,False,0,0,0,0,1357489296844992512,,,1007294353818226694,bigdan071288,Daniel,False,,,"#WhatIf , #WorldCancerDay , #COVID19",, 7 | 2021-02-05T00:54:26+00:00,1357492745514942466,1357492745514942466,"Hoy más que nunca, ciudadanos, ciudadanas y autoridades de la @AlcaldiaMHmx nos unimos con un solo propósito; reducir el numero de contagios por #COVID19. No es tiempo de fiestas ni reuniones, continuamos en #SemáforoRojo 🚦 por contingencia sanitaria. 
8 | 9 | #MHelCorazónDeLaCapital https://t.co/VRRcVdXjsx",es,False,False,0,0,0,0,,,,1065021849430618112,MHSUrbanos,Servicios Urbanos,False,,,"#COVID19 , #SemáforoRojo , #MHelCorazónDeLaCapital",AlcaldiaMHmx, 10 | 2021-02-05T00:54:25+00:00,1357492742943764480,1357492742943764480,発見遅れたCOCOA不具合、厚生労働省「実機テストせず」:#朝日新聞デジタル https://t.co/ng0nDNqw2M #新型コロナウイルス #COVID19,ja,False,False,0,0,0,0,,,,113366981,otaka_thursday,おたか 🍥,False,,,"#朝日新聞デジタル , #新型コロナウイルス , #COVID19",,https://t.co/ng0nDNqw2M 11 | 2021-02-05T00:54:14+00:00,1357492697901264897,1357492697901264897,"that moment when your patient decides to (stupidly) go to Turks and Caicos and returns with COVID (omg) and uses up precious resources, time, PPE, & ED personnel to take care of her while exposing a slew of other patients and staff in the ER. #COVID19 #canyounot #COVIDIOT https://t.co/Icy9EydgLg",en,False,False,0,0,0,0,,,,2462950457,paper_canyon,paper canyon,False,,,"#COVID19 , #canyounot , #COVIDIOT",, 12 | 2021-02-05T00:54:09+00:00,1357492674731913216,1357492674731913216,“hasta un 40% de las personas que fueron internadas por complicaciones asociadas al Covid-19 tuvieron secuelas en la función de sus pulmones”. Hay que mantener los cuidados y vacunarse cuando nos toque #COVID19,es,False,False,0,0,0,0,1357358481939050496,,,139287395,jota_leonr,José Julio León,False,,,#COVID19,, 13 | 2021-02-05T00:54:08+00:00,1357492671217086464,1357492671217086464,"Feeling sorry for corporations during the #covid19 #pandemic? 14 | 15 | YOU MIGHT WANT TO EDUCATE YOURSELF on how American companies are treating front line employees during a pandemic? #MustRead",en,False,False,0,0,0,0,1357483359677550592,,,2316413918,GregCurtin,-v|v- 🍁 🇺🇸,False,,,"#covid19 , #pandemic , #MustRead",, 16 | 2021-02-05T00:54:07+00:00,1357492667064623104,1357492667064623104,"Yesterday hot topics: 17 | #lka (16.88%) 18 | #Srilanka (13.71%) 19 | #IndependenceDaySL (5.68%) 20 | #IndependenceDay (3.84%) 21 | #adaderanasinhala (2.75%) 22 | #slnews (2.75%) 23 | #Covid19 (2.09%) 24 | #IndependenceSL (1.58%) 25 | #73rdIndependenceDay (1.17%) 26 | #COVID19SL (1.08%)",en,False,False,0,0,0,0,,,,1343032398238208002,yuganOffcial,Yugan Narmathan 🇱🇰,False,,,"#lka , #Srilanka , #IndependenceDaySL , #IndependenceDay , #adaderanasinhala , #slnews , #Covid19 , #IndependenceSL , #73rdIndependenceDay , #COVID19SL",, 27 | 2021-02-05T00:54:06+00:00,1357492664363532288,1357492664363532288,#COVID19 #Impfung #Impfpflicht https://t.co/h2175ku7Zp https://t.co/UCJMOTj2Rj,und,False,False,0,0,0,0,,,,394643993,Caputschi,Pit Caputschi,False,,,"#COVID19 , #Impfung , #Impfpflicht",,https://t.co/h2175ku7Zp 28 | -------------------------------------------------------------------------------- /tests/resources/tweets_v1.1.2.jl: -------------------------------------------------------------------------------- 1 | {"created_at": "2021-02-05T00:54:28+00:00", "id_str": "1357492753215729666", "conversation_id_str": "1357492753215729666", "full_text": "It no longer matters. The decision to accept casualties rather than accede to simple acts of social generosity has been made. Hundreds of thousands dead due to bumbling, both willful and unconscious, has been shrugged off by everyone other than, one presumes, the dead. 
#COVID19", "lang": "en", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "1357490037773004802", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "3231804466", "user_name": "robert__gibbons", "user_full_name": "Robert Gibbons", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#COVID19"], "mentions": [], "urls": []} 2 | {"created_at": "2021-02-05T00:54:27+00:00", "id_str": "1357492749516292098", "conversation_id_str": "1357492749516292098", "full_text": "Here in our hearts\nOh the answer is there\nIf we only would look there inside them\nWe can make it better, we can make it better #WhatIf #WorldCancerDay #COVID19", "lang": "en", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "1357489296844992512", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "1007294353818226694", "user_name": "bigdan071288", "user_full_name": "Daniel", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#WhatIf", "#WorldCancerDay", "#COVID19"], "mentions": [], "urls": []} 3 | {"created_at": "2021-02-05T00:54:26+00:00", "id_str": "1357492745514942466", "conversation_id_str": "1357492745514942466", "full_text": "Hoy m\u00e1s que nunca, ciudadanos, ciudadanas y autoridades de la @AlcaldiaMHmx nos unimos con un solo prop\u00f3sito; reducir el numero de contagios por #COVID19. No es tiempo de fiestas ni reuniones, continuamos en #Sem\u00e1foroRojo \ud83d\udea6 por contingencia sanitaria.\n\n#MHelCoraz\u00f3nDeLaCapital https://t.co/VRRcVdXjsx", "lang": "es", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "1065021849430618112", "user_name": "MHSUrbanos", "user_full_name": "Servicios Urbanos", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#COVID19", "#Sem\u00e1foroRojo", "#MHelCoraz\u00f3nDeLaCapital"], "mentions": ["AlcaldiaMHmx"], "urls": []} 4 | {"created_at": "2021-02-05T00:54:25+00:00", "id_str": "1357492742943764480", "conversation_id_str": "1357492742943764480", "full_text": "\u767a\u898b\u9045\u308c\u305fCOCOA\u4e0d\u5177\u5408\u3001\u539a\u751f\u52b4\u50cd\u7701\u300c\u5b9f\u6a5f\u30c6\u30b9\u30c8\u305b\u305a\u300d\uff1a#\u671d\u65e5\u65b0\u805e\u30c7\u30b8\u30bf\u30eb https://t.co/ng0nDNqw2M #\u65b0\u578b\u30b3\u30ed\u30ca\u30a6\u30a4\u30eb\u30b9 #COVID19", "lang": "ja", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "113366981", "user_name": "otaka_thursday", "user_full_name": "\u304a\u305f\u304b \ud83c\udf65", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#\u671d\u65e5\u65b0\u805e\u30c7\u30b8\u30bf\u30eb", "#\u65b0\u578b\u30b3\u30ed\u30ca\u30a6\u30a4\u30eb\u30b9", "#COVID19"], "mentions": [], "urls": ["https://t.co/ng0nDNqw2M"]} 5 | {"created_at": "2021-02-05T00:54:14+00:00", "id_str": "1357492697901264897", "conversation_id_str": "1357492697901264897", "full_text": "that moment when your patient decides to (stupidly) 
go to Turks and Caicos and returns with COVID (omg) and uses up precious resources, time, PPE, & ED personnel to take care of her while exposing a slew of other patients and staff in the ER. #COVID19 #canyounot #COVIDIOT https://t.co/Icy9EydgLg", "lang": "en", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "2462950457", "user_name": "paper_canyon", "user_full_name": "paper canyon", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#COVID19", "#canyounot", "#COVIDIOT"], "mentions": [], "urls": []} 6 | {"created_at": "2021-02-05T00:54:09+00:00", "id_str": "1357492674731913216", "conversation_id_str": "1357492674731913216", "full_text": "\u201chasta un 40% de las personas que fueron internadas\u00a0por complicaciones asociadas al Covid-19 tuvieron secuelas en la funci\u00f3n de sus pulmones\u201d. Hay que mantener los cuidados y vacunarse cuando nos toque #COVID19", "lang": "es", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "1357358481939050496", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "139287395", "user_name": "jota_leonr", "user_full_name": "Jos\u00e9 Julio Le\u00f3n", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#COVID19"], "mentions": [], "urls": []} 7 | {"created_at": "2021-02-05T00:54:08+00:00", "id_str": "1357492671217086464", "conversation_id_str": "1357492671217086464", "full_text": "Feeling sorry for corporations during the #covid19 #pandemic?\n \nYOU MIGHT WANT TO EDUCATE YOURSELF on how American companies are treating front line employees during a pandemic? 
#MustRead", "lang": "en", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "1357483359677550592", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "2316413918", "user_name": "GregCurtin", "user_full_name": "-v|v- \ud83c\udf41 \ud83c\uddfa\ud83c\uddf8", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#covid19", "#pandemic", "#MustRead"], "mentions": [], "urls": []} 8 | {"created_at": "2021-02-05T00:54:07+00:00", "id_str": "1357492667064623104", "conversation_id_str": "1357492667064623104", "full_text": "Yesterday hot topics:\n#lka (16.88%)\n#Srilanka (13.71%)\n#IndependenceDaySL (5.68%)\n#IndependenceDay (3.84%)\n#adaderanasinhala (2.75%)\n#slnews (2.75%)\n#Covid19 (2.09%)\n#IndependenceSL (1.58%)\n#73rdIndependenceDay (1.17%)\n#COVID19SL (1.08%)", "lang": "en", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "1343032398238208002", "user_name": "yuganOffcial", "user_full_name": "Yugan Narmathan \ud83c\uddf1\ud83c\uddf0", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#lka", "#Srilanka", "#IndependenceDaySL", "#IndependenceDay", "#adaderanasinhala", "#slnews", "#Covid19", "#IndependenceSL", "#73rdIndependenceDay", "#COVID19SL"], "mentions": [], "urls": []} 9 | {"created_at": "2021-02-05T00:54:06+00:00", "id_str": "1357492664363532288", "conversation_id_str": "1357492664363532288", "full_text": "#COVID19 #Impfung #Impfpflicht https://t.co/h2175ku7Zp https://t.co/UCJMOTj2Rj", "lang": "und", "favorited": false, "retweeted": false, "retweet_count": 0, "favorite_count": 0, "reply_count": 0, "quote_count": 0, "quoted_status_id_str": "", "quoted_status_short_url": "", "quoted_status_expand_url": "", "user_id_str": "394643993", "user_name": "Caputschi", "user_full_name": "Pit Caputschi", "user_verified": false, "in_reply_to_status_id_str": "", "in_reply_to_user_id_str": "", "hashtags": ["#COVID19", "#Impfung", "#Impfpflicht"], "mentions": [], "urls": ["https://t.co/h2175ku7Zp"]} 10 | -------------------------------------------------------------------------------- /tests/resources/users_v1.3.0.csv: -------------------------------------------------------------------------------- 1 | created_at,id_str,rest_id_str,default_profile,default_profile_image,description,favourites_count,followers_count,friends_count,has_custom_timelines,listed_count,location,media_count,name,pinned_tweet_ids_str,profile_banner_url,profile_banner_url,profile_image_url_https,protected,screen_name,statuses_count,verified 2 | 2012-06-27T15:42:43+00:00,VXNlcjo2MjAxODI4NzU=,620182875,True,False,"Posel z Wrocław; PSL-UED;b.Wiceprzewodniczący PE https://t.co/ndbunIxXbL Partn.Wschodnie/MP for Wroclaw,PL;fmr EuroParl Vice-President dealing w EastPartnership",1066,1066,653,False,200,,910,Jacek Protasiewicz,789746720410308608,https://pbs.twimg.com/profile_banners/620182875/1562084177,https://pbs.twimg.com/profile_banners/620182875/1562084177,https://pbs.twimg.com/profile_images/1173463288723759104/zmngswpE_normal.jpg,False,ProtasiewiczJ,11851,True 3 | 2018-01-28T13:50:09+00:00,VXNlcjo5NTc2MTE3NTg0OTUxNjY0NjQ=,957611758495166464,True,False,Młoda Lewica | Aktywistka społeczna | Feministka,619,619,86,False,0,"Tarnów, 
Polska",74,Klaudia🏳️‍🌈,1186730782225829888,https://pbs.twimg.com/profile_banners/957611758495166464/1586938851,https://pbs.twimg.com/profile_banners/957611758495166464/1586938851,https://pbs.twimg.com/profile_images/1250338276046557187/wAil8yYf_normal.jpg,False,clavdiie,195,False 4 | -------------------------------------------------------------------------------- /tests/resources/users_v1.3.0.jl: -------------------------------------------------------------------------------- 1 | {"created_at": "2012-06-27T15:42:43+00:00", "id_str": "VXNlcjo2MjAxODI4NzU=", "rest_id_str": "620182875", "default_profile": true, "default_profile_image": false, "description": "Posel z Wroc\u0142aw; PSL-UED;b.Wiceprzewodnicz\u0105cy PE https://t.co/ndbunIxXbL Partn.Wschodnie/MP for Wroclaw,PL;fmr EuroParl Vice-President dealing w EastPartnership", "favourites_count": 1066, "followers_count": 1066, "friends_count": 653, "has_custom_timelines": false, "listed_count": 200, "location": "", "media_count": 910, "name": "Jacek Protasiewicz", "pinned_tweet_ids_str": ["789746720410308608"], "profile_banner_url": "https://pbs.twimg.com/profile_banners/620182875/1562084177", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1173463288723759104/zmngswpE_normal.jpg", "protected": false, "screen_name": "ProtasiewiczJ", "statuses_count": 11851, "verified": true} 2 | {"created_at": "2018-01-28T13:50:09+00:00", "id_str": "VXNlcjo5NTc2MTE3NTg0OTUxNjY0NjQ=", "rest_id_str": "957611758495166464", "default_profile": true, "default_profile_image": false, "description": "M\u0142oda Lewica | Aktywistka spo\u0142eczna | Feministka", "favourites_count": 619, "followers_count": 619, "friends_count": 86, "has_custom_timelines": false, "listed_count": 0, "location": "Tarn\u00f3w, Polska", "media_count": 74, "name": "Klaudia\ud83c\udff3\ufe0f\u200d\ud83c\udf08", "pinned_tweet_ids_str": ["1186730782225829888"], "profile_banner_url": "https://pbs.twimg.com/profile_banners/957611758495166464/1586938851", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1250338276046557187/wAil8yYf_normal.jpg", "protected": false, "screen_name": "clavdiie", "statuses_count": 195, "verified": false} 3 | -------------------------------------------------------------------------------- /tests/test_file_manager.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from tests.test_util import remove_all_temp_files 4 | 5 | 6 | @pytest.fixture(autouse=True, scope="session") 7 | def run_around_tests(): 8 | yield 9 | remove_all_temp_files() 10 | -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import string 4 | import unicodedata 5 | import uuid 6 | from typing import List, Callable 7 | 8 | import stweet as st 9 | 10 | _temp_file_prefix = 'test_temp_file_' 11 | 12 | 13 | def get_temp_test_file_name(file_extension_without_dot: str) -> str: 14 | return '{}{}.{}'.format(_temp_file_prefix, _get_uuid_str(), file_extension_without_dot) 15 | 16 | 17 | def _get_uuid_str() -> str: 18 | return str(uuid.uuid4()).replace('-', '') 19 | 20 | 21 | def remove_all_temp_files(): 22 | files_to_remove = glob.glob("{}*".format(_temp_file_prefix)) 23 | for filePath in files_to_remove: 24 | os.remove(filePath) 25 | return 26 | 27 | 28 | def _remove_accented_chars(text) -> str: 29 | new_text = unicodedata.normalize('NFKD', text).encode('ascii', 
'ignore').decode('utf-8', 'ignore') 30 | return new_text 31 | 32 | 33 | def to_base_text(value: str) -> str: 34 | table = str.maketrans(dict.fromkeys(string.punctuation)) 35 | to_return = _remove_accented_chars(value.translate(table).lower()) 36 | return to_return 37 | 38 | 39 | def get_tweets_to_tweet_output_test(tweet_output: List[st.TweetOutput]): 40 | phrase = '#koronawirus' 41 | search_tweets_task = st.SearchTweetsTask( 42 | all_words=phrase, 43 | tweets_limit=200 44 | ) 45 | st.TweetSearchRunner( 46 | search_tweets_task=search_tweets_task, 47 | tweet_outputs=tweet_output 48 | ).run() 49 | 50 | 51 | def get_users_to_tweet_output_test(user_outputs: List[st.UserOutput]) -> st.GetUsersResult: 52 | users = ['RealDonaldTrump', 'ProtasiewiczJ', 'donaldtuskEPP', 'RealDonaldTrump', 'ProtasiewiczJ', 'donaldtuskEPP'] 53 | get_users_task = st.GetUsersTask(users) 54 | return st.GetUsersRunner( 55 | get_user_task=get_users_task, 56 | user_outputs=user_outputs 57 | ).run() 58 | 59 | 60 | def tweet_list_assert_condition(tweets: List[st.UserTweetRaw], condition: Callable[[st.UserTweetRaw], bool]): 61 | for tweet in tweets: 62 | if not condition(tweet): 63 | print(f'--- {tweet}') 64 | assert all([ 65 | condition(tweet) 66 | for tweet in tweets 67 | ]) is True 68 | 69 | 70 | def two_lists_assert_equal(tweets_1: List, tweets_2: List): 71 | assert len(tweets_1) == len(tweets_2) 72 | for index in range(len(tweets_1)): 73 | if tweets_1[index] != tweets_2[index]: 74 | print('-----') 75 | print(tweets_1[index]) 76 | print(tweets_2[index]) 77 | print('--') 78 | assert tweets_1 == tweets_2 79 | -------------------------------------------------------------------------------- /tests/tweet_output_export_call_counter.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import stweet as st 4 | 5 | 6 | class TweetOutputExportCallCounter(st.TweetOutput): 7 | counter: int 8 | 9 | def __init__(self): 10 | self.counter = 0 11 | 12 | def export_tweets(self, tweets: List[st.UserTweetRaw]): 13 | self.counter += 1 14 | return 15 | 16 | def get_output_call_count(self) -> int: 17 | return self.counter 18 | -------------------------------------------------------------------------------- /tests/tweet_output_tweets_counter.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import stweet as st 4 | 5 | 6 | class TweetOutputTweetsCounter(st.TweetOutput): 7 | counter: int 8 | 9 | def __init__(self): 10 | self.counter = 0 11 | 12 | def export_tweets(self, tweets: List[st.UserTweetRaw]): 13 | self.counter += len(tweets) 14 | return 15 | 16 | def get_output_call_count(self) -> int: 17 | return self.counter 18 | -------------------------------------------------------------------------------- /tests/unit/language_test.py: -------------------------------------------------------------------------------- 1 | import stweet as st 2 | 3 | 4 | def test_unique_language_shortcut(): 5 | assert len(st.Language) == len(set([it.short_value for it in st.Language])) 6 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = pep8,docstyle,tests&cov 3 | skipsdist = True 4 | 5 | [testenv:pep8] 6 | deps = 7 | flake8 8 | basepython = python3 9 | commands = 10 | flake8 {posargs} 11 | 12 | [testenv:docstyle] 13 | deps = 14 | pydocstyle 15 | basepython = python3 
16 | commands = 17 | pydocstyle --verbose {posargs} 18 | 19 | [testenv:tests&cov] 20 | deps = 21 | pytest 22 | pytest-cov 23 | -rrequirements.txt 24 | basepython = python3 25 | setenv = 26 | PYTHONPATH={toxinidir}/ 27 | COV_CORE_CONFIG={toxinidir}/.coveragerc 28 | commands = 29 | docker-compose -f {toxinidir}/test-services-docker-compose.yml up -d 30 | pytest -s tests --cov-fail-under=100 --cov-report=xml --cov-report=term --cov=stweet 31 | docker-compose -f {toxinidir}/test-services-docker-compose.yml stop 32 | docker-compose -f {toxinidir}/test-services-docker-compose.yml rm -f 33 | 34 | [flake8] 35 | # W503 - is said to be incompatible with current PEP8, however flake8 is 36 | # not updated to handle it 37 | # W504 skipped because it is overeager and unnecessary 38 | ignore = W503,W504 39 | per-file-ignores = __init__.py:F401 40 | show-source = True 41 | exclude = .git,.venv,.tox,dist,doc,*egg,build,venv,tests,tmp 42 | import-order-style = pep8 43 | max-line-length = 120 44 | 45 | 46 | [pydocstyle] 47 | # D104 Missing docstring in public package 48 | # D203 1 blank line required before class docstring 49 | # D213 Multi-line docstring summary should start at the second line 50 | # D214 Section is over-indented 51 | # D215 Section underline is over-indented 52 | # D401 First line should be in imperative mood; try rephrasing 53 | # D405 Section name should be properly capitalized 54 | # D406 Section name should end with a newline 55 | # D407 Missing dashed underline after section 56 | # D408 Section underline should be in the line following the section’s name 57 | # D409 Section underline should match the length of its name 58 | # D410 Missing blank line after section 59 | # D411 Missing blank line before section 60 | ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411 61 | match-dir = ^(?!\.tox|venv|tests|tmp).* 62 | match = ^(?!setup).*\.py 63 | --------------------------------------------------------------------------------
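Taken together, the integration tests above show the library's typical end-to-end flow: build a task, attach one or more outputs, and run a runner. A minimal usage sketch, assuming only the public API exercised in the tests (st.SearchTweetsTask, st.CollectorTweetOutput, st.CsvTweetOutput, st.TweetSearchRunner); the output file name is illustrative:

import stweet as st

# Describe what to scrape: tweets containing the hashtag, capped at 100.
search_tweets_task = st.SearchTweetsTask(
    all_words='#covid19',
    tweets_limit=100
)

# Outputs receive every scrapped batch; several can be combined in one run.
tweets_collector = st.CollectorTweetOutput()
csv_output = st.CsvTweetOutput('covid19_tweets.csv')

result = st.TweetSearchRunner(
    search_tweets_task=search_tweets_task,
    tweet_outputs=[tweets_collector, csv_output]
).run()

print(f'downloaded {result.downloaded_count} tweets')
print(f'kept {len(tweets_collector.get_raw_list())} tweets in memory')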