├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE.md └── ISSUE_TEMPLATE │ └── ISSUE_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── automate.py ├── elasticsearch ├── README.md ├── dashboard.json ├── index-follow.json ├── index-tweets.json ├── index-user.json └── visualizations.json ├── requirements.txt ├── setup.py ├── test.py └── twint ├── __init__.py ├── __version__.py ├── cli.py ├── config.py ├── datelock.py ├── feed.py ├── format.py ├── get.py ├── output.py ├── run.py ├── storage ├── __init__.py ├── db.py ├── elasticsearch.py ├── panda.py ├── write.py └── write_meta.py ├── token.py ├── tweet.py ├── url.py ├── user.py └── verbose.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | patreon: twintproject 3 | custom: paypal.me/noneprivacy 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Issue Template 2 | Please use this template! 3 | 4 | ### Initial Check 5 | > If the issue is a request please specify that it is a request in the title (Example: [REQUEST] more features). If this is a question regarding 'twint' please specify that it's a question in the title (Example: [QUESTION] What is x?). Please **only** submit issues related to 'twint'. Thanks. 6 | 7 | >Make sure you've checked the following: 8 | 9 | - [] Python version is 3.6; 10 | - [] Updated Twint with `pip3 install --user --upgrade -e git+https://github.com/twintproject/twint.git@origin/master#egg=twint`; 11 | - [] I have searched the issues and there are no duplicates of this issue/question/request. 12 | 13 | ### Command Ran 14 | >Please provide the _exact_ command ran including the username/search/code so I may reproduce the issue. 
15 | 16 | ### Description of Issue 17 | >Please use **as much detail as possible.** 18 | 19 | ### Environment Details 20 | >Using Windows, Linux? What OS version? Running this in Anaconda? Jupyter Notebook? Terminal? 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Initial Check 2 | > If the issue is a request please specify that it is a request in the title (Example: [REQUEST] more features). If this is a question regarding 'twint' please specify that it's a question in the title (Example: [QUESTION] What is x?). Please **only** submit issues related to 'twint'. Thanks. 3 | 4 | >Make sure you've checked the following: 5 | 6 | - [ ] Python version is 3.6; 7 | - [ ] Using the latest version of Twint; 8 | - [ ] Updated Twint with `pip3 install --upgrade -e git+https://github.com/twintproject/twint.git@origin/master#egg=twint`; 9 | 10 | ### Command Ran 11 | >Please provide the _exact_ command ran including the username/search/code so I may reproduce the issue. 12 | 13 | ### Description of Issue 14 | >Please use **as much detail as possible.** 15 | 16 | ### Environment Details 17 | >Using Windows, Linux? What OS version? Running this in Anaconda? Jupyter Notebook? Terminal? 
18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | tweets.db 6 | # C extensions 7 | *.so 8 | 9 | config.ini 10 | twint/storage/mysql.py 11 | 12 | # Node Dependency directories 13 | node_modules/ 14 | jspm_packages/ 15 | tests/ 16 | # Distribution / packaging 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # dotenv 90 | .env 91 | 92 | # virtualenv 93 | .venv 94 | venv/ 95 | ENV/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | 110 | # output 111 | *.csv 
112 | *.json 113 | *.txt 114 | 115 | test_twint.py 116 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: bionic 2 | language: python 3 | python: 4 | - "3.6" 5 | - "3.7" 6 | - "3.8" 7 | - "nightly" 8 | matrix: 9 | allow_failures: 10 | - python: "nightly" 11 | - python: "3.8" 12 | install: 13 | - pip install -r requirements.txt 14 | script: 15 | - python test.py 16 | deploy: 17 | provider: pypi 18 | user: "codyzacharias" 19 | password: 20 | secure: sWWvx50F7KJBtf8z2njc+Q31WIAHiQs4zKEiGD4/7xrshw55H5z+WnqZ9VIP83qm9yKefoRKp7WnaJeXZ3ulZSLn64ue45lqFozWMyGvelRPOKvZi9XPMqBA7+qllR/GseTHSGC3G5EGxac6UEI3irYe3mZXxfjpxNOXVti8rJ2xX8TiJM0AVKRrdDiAstOhMMkXkB7fYXMQALwEp8UoW/UbjbeqsKueXydjStaESNP/QzRFZ3/tuNu+3HMz/olniLUhUWcF/xDbJVpXuaRMUalgqe+BTbDdtUVt/s/GKtpg5GAzJyhQphiCM/huihedUIKSoI+6A8PTzuxrLhB5BMi9pcllED02v7w1enpu5L2l5cRDgQJSOpkxkA5Eese8nxKOOq0KzwDQa3JByrRor8R4yz+p5s4u2r0Rs2A9fkjQYwd/uWBSEIRF4K9WZoniiikahwXq070DMRgV7HbovKSjo5NK5F8j+psrtqPF+OHN2aVfWxbGnezrOOkmzuTHhWZVj3pPSpQU1WFWHo9fPo4I6YstR4q6XjNNjrpY3ojSlv0ThMbUem7zhHTRkRsSA2SpPfqw5E3Jf7vaiQb4M5zkBVqxuq4tXb14GJ26tGD8tel8u8b+ccpkAE9xf+QavP8UHz4PbBhqgFX5TbV/H++cdsICyoZnT35yiaDOELM= 21 | on: 22 | tags: true 23 | python: "3.7" 24 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6-buster 2 | LABEL maintainer="codyzacharias@pm.me" 3 | 4 | WORKDIR /root 5 | 6 | RUN git clone --depth=1 https://github.com/twintproject/twint.git && \ 7 | cd /root/twint && \ 8 | pip3 install . 
-r requirements.txt 9 | 10 | CMD /bin/bash 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Cody Zacharias 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md LICENSE 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TWINT - Twitter Intelligence Tool 2 | ![2](https://i.imgur.com/iaH3s7z.png) 3 | ![3](https://i.imgur.com/hVeCrqL.png) 4 | 5 | [![PyPI](https://img.shields.io/pypi/v/twint.svg)](https://pypi.org/project/twint/) [![Build Status](https://travis-ci.org/twintproject/twint.svg?branch=master)](https://travis-ci.org/twintproject/twint) [![Python 3.6|3.7|3.8](https://img.shields.io/badge/Python-3.6%2F3.7%2F3.8-blue.svg)](https://www.python.org/download/releases/3.0/) [![GitHub license](https://img.shields.io/github/license/haccer/tweep.svg)](https://github.com/haccer/tweep/blob/master/LICENSE) [![Downloads](https://pepy.tech/badge/twint)](https://pepy.tech/project/twint) [![Downloads](https://pepy.tech/badge/twint/week)](https://pepy.tech/project/twint/week) [![Patreon](https://img.shields.io/endpoint.svg?url=https:%2F%2Fshieldsio-patreon.herokuapp.com%2Ftwintproject)](https://www.patreon.com/twintproject) ![](https://img.shields.io/twitter/follow/noneprivacy.svg?label=Follow&style=social) 6 | 7 | >No authentication. No API. No limits. 8 | 9 | Twint is an advanced Twitter scraping tool written in Python that allows for scraping Tweets from Twitter profiles **without** using Twitter's API. 10 | 11 | Twint utilizes Twitter's search operators to let you scrape Tweets from specific users, scrape Tweets relating to certain topics, hashtags & trends, or sort out *sensitive* information from Tweets like e-mail and phone numbers. I find this very useful, and you can get really creative with it too. 
12 | 13 | Twint also makes special queries to Twitter allowing you to also scrape a Twitter user's followers, Tweets a user has liked, and who they follow **without** any authentication, API, Selenium, or browser emulation. 14 | 15 | ## tl;dr Benefits 16 | Some of the benefits of using Twint vs Twitter API: 17 | - Can fetch almost __all__ Tweets (Twitter API limits to last 3200 Tweets only); 18 | - Fast initial setup; 19 | - Can be used anonymously and without Twitter sign up; 20 | - **No rate limitations**. 21 | 22 | ## Limits imposed by Twitter 23 | Twitter limits scrolls while browsing the user timeline. This means that with `.Profile` or with `.Favorites` you will be able to get ~3200 tweets. 24 | 25 | ## Requirements 26 | - Python 3.6; 27 | - aiohttp; 28 | - aiodns; 29 | - beautifulsoup4; 30 | - cchardet; 31 | - dataclasses 32 | - elasticsearch; 33 | - pysocks; 34 | - pandas (>=0.23.0); 35 | - aiohttp_socks; 36 | - schedule; 37 | - geopy; 38 | - fake-useragent; 39 | - py-googletransx. 40 | 41 | ## Installing 42 | 43 | **Git:** 44 | ```bash 45 | git clone --depth=1 https://github.com/twintproject/twint.git 46 | cd twint 47 | pip3 install . -r requirements.txt 48 | ``` 49 | 50 | **Pip:** 51 | ```bash 52 | pip3 install twint 53 | ``` 54 | 55 | or 56 | 57 | ```bash 58 | pip3 install --user --upgrade git+https://github.com/twintproject/twint.git@origin/master#egg=twint 59 | ``` 60 | 61 | **Pipenv**: 62 | ```bash 63 | pipenv install git+https://github.com/twintproject/twint.git#egg=twint 64 | ``` 65 | 66 | ### March 2, 2021 Update 67 | 68 | **Added**: Dockerfile 69 | 70 | Noticed a lot of people are having issues installing (including me). Please use the Dockerfile temporarily while I look into them. 71 | 72 | ## CLI Basic Examples and Combos 73 | A few simple examples to help you understand the basics: 74 | 75 | - `twint -u username` - Scrape all the Tweets of a *user* (doesn't include **retweets** but includes **replies**). 
76 | - `twint -u username -s pineapple` - Scrape all Tweets from the *user*'s timeline containing _pineapple_. 77 | - `twint -s pineapple` - Collect every Tweet containing *pineapple* from everyone's Tweets. 78 | - `twint -u username --year 2014` - Collect Tweets that were tweeted **before** 2014. 79 | - `twint -u username --since "2015-12-20 20:30:15"` - Collect Tweets that were tweeted since 2015-12-20 20:30:15. 80 | - `twint -u username --since 2015-12-20` - Collect Tweets that were tweeted since 2015-12-20 00:00:00. 81 | - `twint -u username -o file.txt` - Scrape Tweets and save to file.txt. 82 | - `twint -u username -o file.csv --csv` - Scrape Tweets and save as a csv file. 83 | - `twint -u username --email --phone` - Show Tweets that might have phone numbers or email addresses. 84 | - `twint -s "Donald Trump" --verified` - Display Tweets by verified users that Tweeted about Donald Trump. 85 | - `twint -g="48.880048,2.385939,1km" -o file.csv --csv` - Scrape Tweets from a radius of 1km around a place in Paris and export them to a csv file. 86 | - `twint -u username -es localhost:9200` - Output Tweets to Elasticsearch 87 | - `twint -u username -o file.json --json` - Scrape Tweets and save as a json file. 88 | - `twint -u username --database tweets.db` - Save Tweets to a SQLite database. 89 | - `twint -u username --followers` - Scrape a Twitter user's followers. 90 | - `twint -u username --following` - Scrape who a Twitter user follows. 91 | - `twint -u username --favorites` - Collect all the Tweets a user has favorited (gathers ~3200 tweet). 92 | - `twint -u username --following --user-full` - Collect full user information a person follows 93 | - `twint -u username --timeline` - Use an effective method to gather Tweets from a user's profile (Gathers ~3200 Tweets, including **retweets** & **replies**). 94 | - `twint -u username --retweets` - Use a quick method to gather the last 900 Tweets (that includes retweets) from a user's profile. 
95 | - `twint -u username --resume resume_file.txt` - Resume a search starting from the last saved scroll-id. 96 | 97 | More detail about the commands and options are located in the [wiki](https://github.com/twintproject/twint/wiki/Commands) 98 | 99 | ## Module Example 100 | 101 | Twint can now be used as a module and supports custom formatting. **More details are located in the [wiki](https://github.com/twintproject/twint/wiki/Module)** 102 | 103 | ```python 104 | import twint 105 | 106 | # Configure 107 | c = twint.Config() 108 | c.Username = "realDonaldTrump" 109 | c.Search = "great" 110 | 111 | # Run 112 | twint.run.Search(c) 113 | ``` 114 | > Output 115 | 116 | `955511208597184512 2018-01-22 18:43:19 GMT pineapples are the best fruit` 117 | 118 | ```python 119 | import twint 120 | 121 | c = twint.Config() 122 | 123 | c.Username = "noneprivacy" 124 | c.Custom["tweet"] = ["id"] 125 | c.Custom["user"] = ["bio"] 126 | c.Limit = 10 127 | c.Store_csv = True 128 | c.Output = "none" 129 | 130 | twint.run.Search(c) 131 | ``` 132 | 133 | ## Storing Options 134 | - Write to file; 135 | - CSV; 136 | - JSON; 137 | - SQLite; 138 | - Elasticsearch. 139 | 140 | ## Elasticsearch Setup 141 | 142 | Details on setting up Elasticsearch with Twint is located in the [wiki](https://github.com/twintproject/twint/wiki/Elasticsearch). 143 | 144 | ## Graph Visualization 145 | ![graph](https://i.imgur.com/EEJqB8n.png) 146 | 147 | [Graph](https://github.com/twintproject/twint/wiki/Graph) details are also located in the [wiki](https://github.com/twintproject/twint/wiki/Graph). 148 | 149 | We are developing a Twint Desktop App. 150 | 151 | ![4](https://i.imgur.com/DzcfIgL.png) 152 | 153 | ## FAQ 154 | > I tried scraping tweets from a user, I know that they exist but I'm not getting them 155 | 156 | Twitter can shadow-ban accounts, which means that their tweets will not be available via search. 
To solve this, pass `--profile-full` if you are using Twint via CLI or, if are using Twint as module, add `config.Profile_full = True`. Please note that this process will be quite slow. 157 | ## More Examples 158 | 159 | #### Followers/Following 160 | 161 | > To get only follower usernames/following usernames 162 | 163 | `twint -u username --followers` 164 | 165 | `twint -u username --following` 166 | 167 | > To get user info of followers/following users 168 | 169 | `twint -u username --followers --user-full` 170 | 171 | `twint -u username --following --user-full` 172 | 173 | #### userlist 174 | 175 | > To get only user info of user 176 | 177 | `twint -u username --user-full` 178 | 179 | > To get user info of users from a userlist 180 | 181 | `twint --userlist inputlist --user-full` 182 | 183 | 184 | #### tweet translation (experimental) 185 | 186 | > To get 100 english tweets and translate them to italian 187 | 188 | `twint -u noneprivacy --csv --output none.csv --lang en --translate --translate-dest it --limit 100` 189 | 190 | or 191 | 192 | ```python 193 | import twint 194 | 195 | c = twint.Config() 196 | c.Username = "noneprivacy" 197 | c.Limit = 100 198 | c.Store_csv = True 199 | c.Output = "none.csv" 200 | c.Lang = "en" 201 | c.Translate = True 202 | c.TranslateDest = "it" 203 | twint.run.Search(c) 204 | ``` 205 | 206 | Notes: 207 | - [Google translate has some quotas](https://cloud.google.com/translate/quotas) 208 | 209 | ## Featured Blog Posts: 210 | - [How to use Twint as an OSINT tool](https://pielco11.ovh/posts/twint-osint/) 211 | - [Basic tutorial made by Null Byte](https://null-byte.wonderhowto.com/how-to/mine-twitter-for-targeted-information-with-twint-0193853/) 212 | - [Analyzing Tweets with NLP in minutes with Spark, Optimus and Twint](https://towardsdatascience.com/analyzing-tweets-with-nlp-in-minutes-with-spark-optimus-and-twint-a0c96084995f) 213 | - [Loading tweets into Kafka and 
Neo4j](https://markhneedham.com/blog/2019/05/29/loading-tweets-twint-kafka-neo4j/) 214 | 215 | ## Contact 216 | 217 | If you have any question, want to join in discussions, or need extra help, you are welcome to join our Twint focused channel at [OSINT team](https://osint.team) 218 | -------------------------------------------------------------------------------- /automate.py: -------------------------------------------------------------------------------- 1 | import twint 2 | import schedule 3 | import time 4 | 5 | # you can change the name of each "job" after "def" if you'd like. 6 | def jobone(): 7 | print ("Fetching Tweets") 8 | c = twint.Config() 9 | # choose username (optional) 10 | c.Username = "insert username here" 11 | # choose search term (optional) 12 | c.Search = "insert search term here" 13 | # choose beginning time (narrow results) 14 | c.Since = "2018-01-01" 15 | # set limit on total tweets 16 | c.Limit = 1000 17 | # no idea, but makes the csv format properly 18 | c.Store_csv = True 19 | # format of the csv 20 | c.Custom = ["date", "time", "username", "tweet", "link", "likes", "retweets", "replies", "mentions", "hashtags"] 21 | # change the name of the csv file 22 | c.Output = "filename.csv" 23 | twint.run.Search(c) 24 | 25 | def jobtwo(): 26 | print ("Fetching Tweets") 27 | c = twint.Config() 28 | # choose username (optional) 29 | c.Username = "insert username here" 30 | # choose search term (optional) 31 | c.Search = "insert search term here" 32 | # choose beginning time (narrow results) 33 | c.Since = "2018-01-01" 34 | # set limit on total tweets 35 | c.Limit = 1000 36 | # no idea, but makes the csv format properly 37 | c.Store_csv = True 38 | # format of the csv 39 | c.Custom = ["date", "time", "username", "tweet", "link", "likes", "retweets", "replies", "mentions", "hashtags"] 40 | # change the name of the csv file 41 | c.Output = "filename2.csv" 42 | twint.run.Search(c) 43 | 44 | # run once when you start the program 45 | 46 | jobone() 47 | 
jobtwo() 48 | 49 | # run every minute(s), hour, day at, day of the week, day of the week and time. Use "#" to block out which ones you don't want to use. Remove it to active. Also, replace "jobone" and "jobtwo" with your new function names (if applicable) 50 | 51 | # schedule.every(1).minutes.do(jobone) 52 | schedule.every().hour.do(jobone) 53 | # schedule.every().day.at("10:30").do(jobone) 54 | # schedule.every().monday.do(jobone) 55 | # schedule.every().wednesday.at("13:15").do(jobone) 56 | 57 | # schedule.every(1).minutes.do(jobtwo) 58 | schedule.every().hour.do(jobtwo) 59 | # schedule.every().day.at("10:30").do(jobtwo) 60 | # schedule.every().monday.do(jobtwo) 61 | # schedule.every().wednesday.at("13:15").do(jobtwo) 62 | 63 | while True: 64 | schedule.run_pending() 65 | time.sleep(1) 66 | -------------------------------------------------------------------------------- /elasticsearch/README.md: -------------------------------------------------------------------------------- 1 | # Elasticsearch How-To 2 | 3 | ![dashboard](https://i.imgur.com/BEbtdo5.png) 4 | 5 | Please read the Wiki [here](https://github.com/twintproject/twint/wiki/Elasticsearch) 6 | -------------------------------------------------------------------------------- /elasticsearch/dashboard.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_id": "e6d65380-bfe2-11e8-961a-d371b24d5d1d", 4 | "_type": "dashboard", 5 | "_source": { 6 | "title": "Twint Dashboard", 7 | "hits": 0, 8 | "description": "", 9 | "panelsJSON": 
"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":40,\"h\":17,\"i\":\"1\"},\"embeddableConfig\":{},\"id\":\"d47421c0-bfd5-11e8-8858-bbc566841533\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"2\",\"gridData\":{\"x\":40,\"y\":6,\"w\":8,\"h\":11,\"i\":\"2\"},\"embeddableConfig\":{\"vis\":{\"legendOpen\":false}},\"id\":\"e2b89640-bfd4-11e8-8858-bbc566841533\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":32,\"w\":20,\"h\":17,\"i\":\"3\"},\"embeddableConfig\":{\"vis\":{\"legendOpen\":false}},\"id\":\"8a8bb420-bfd9-11e8-8858-bbc566841533\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":17,\"w\":33,\"h\":15,\"i\":\"4\"},\"embeddableConfig\":{\"vis\":{\"legendOpen\":false}},\"id\":\"a8d3ee70-bfd9-11e8-8858-bbc566841533\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"6\",\"gridData\":{\"x\":40,\"y\":0,\"w\":8,\"h\":6,\"i\":\"6\"},\"embeddableConfig\":{},\"id\":\"37cd72e0-bfe4-11e8-961a-d371b24d5d1d\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"7\",\"gridData\":{\"x\":33,\"y\":17,\"w\":15,\"h\":15,\"i\":\"7\"},\"embeddableConfig\":{},\"id\":\"149ecbc0-bfe4-11e8-961a-d371b24d5d1d\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"8\",\"gridData\":{\"x\":20,\"y\":32,\"w\":28,\"h\":17,\"i\":\"8\"},\"version\":\"6.3.2\",\"type\":\"visualization\",\"id\":\"b45ec590-c267-11e8-bcd4-3956fe930db7\",\"embeddableConfig\":{}}]", 10 | "optionsJSON": "{\"darkTheme\":true,\"hidePanelTitles\":true,\"useMargins\":true}", 11 | "version": 1, 12 | "timeRestore": false, 13 | "kibanaSavedObjectMeta": { 14 | "searchSourceJSON": "{\"query\":{\"language\":\"lucene\",\"query\":\"\"},\"filter\":[],\"highlightAll\":true,\"version\":true}" 15 | } 16 | } 17 | } 18 | ] -------------------------------------------------------------------------------- /elasticsearch/index-follow.json: 
-------------------------------------------------------------------------------- 1 | PUT twintgraph 2 | { 3 | "mappings": { 4 | "items": { 5 | "properties": { 6 | "user": {"type": "keyword"}, 7 | "follow": {"type": "keyword"}, 8 | "essid": {"type": "keyword"} 9 | } 10 | } 11 | }, 12 | "settings": { 13 | "number_of_shards": 1 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /elasticsearch/index-tweets.json: -------------------------------------------------------------------------------- 1 | PUT twinttweets 2 | { 3 | "mappings": { 4 | "items": { 5 | "properties": { 6 | "id": {"type": "long"}, 7 | "conversation_id": {"type": "long"}, 8 | "created_at": {"type": "long"}, 9 | "date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"}, 10 | "timezone": {"type": "keyword"}, 11 | "place": {"type": "keyword"}, 12 | "location": {"type": "keyword"}, 13 | "tweet": {"type": "text"}, 14 | "hashtags": {"type": "keyword"}, 15 | "cashtags": {"type": "keyword"}, 16 | "user_id": {"type": "long"}, 17 | "user_id_str": {"type": "keyword"}, 18 | "username": {"type": "keyword"}, 19 | "name": {"type": "text"}, 20 | "profile_image_url": {"type": "text"}, 21 | "day": {"type": "integer"}, 22 | "hour": {"type": "integer"}, 23 | "link": {"type": "text"}, 24 | "retweet": {"type": "text"}, 25 | "essid": {"type": "keyword"}, 26 | "nlikes": {"type": "integer"}, 27 | "nreplies": {"type": "integer"}, 28 | "nretweets": {"type": "integer"}, 29 | "quote_url": {"type": "text"}, 30 | "video": {"type": "integer"}, 31 | "thumbnail": {"type": "text"}, 32 | "search": {"type": "text"}, 33 | "near": {"type": "text"}, 34 | "geo_near": {"type": "geo_point"}, 35 | "geo_tweet": {"type": "geo_point"}, 36 | "photos": {"type": "text"}, 37 | "mentions": {"type": "text"}, 38 | "translation": {"type": "text"}, 39 | "trans_src": {"type": "keyword"}, 40 | "trans_dev": {"type": "keyword"} 41 | } 42 | } 43 | } 44 | , 45 | "settings": { 46 | "number_of_shards": 1 47 | } 48 | }
49 | -------------------------------------------------------------------------------- /elasticsearch/index-user.json: -------------------------------------------------------------------------------- 1 | PUT twintuser 2 | { 3 | "mappings": { 4 | "items": { 5 | "properties": { 6 | "id": {"type": "keyword"}, 7 | "name": {"type": "keyword"}, 8 | "username": {"type": "keyword"}, 9 | "bio": {"type": "text"}, 10 | "location": {"type": "keyword"}, 11 | "url": {"type": "text"}, 12 | "join_datetime": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"}, 13 | "join_date": {"type": "date", "format": "yyyy-MM-dd"}, 14 | "join_time": {"type": "date", "format": "HH:mm:ss"}, 15 | "tweets": {"type": "integer"}, 16 | "following": {"type": "integer"}, 17 | "followers": {"type": "integer"}, 18 | "likes": {"type": "integer"}, 19 | "media": {"type": "integer"}, 20 | "private": {"type": "integer"}, 21 | "verified": {"type": "integer"}, 22 | "avatar": {"type": "text"}, 23 | "background_image": {"type": "text"}, 24 | "session": {"type": "keyword"}, 25 | "geo_user": {"type": "geo_point"} 26 | } 27 | } 28 | } 29 | , 30 | "settings": { 31 | "number_of_shards": 1 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /elasticsearch/visualizations.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_id": "d47421c0-bfd5-11e8-8858-bbc566841533", 4 | "_type": "visualization", 5 | "_source": { 6 | "title": "Activity [twinttweets]", 7 | "visState": "{\"title\":\"Activity 
[twinttweets]\",\"type\":\"histogram\",\"params\":{\"type\":\"histogram\",\"grid\":{\"categoryLines\":true,\"style\":{\"color\":\"#eee\"},\"valueAxis\":\"ValueAxis-1\"},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Tweets\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"area\",\"mode\":\"stacked\",\"data\":{\"label\":\"Tweets\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"interpolate\":\"cardinal\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":true},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Tweets\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{},\"customLabel\":\"Days\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"user_id\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"User ids\"}}]}", 8 | "uiStateJSON": "{}", 9 | "description": "", 10 | "version": 1, 11 | "kibanaSavedObjectMeta": { 12 | "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\",\"language\":\"lucene\"},\"filter\":[]}" 13 | } 14 | } 15 | }, 16 | { 17 
| "_id": "e2b89640-bfd4-11e8-8858-bbc566841533", 18 | "_type": "visualization", 19 | "_source": { 20 | "title": "Activity - pie [twinttweets]", 21 | "visState": "{\"aggs\":[{\"enabled\":true,\"id\":\"1\",\"params\":{},\"schema\":\"metric\",\"type\":\"count\"},{\"enabled\":true,\"id\":\"2\",\"params\":{\"field\":\"user_id\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"size\":5},\"schema\":\"segment\",\"type\":\"terms\"}],\"params\":{\"addLegend\":true,\"addTooltip\":true,\"isDonut\":true,\"labels\":{\"last_level\":true,\"show\":false,\"truncate\":100,\"values\":true},\"legendPosition\":\"right\",\"type\":\"pie\"},\"title\":\"Activity - pie [twinttweets]\",\"type\":\"pie\"}", 22 | "uiStateJSON": "{}", 23 | "description": "", 24 | "version": 1, 25 | "kibanaSavedObjectMeta": { 26 | "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"language\":\"lucene\",\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\"},\"filter\":[]}" 27 | } 28 | } 29 | }, 30 | { 31 | "_id": "37cd72e0-bfe4-11e8-961a-d371b24d5d1d", 32 | "_type": "visualization", 33 | "_source": { 34 | "title": "Tweets Count [twinttweet]", 35 | "visState": "{\"title\":\"Tweets Count [twinttweet]\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\",\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":33}}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Tweets\"}}]}", 36 | "uiStateJSON": "{}", 37 | "description": "", 38 | "version": 1, 39 | "kibanaSavedObjectMeta": { 40 | "searchSourceJSON": 
"{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"language\":\"lucene\",\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\"},\"filter\":[]}" 41 | } 42 | } 43 | }, 44 | { 45 | "_id": "149ecbc0-bfe4-11e8-961a-d371b24d5d1d", 46 | "_type": "visualization", 47 | "_source": { 48 | "title": "Word Cloud [twinttweets]", 49 | "visState": "{\"title\":\"Word Cloud [twinttweets]\",\"type\":\"tagcloud\",\"params\":{\"scale\":\"linear\",\"orientation\":\"single\",\"minFontSize\":10,\"maxFontSize\":50,\"showLabel\":false},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"username\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}]}", 50 | "uiStateJSON": "{}", 51 | "description": "", 52 | "version": 1, 53 | "kibanaSavedObjectMeta": { 54 | "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\",\"language\":\"lucene\"},\"filter\":[]}" 55 | } 56 | } 57 | }, 58 | { 59 | "_id": "a8d3ee70-bfd9-11e8-8858-bbc566841533", 60 | "_type": "visualization", 61 | "_source": { 62 | "title": "Day-activity [twinttweet]", 63 | "visState": "{\"title\":\"Day-activity 
[twinttweet]\",\"type\":\"histogram\",\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"labels\":{\"show\":true,\"truncate\":100,\"rotate\":0},\"position\":\"bottom\",\"scale\":{\"type\":\"linear\"},\"show\":true,\"style\":{},\"title\":{},\"type\":\"category\"}],\"grid\":{\"categoryLines\":true,\"style\":{\"color\":\"#eee\"},\"valueAxis\":\"ValueAxis-3\"},\"legendPosition\":\"right\",\"orderBucketsBySum\":false,\"seriesParams\":[{\"data\":{\"id\":\"1\",\"label\":\"Tweets\"},\"drawLinesBetweenPoints\":true,\"mode\":\"normal\",\"show\":\"true\",\"showCircles\":true,\"type\":\"histogram\",\"valueAxis\":\"ValueAxis-3\"}],\"times\":[],\"type\":\"histogram\",\"valueAxes\":[{\"id\":\"ValueAxis-3\",\"labels\":{\"filter\":false,\"rotate\":0,\"show\":true,\"truncate\":100},\"name\":\"LeftAxis-1\",\"position\":\"left\",\"scale\":{\"mode\":\"normal\",\"type\":\"linear\"},\"show\":true,\"style\":{},\"title\":{\"text\":\"Tweets\"},\"type\":\"value\"}]},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Tweets\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"hour\",\"interval\":1,\"min_doc_count\":true,\"extended_bounds\":{\"min\":0,\"max\":23}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"user_id\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"size\":10,\"order\":\"asc\",\"orderBy\":\"_term\",\"customLabel\":\"\"}}]}", 64 | "uiStateJSON": "{\"vis\":{\"legendOpen\":true}}", 65 | "description": "", 66 | "version": 1, 67 | "kibanaSavedObjectMeta": { 68 | "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"language\":\"lucene\",\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\"},\"filter\":[]}" 69 | } 70 | } 71 | }, 72 | { 
73 | "_id": "8a8bb420-bfd9-11e8-8858-bbc566841533", 74 | "_type": "visualization", 75 | "_source": { 76 | "title": "Week-activity [twinttweet]", 77 | "visState": "{\"title\":\"Week-activity [twinttweet]\",\"type\":\"histogram\",\"params\":{\"type\":\"histogram\",\"grid\":{\"categoryLines\":true,\"style\":{\"color\":\"#eee\"},\"valueAxis\":\"ValueAxis-1\"},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100,\"rotate\":0},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Tweets\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"histogram\",\"mode\":\"normal\",\"data\":{\"label\":\"Tweets\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Tweets\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"day\",\"interval\":1,\"min_doc_count\":true,\"extended_bounds\":{},\"customLabel\":\"Days of the week\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"user_id\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\",\"customLabel\":\"\"}}]}", 78 | "uiStateJSON": "{}", 79 | "description": "", 80 | "version": 1, 81 | "kibanaSavedObjectMeta": { 82 | "searchSourceJSON": 
"{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\",\"language\":\"lucene\"},\"filter\":[]}" 83 | } 84 | } 85 | }, 86 | { 87 | "_id": "b45ec590-c267-11e8-bcd4-3956fe930db7", 88 | "_type": "visualization", 89 | "_source": { 90 | "title": "Heat-map [twinttweets]", 91 | "visState": "{\"title\":\"Heat-map [twinttweets]\",\"type\":\"heatmap\",\"params\":{\"type\":\"heatmap\",\"addTooltip\":true,\"addLegend\":true,\"enableHover\":true,\"legendPosition\":\"right\",\"times\":[],\"colorsNumber\":10,\"colorSchema\":\"Reds\",\"setColorRange\":false,\"colorsRange\":[{\"from\":0,\"to\":10},{\"from\":10,\"to\":100},{\"from\":100,\"to\":200},{\"from\":200,\"to\":500},{\"from\":500,\"to\":1000},{\"from\":1000,\"to\":2000},{\"from\":2000,\"to\":3000},{\"from\":3000,\"to\":4000},{\"from\":4000,\"to\":5000},{\"from\":7000,\"to\":null}],\"invertColors\":false,\"percentageMode\":false,\"valueAxes\":[{\"show\":false,\"id\":\"ValueAxis-1\",\"type\":\"value\",\"scale\":{\"type\":\"linear\",\"defaultYExtents\":true},\"labels\":{\"show\":false,\"rotate\":270,\"overwriteColor\":false,\"color\":\"#555\"}}]},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"hour\",\"interval\":1,\"min_doc_count\":false,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"histogram\",\"schema\":\"group\",\"params\":{\"field\":\"day\",\"interval\":1,\"min_doc_count\":false,\"extended_bounds\":{\"min\":0,\"max\":2}}}]}", 92 | "uiStateJSON": "{\"vis\":{\"defaultColors\":{\"3 - 592\":\"rgb(255,245,240)\",\"592 - 1.180\":\"rgb(254,228,216)\",\"1.180 - 1.769\":\"rgb(253,202,181)\",\"1.769 - 2.357\":\"rgb(252,171,142)\",\"2.357 - 2.945\":\"rgb(252,138,106)\",\"2.945 - 3.534\":\"rgb(251,106,74)\",\"3.534 - 4.122\":\"rgb(241,68,50)\",\"4.122 - 
4.711\":\"rgb(217,38,35)\",\"4.711 - 5.299\":\"rgb(188,20,26)\",\"5.299 - 5.887\":\"rgb(152,12,19)\"},\"colors\":{\"3 - 592\":\"#FCEACA\",\"592 - 1.180\":\"#F9E2D2\",\"1.180 - 1.769\":\"#F9BA8F\"}}}", 93 | "description": "", 94 | "version": 1, 95 | "kibanaSavedObjectMeta": { 96 | "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"filter\":[],\"query\":{\"language\":\"lucene\",\"query\":\"\"}}" 97 | } 98 | } 99 | } 100 | ] -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp 2 | aiodns 3 | beautifulsoup4 4 | cchardet 5 | dataclasses 6 | elasticsearch 7 | pysocks 8 | pandas>=0.23.0 9 | aiohttp_socks<=0.4.1 10 | schedule 11 | geopy 12 | fake-useragent 13 | googletransx 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | from setuptools import setup 3 | import io 4 | import os 5 | 6 | # Package meta-data 7 | NAME = 'twint' 8 | DESCRIPTION = 'An advanced Twitter scraping & OSINT tool.' 
9 | URL = 'https://github.com/twintproject/twint' 10 | EMAIL = 'codyzacharias@pm.me' 11 | AUTHOR = 'Cody Zacharias' 12 | REQUIRES_PYTHON = '>=3.6.0' 13 | VERSION = None 14 | 15 | # Packages required 16 | REQUIRED = [ 17 | 'aiohttp', 'aiodns', 'beautifulsoup4', 'cchardet', 'dataclasses', 18 | 'elasticsearch', 'pysocks', 'pandas', 'aiohttp_socks', 19 | 'schedule', 'geopy', 'fake-useragent', 'googletransx' 20 | ] 21 | 22 | here = os.path.abspath(os.path.dirname(__file__)) 23 | 24 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 25 | long_description = '\n' + f.read() 26 | 27 | # Load the package's __version__.py 28 | about = {} 29 | if not VERSION: 30 | with open(os.path.join(here, NAME, '__version__.py')) as f: 31 | exec(f.read(), about) 32 | else: 33 | about['__version__'] = VERSION 34 | 35 | setup( 36 | name=NAME, 37 | version=about['__version__'], 38 | description=DESCRIPTION, 39 | long_description=long_description, 40 | long_description_content_type="text/markdown", 41 | author=AUTHOR, 42 | author_email=EMAIL, 43 | python_requires=REQUIRES_PYTHON, 44 | url=URL, 45 | packages=['twint', 'twint.storage'], 46 | entry_points={ 47 | 'console_scripts': [ 48 | 'twint = twint.cli:run_as_command', 49 | ], 50 | }, 51 | install_requires=REQUIRED, 52 | dependency_links=[ 53 | 'git+https://github.com/x0rzkov/py-googletrans#egg=googletrans' 54 | ], 55 | license='MIT', 56 | classifiers=[ 57 | 'License :: OSI Approved :: MIT License', 58 | 'Programming Language :: Python', 59 | 'Programming Language :: Python :: 3', 60 | 'Programming Language :: Python :: 3.6', 61 | 'Programming Language :: Python :: 3.7', 62 | 'Programming Language :: Python :: 3.8', 63 | 'Programming Language :: Python :: Implementation :: CPython', 64 | ], 65 | ) 66 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import twint 2 | import os 3 | 4 | ''' 5 | 
Test.py - Testing TWINT to make sure everything works. 6 | ''' 7 | 8 | 9 | def test_reg(c, run): 10 | print("[+] Beginning vanilla test in {}".format(str(run))) 11 | run(c) 12 | 13 | 14 | def test_db(c, run): 15 | print("[+] Beginning DB test in {}".format(str(run))) 16 | c.Database = "test_twint.db" 17 | run(c) 18 | 19 | 20 | def custom(c, run, _type): 21 | print("[+] Beginning custom {} test in {}".format(_type, str(run))) 22 | c.Custom['tweet'] = ["id", "username"] 23 | c.Custom['user'] = ["id", "username"] 24 | run(c) 25 | 26 | 27 | def test_json(c, run): 28 | c.Store_json = True 29 | c.Output = "test_twint.json" 30 | custom(c, run, "JSON") 31 | print("[+] Beginning JSON test in {}".format(str(run))) 32 | run(c) 33 | 34 | 35 | def test_csv(c, run): 36 | c.Store_csv = True 37 | c.Output = "test_twint.csv" 38 | custom(c, run, "CSV") 39 | print("[+] Beginning CSV test in {}".format(str(run))) 40 | run(c) 41 | 42 | 43 | def main(): 44 | c = twint.Config() 45 | c.Username = "verified" 46 | c.Limit = 20 47 | c.Store_object = True 48 | 49 | # Separate objects are necessary. 
50 | 51 | f = twint.Config() 52 | f.Username = "verified" 53 | f.Limit = 20 54 | f.Store_object = True 55 | f.User_full = True 56 | 57 | runs = [ 58 | twint.run.Profile, # this doesn't 59 | twint.run.Search, # this works 60 | twint.run.Following, 61 | twint.run.Followers, 62 | twint.run.Favorites, 63 | ] 64 | 65 | tests = [test_reg, test_json, test_csv, test_db] 66 | 67 | # Something breaks if we don't split these up 68 | 69 | for run in runs[:3]: 70 | if run == twint.run.Search: 71 | c.Since = "2012-1-1 20:30:22" 72 | c.Until = "2017-1-1" 73 | else: 74 | c.Since = "" 75 | c.Until = "" 76 | 77 | for test in tests: 78 | test(c, run) 79 | 80 | for run in runs[3:]: 81 | for test in tests: 82 | test(f, run) 83 | 84 | files = ["test_twint.db", "test_twint.json", "test_twint.csv"] 85 | for _file in files: 86 | os.remove(_file) 87 | 88 | print("[+] Testing complete!") 89 | 90 | 91 | if __name__ == '__main__': 92 | main() 93 | -------------------------------------------------------------------------------- /twint/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | TWINT - Twitter Intelligence Tool (formerly known as Tweep). 3 | 4 | See wiki on Github for in-depth details. 5 | https://github.com/twintproject/twint/wiki 6 | 7 | Licensed under MIT License 8 | Copyright (c) 2018 Cody Zacharias 9 | ''' 10 | import logging, os 11 | 12 | from .config import Config 13 | from .__version__ import __version__ 14 | from . 
import run 15 | 16 | _levels = { 17 | 'info': logging.INFO, 18 | 'debug': logging.DEBUG 19 | } 20 | 21 | _level = os.getenv('TWINT_DEBUG', 'info') 22 | _logLevel = _levels[_level] 23 | 24 | if _level == "debug": 25 | logger = logging.getLogger() 26 | _output_fn = 'twint.log' 27 | logger.setLevel(_logLevel) 28 | formatter = logging.Formatter('%(levelname)s:%(asctime)s:%(name)s:%(message)s') 29 | fileHandler = logging.FileHandler(_output_fn) 30 | fileHandler.setLevel(_logLevel) 31 | fileHandler.setFormatter(formatter) 32 | logger.addHandler(fileHandler) 33 | -------------------------------------------------------------------------------- /twint/__version__.py: -------------------------------------------------------------------------------- 1 | VERSION = (2, 1, 21) 2 | 3 | __version__ = '.'.join(map(str, VERSION)) 4 | -------------------------------------------------------------------------------- /twint/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | ''' 3 | Twint.py - Twitter Intelligence Tool (formerly known as Tweep). 4 | 5 | See wiki on Github for in-depth details. 6 | https://github.com/twintproject/twint/wiki 7 | 8 | Licensed under MIT License 9 | Copyright (c) 2018 The Twint Project 10 | ''' 11 | import sys 12 | import os 13 | import argparse 14 | 15 | from . import run 16 | from . import config 17 | from . 
def error(_error, message):
    """Print an error in ``[-] kind: message`` form to stdout and exit.

    Exits with status 0 (kept for backward compatibility with existing
    scripts that wrap the CLI).
    """
    print("[-] {}: {}".format(_error, message))
    sys.exit(0)


def check(args):
    """Validate parsed CLI arguments; terminate via error() on conflicts.

    The branches mirror the mutually-exclusive argument combinations
    documented in the wiki (username vs. userid vs. --all, search vs.
    timeline, storage flags requiring -o, positive backoff parameters).
    """
    if args.username is not None or args.userlist or args.members_list:
        if args.verified:
            error("Contradicting Args",
                  "Please use --verified in combination with -s.")
        if args.userid:
            error("Contradicting Args",
                  "--userid and -u cannot be used together.")
        if args.all:
            error("Contradicting Args",
                  "--all and -u cannot be used together.")
    elif args.search and args.timeline:
        error("Contradicting Args",
              "--s and --tl cannot be used together.")
    elif args.timeline and not args.username:
        error("Error", "-tl cannot be used without -u.")
    elif args.search is None:
        if args.custom_query is not None:
            pass
        elif (args.geo or args.near) is None and not (args.all or args.userid):
            error("Error", "Please use at least -u, -s, -g or --near.")
    elif args.all and args.userid:
        error("Contradicting Args",
              "--all and --userid cannot be used together")
    if args.output is None:
        if args.csv:
            error("Error", "Please specify an output file (Example: -o file.csv).")
        elif args.json:
            error("Error", "Please specify an output file (Example: -o file.json).")
    if args.backoff_exponent <= 0:
        # message typo fixed: "specifiy" -> "specify"
        error("Error", "Please specify a positive value for backoff_exponent")
    if args.min_wait_time < 0:
        error("Error", "Please specify a non negative value for min_wait_time")


def loadUserList(ul, _type):
    """Load users from a file path or a comma-separated string.

    Returns a ``%20OR%20from%3A``-joined query fragment when *_type* is
    "search", otherwise the plain list of usernames.
    """
    path = os.path.abspath(ul)
    if os.path.exists(path):
        # use a context manager so the file handle is always closed
        # (the original open(...).read() leaked the descriptor)
        with open(path, "r") as f:
            userlist = f.read().splitlines()
    else:
        userlist = ul.split(",")
    if _type == "search":
        un = ""
        for user in userlist:
            un += "%20OR%20from%3A" + user
        # strip the leading "%20OR%20from%3A" (15 chars) from the first user
        return un[15:]
    return userlist
def initialize(args):
    """ Set default values for config from args

    Copies every parsed CLI argument onto a fresh config.Config so the
    rest of the library never touches argparse directly. Returns the
    populated Config.
    """
    c = config.Config()
    # --- target selection ---
    c.Username = args.username
    c.User_id = args.userid
    c.Search = args.search
    c.Geo = args.geo
    c.Location = args.location
    c.Near = args.near
    c.Lang = args.lang
    # --- output / storage backends ---
    c.Output = args.output
    c.Elasticsearch = args.elasticsearch
    # --- date / content filters ---
    c.Year = args.year
    c.Since = args.since
    c.Until = args.until
    c.Email = args.email
    c.Phone = args.phone
    c.Verified = args.verified
    c.Store_csv = args.csv
    # NOTE(review): Tabs is not a declared Config field; this relies on
    # Python allowing new instance attributes — confirm against config.py.
    c.Tabs = args.tabs
    c.Store_json = args.json
    c.Show_hashtags = args.hashtags
    c.Show_cashtags = args.cashtags
    c.Limit = args.limit
    c.Count = args.count
    c.Stats = args.stats
    c.Database = args.database
    c.To = args.to
    c.All = args.all
    # --- Elasticsearch session / formatting ---
    c.Essid = args.essid
    c.Format = args.format
    c.User_full = args.user_full
    # c.Profile_full = args.profile_full
    c.Pandas_type = args.pandas_type
    c.Index_tweets = args.index_tweets
    c.Index_follow = args.index_follow
    c.Index_users = args.index_users
    c.Debug = args.debug
    c.Resume = args.resume
    # --- media filters ---
    c.Images = args.images
    c.Videos = args.videos
    c.Media = args.media
    c.Replies = args.replies
    c.Pandas_clean = args.pandas_clean
    # --- proxy / tor ---
    c.Proxy_host = args.proxy_host
    c.Proxy_port = args.proxy_port
    c.Proxy_type = args.proxy_type
    c.Tor_control_port = args.tor_control_port
    c.Tor_control_password = args.tor_control_password
    c.Retweets = args.retweets
    c.Custom_query = args.custom_query
    c.Popular_tweets = args.popular_tweets
    c.Skip_certs = args.skip_certs
    c.Hide_output = args.hide_output
    # --- engagement filters ---
    c.Native_retweets = args.native_retweets
    c.Min_likes = args.min_likes
    c.Min_retweets = args.min_retweets
    c.Min_replies = args.min_replies
    c.Links = args.links
    c.Source = args.source
    c.Members_list = args.members_list
    c.Filter_retweets = args.filter_retweets
    # --- translation / rate-limit backoff ---
    c.Translate = args.translate
    c.TranslateDest = args.translate_dest
    c.Backoff_exponent = args.backoff_exponent
    c.Min_wait_time = args.min_wait_time
    return c
tab characters, not commas.", action="store_true") 174 | ap.add_argument("--json", help="Write as .json file", action="store_true") 175 | ap.add_argument("--hashtags", help="Output hashtags in seperate column.", action="store_true") 176 | ap.add_argument("--cashtags", help="Output cashtags in seperate column.", action="store_true") 177 | ap.add_argument("--userid", help="Twitter user id.") 178 | ap.add_argument("--limit", help="Number of Tweets to pull (Increments of 20).") 179 | ap.add_argument("--count", help="Display number of Tweets scraped at the end of session.", 180 | action="store_true") 181 | ap.add_argument("--stats", help="Show number of replies, retweets, and likes.", 182 | action="store_true") 183 | ap.add_argument("-db", "--database", help="Store Tweets in a sqlite3 database.") 184 | ap.add_argument("--to", help="Search Tweets to a user.", metavar="USERNAME") 185 | ap.add_argument("--all", help="Search all Tweets associated with a user.", metavar="USERNAME") 186 | ap.add_argument("--followers", help="Scrape a person's followers.", action="store_true") 187 | ap.add_argument("--following", help="Scrape a person's follows", action="store_true") 188 | ap.add_argument("--favorites", help="Scrape Tweets a user has liked.", action="store_true") 189 | ap.add_argument("--proxy-type", help="Socks5, HTTP, etc.") 190 | ap.add_argument("--proxy-host", help="Proxy hostname or IP.") 191 | ap.add_argument("--proxy-port", help="The port of the proxy server.") 192 | ap.add_argument("--tor-control-port", help="If proxy-host is set to tor, this is the control port", default=9051) 193 | ap.add_argument("--tor-control-password", 194 | help="If proxy-host is set to tor, this is the password for the control port", 195 | default="my_password") 196 | ap.add_argument("--essid", 197 | help="Elasticsearch Session ID, use this to differentiate scraping sessions.", 198 | nargs="?", default="") 199 | ap.add_argument("--userlist", help="Userlist from list or file.") 200 | 
ap.add_argument("--retweets", 201 | help="Include user's Retweets (Warning: limited).", 202 | action="store_true") 203 | ap.add_argument("--format", help="Custom output format (See wiki for details).") 204 | ap.add_argument("--user-full", 205 | help="Collect all user information (Use with followers or following only).", 206 | action="store_true") 207 | # I am removing this this feature for the time being, because it is no longer required, default method will do this 208 | # ap.add_argument("--profile-full", 209 | # help="Slow, but effective method of collecting a user's Tweets and RT.", 210 | # action="store_true") 211 | ap.add_argument( 212 | "-tl", 213 | "--timeline", 214 | help="Collects every tweet from a User's Timeline. (Tweets, RTs & Replies)", 215 | action="store_true", 216 | ) 217 | ap.add_argument("--translate", 218 | help="Get tweets translated by Google Translate.", 219 | action="store_true") 220 | ap.add_argument("--translate-dest", help="Translate tweet to language (ISO2).", 221 | default="en") 222 | ap.add_argument("--store-pandas", help="Save Tweets in a DataFrame (Pandas) file.") 223 | ap.add_argument("--pandas-type", 224 | help="Specify HDF5 or Pickle (HDF5 as default)", nargs="?", default="HDF5") 225 | ap.add_argument("-it", "--index-tweets", 226 | help="Custom Elasticsearch Index name for Tweets.", nargs="?", default="twinttweets") 227 | ap.add_argument("-if", "--index-follow", 228 | help="Custom Elasticsearch Index name for Follows.", 229 | nargs="?", default="twintgraph") 230 | ap.add_argument("-iu", "--index-users", help="Custom Elasticsearch Index name for Users.", 231 | nargs="?", default="twintuser") 232 | ap.add_argument("--debug", 233 | help="Store information in debug logs", action="store_true") 234 | ap.add_argument("--resume", help="Resume from Tweet ID.", metavar="TWEET_ID") 235 | ap.add_argument("--videos", help="Display only Tweets with videos.", action="store_true") 236 | ap.add_argument("--images", help="Display only Tweets with 
images.", action="store_true") 237 | ap.add_argument("--media", 238 | help="Display Tweets with only images or videos.", action="store_true") 239 | ap.add_argument("--replies", help="Display replies to a subject.", action="store_true") 240 | ap.add_argument("-pc", "--pandas-clean", 241 | help="Automatically clean Pandas dataframe at every scrape.") 242 | ap.add_argument("-cq", "--custom-query", help="Custom search query.") 243 | ap.add_argument("-pt", "--popular-tweets", help="Scrape popular tweets instead of recent ones.", 244 | action="store_true") 245 | ap.add_argument("-sc", "--skip-certs", help="Skip certs verification, useful for SSC.", action="store_false") 246 | ap.add_argument("-ho", "--hide-output", help="Hide output, no tweets will be displayed.", action="store_true") 247 | ap.add_argument("-nr", "--native-retweets", help="Filter the results for retweets only.", action="store_true") 248 | ap.add_argument("--min-likes", help="Filter the tweets by minimum number of likes.") 249 | ap.add_argument("--min-retweets", help="Filter the tweets by minimum number of retweets.") 250 | ap.add_argument("--min-replies", help="Filter the tweets by minimum number of replies.") 251 | ap.add_argument("--links", help="Include or exclude tweets containing one o more links. If not specified" + 252 | " you will get both tweets that might contain links or not.") 253 | ap.add_argument("--source", help="Filter the tweets for specific source client.") 254 | ap.add_argument("--members-list", help="Filter the tweets sent by users in a given list.") 255 | ap.add_argument("-fr", "--filter-retweets", help="Exclude retweets from the results.", action="store_true") 256 | ap.add_argument("--backoff-exponent", help="Specify a exponent for the polynomial backoff in case of errors.", 257 | type=float, default=3.0) 258 | ap.add_argument("--min-wait-time", type=float, default=15, 259 | help="specifiy a minimum wait time in case of scraping limit error. 
def _run_with_userlist(args, c, list_type, runner):
    """Run *runner* once per user in args.userlist, or once with config *c*.

    Rebuilds the config per user so every scrape starts from a clean state,
    exactly as the original per-branch copies did.
    """
    if args.userlist:
        _userlist = loadUserList(args.userlist, list_type)
        for _user in _userlist:
            args.username = _user
            c = initialize(args)
            runner(c)
    else:
        runner(c)


def main():
    """ Main

    Parse and validate CLI arguments, build the config, then dispatch to
    the matching run.* entry point.
    """
    args = options()
    check(args)

    if args.pandas_clean:
        # NOTE(review): the original called storage.panda.clean() twice in a
        # row (before and after initialize()); one call is sufficient.
        storage.panda.clean()

    c = initialize(args)

    if args.userlist:
        c.Query = loadUserList(args.userlist, "search")

    if args.favorites:
        _run_with_userlist(args, c, "favorites", run.Favorites)
    elif args.following:
        _run_with_userlist(args, c, "following", run.Following)
    elif args.followers:
        _run_with_userlist(args, c, "followers", run.Followers)
    elif args.retweets:  # or args.profile_full:
        _run_with_userlist(args, c, "profile", run.Profile)
    elif args.user_full:
        _run_with_userlist(args, c, "userlist", run.Lookup)
    elif args.timeline:
        run.Profile(c)
    else:
        run.Search(c)
from dataclasses import dataclass
from typing import Optional


@dataclass
class Config:
    """Runtime configuration for a twint scraping session.

    Every attribute is read by the run/url/output modules; defaults mean
    "no filter / feature disabled". Field order is part of the dataclass
    __init__ signature and must not change.
    """
    Username: Optional[str] = None
    User_id: Optional[str] = None
    Search: Optional[str] = None
    Lookup: bool = False
    Geo: str = ""
    Location: bool = False
    Near: Optional[str] = None
    Lang: Optional[str] = None
    Output: Optional[str] = None
    Elasticsearch: object = None
    Year: Optional[int] = None
    Since: Optional[str] = None
    Until: Optional[str] = None
    Email: Optional[str] = None
    Phone: Optional[str] = None
    Verified: bool = False
    Store_csv: bool = False
    Store_json: bool = False
    # Unannotated on purpose: NOT a dataclass field, so the __init__
    # signature is unchanged. A fresh per-instance copy is made in
    # __post_init__ below — the bare class-level dict was shared by every
    # Config instance, so one session's custom columns leaked into others.
    Custom = {"tweet": None, "user": None, "username": None}
    Show_hashtags: bool = False
    Show_cashtags: bool = False
    Limit: Optional[int] = None
    Count: Optional[int] = None
    Stats: bool = False
    Database: object = None
    To: Optional[str] = None
    All = None
    Debug: bool = False
    Format = None
    Essid: str = ""
    Profile: bool = False
    Followers: bool = False
    Following: bool = False
    Favorites: bool = False
    TwitterSearch: bool = False
    User_full: bool = False
    # Profile_full: bool = False
    Store_object: bool = False
    Store_object_tweets_list: Optional[list] = None
    Store_object_users_list: Optional[list] = None
    Store_object_follow_list: Optional[list] = None
    Pandas_type: Optional[type] = None
    Pandas: bool = False
    Index_tweets: str = "twinttweets"
    Index_follow: str = "twintgraph"
    Index_users: str = "twintuser"
    Retries_count: int = 10
    Resume: object = None
    Images: bool = False
    Videos: bool = False
    Media: bool = False
    Replies: bool = False
    Pandas_clean: bool = True
    Lowercase: bool = True
    Pandas_au: bool = True
    Proxy_host: str = ""
    Proxy_port: int = 0
    Proxy_type: object = None
    Tor_control_port: int = 9051
    Tor_control_password: Optional[str] = None
    Retweets: bool = False
    Query: Optional[str] = None
    Hide_output: bool = False
    Custom_query: str = ""
    Popular_tweets: bool = False
    Skip_certs: bool = False
    Native_retweets: bool = False
    Min_likes: int = 0
    Min_retweets: int = 0
    Min_replies: int = 0
    Links: Optional[str] = None
    Source: Optional[str] = None
    Members_list: Optional[str] = None
    Filter_retweets: bool = False
    Translate: bool = False
    TranslateSrc: str = "en"
    TranslateDest: str = "en"
    Backoff_exponent: float = 3.0
    Min_wait_time: int = 0
    Bearer_token: Optional[str] = None
    Guest_token: Optional[str] = None
    deleted: Optional[list] = None

    def __post_init__(self):
        # Give each instance its own Custom dict (fixes the shared
        # mutable class-attribute bug described above).
        self.Custom = {"tweet": None, "user": None, "username": None}
d._since_def_user = True 39 | else: 40 | d.since = datetime.datetime.strptime("2006-03-21 00:00:00", "%Y-%m-%d %H:%M:%S") 41 | d.since = utc_to_local(d.since) 42 | d._since_def_user = False 43 | 44 | return d 45 | -------------------------------------------------------------------------------- /twint/feed.py: -------------------------------------------------------------------------------- 1 | import time 2 | from datetime import datetime 3 | 4 | from bs4 import BeautifulSoup 5 | from re import findall 6 | from json import loads 7 | 8 | import logging as logme 9 | 10 | from .tweet import utc_to_local, Tweet_formats 11 | 12 | 13 | class NoMoreTweetsException(Exception): 14 | def __init__(self, msg): 15 | super().__init__(msg) 16 | 17 | 18 | def Follow(response): 19 | logme.debug(__name__ + ':Follow') 20 | soup = BeautifulSoup(response, "html.parser") 21 | follow = soup.find_all("td", "info fifty screenname") 22 | cursor = soup.find_all("div", "w-button-more") 23 | try: 24 | cursor = findall(r'cursor=(.*?)">', str(cursor))[0] 25 | except IndexError: 26 | logme.critical(__name__ + ':Follow:IndexError') 27 | 28 | return follow, cursor 29 | 30 | 31 | # TODO: this won't be used by --profile-full anymore. 
if it isn't used anywhere else, perhaps remove this in future 32 | def Mobile(response): 33 | logme.debug(__name__ + ':Mobile') 34 | soup = BeautifulSoup(response, "html.parser") 35 | tweets = soup.find_all("span", "metadata") 36 | max_id = soup.find_all("div", "w-button-more") 37 | try: 38 | max_id = findall(r'max_id=(.*?)">', str(max_id))[0] 39 | except Exception as e: 40 | logme.critical(__name__ + ':Mobile:' + str(e)) 41 | 42 | return tweets, max_id 43 | 44 | 45 | def MobileFav(response): 46 | soup = BeautifulSoup(response, "html.parser") 47 | tweets = soup.find_all("table", "tweet") 48 | max_id = soup.find_all("div", "w-button-more") 49 | try: 50 | max_id = findall(r'max_id=(.*?)">', str(max_id))[0] 51 | except Exception as e: 52 | print(str(e) + " [x] feed.MobileFav") 53 | 54 | return tweets, max_id 55 | 56 | 57 | def _get_cursor(response): 58 | try: 59 | next_cursor = response['timeline']['instructions'][0]['addEntries']['entries'][-1]['content'][ 60 | 'operation']['cursor']['value'] 61 | except KeyError: 62 | # this is needed because after the first request location of cursor is changed 63 | next_cursor = response['timeline']['instructions'][-1]['replaceEntry']['entry']['content']['operation'][ 64 | 'cursor']['value'] 65 | return next_cursor 66 | 67 | 68 | def Json(response): 69 | logme.debug(__name__ + ':Json') 70 | json_response = loads(response) 71 | html = json_response["items_html"] 72 | soup = BeautifulSoup(html, "html.parser") 73 | feed = soup.find_all("div", "tweet") 74 | return feed, json_response["min_position"] 75 | 76 | 77 | def parse_tweets(config, response): 78 | logme.debug(__name__ + ':parse_tweets') 79 | response = loads(response) 80 | if len(response['globalObjects']['tweets']) == 0: 81 | msg = 'No more data!' 
82 | raise NoMoreTweetsException(msg) 83 | feed = [] 84 | for timeline_entry in response['timeline']['instructions'][0]['addEntries']['entries']: 85 | # this will handle the cases when the timeline entry is a tweet 86 | if (config.TwitterSearch or config.Profile) and (timeline_entry['entryId'].startswith('sq-I-t-') or 87 | timeline_entry['entryId'].startswith('tweet-')): 88 | if 'tweet' in timeline_entry['content']['item']['content']: 89 | _id = timeline_entry['content']['item']['content']['tweet']['id'] 90 | # skip the ads 91 | if 'promotedMetadata' in timeline_entry['content']['item']['content']['tweet']: 92 | continue 93 | elif 'tombstone' in timeline_entry['content']['item']['content'] and 'tweet' in \ 94 | timeline_entry['content']['item']['content']['tombstone']: 95 | _id = timeline_entry['content']['item']['content']['tombstone']['tweet']['id'] 96 | else: 97 | _id = None 98 | if _id is None: 99 | raise ValueError('Unable to find ID of tweet in timeline.') 100 | try: 101 | temp_obj = response['globalObjects']['tweets'][_id] 102 | except KeyError: 103 | logme.info('encountered a deleted tweet with id {}'.format(_id)) 104 | 105 | config.deleted.append(_id) 106 | continue 107 | temp_obj['user_data'] = response['globalObjects']['users'][temp_obj['user_id_str']] 108 | if 'retweeted_status_id_str' in temp_obj: 109 | rt_id = temp_obj['retweeted_status_id_str'] 110 | _dt = response['globalObjects']['tweets'][rt_id]['created_at'] 111 | _dt = datetime.strptime(_dt, '%a %b %d %H:%M:%S %z %Y') 112 | _dt = utc_to_local(_dt) 113 | _dt = str(_dt.strftime(Tweet_formats['datetime'])) 114 | temp_obj['retweet_data'] = { 115 | 'user_rt_id': response['globalObjects']['tweets'][rt_id]['user_id_str'], 116 | 'user_rt': response['globalObjects']['tweets'][rt_id]['full_text'], 117 | 'retweet_id': rt_id, 118 | 'retweet_date': _dt, 119 | } 120 | feed.append(temp_obj) 121 | next_cursor = _get_cursor(response) 122 | return feed, next_cursor 123 | 
# --- twint/format.py ---
import logging as logme


def Tweet(config, t):
    """Render tweet `t` as one line of text.

    Uses the user template in config.Format when present (placeholder
    substitution), otherwise the default "<id> <date> ... <user> text" line.
    """
    if config.Format:
        logme.debug(__name__ + ':Tweet:Format')
        output = config.Format.replace("{id}", t.id_str)
        output = output.replace("{conversation_id}", t.conversation_id)
        output = output.replace("{date}", t.datestamp)
        output = output.replace("{time}", t.timestamp)
        output = output.replace("{user_id}", t.user_id_str)
        output = output.replace("{username}", t.username)
        output = output.replace("{name}", t.name)
        output = output.replace("{place}", t.place)
        output = output.replace("{timezone}", t.timezone)
        output = output.replace("{urls}", ",".join(t.urls))
        output = output.replace("{photos}", ",".join(t.photos))
        output = output.replace("{video}", str(t.video))
        output = output.replace("{thumbnail}", t.thumbnail)
        output = output.replace("{tweet}", t.tweet)
        output = output.replace("{language}", t.lang)
        output = output.replace("{hashtags}", ",".join(t.hashtags))
        output = output.replace("{cashtags}", ",".join(t.cashtags))
        output = output.replace("{replies}", t.replies_count)
        output = output.replace("{retweets}", t.retweets_count)
        output = output.replace("{likes}", t.likes_count)
        output = output.replace("{link}", t.link)
        output = output.replace("{is_retweet}", str(t.retweet))
        output = output.replace("{user_rt_id}", str(t.user_rt_id))
        output = output.replace("{quote_url}", t.quote_url)
        output = output.replace("{near}", t.near)
        output = output.replace("{geo}", t.geo)
        output = output.replace("{mentions}", ",".join(t.mentions))
        output = output.replace("{translate}", t.translate)
        output = output.replace("{trans_src}", t.trans_src)
        output = output.replace("{trans_dest}", t.trans_dest)
    else:
        logme.debug(__name__ + ':Tweet:notFormat')
        output = f"{t.id_str} {t.datestamp} {t.timestamp} {t.timezone} "

        # TODO: someone who is familiar with this code, needs to take a look at what this is
        # if t.retweet:
        #     output += "RT "

        output += f"<{t.username}> {t.tweet}"

        if config.Show_hashtags:
            hashtags = ",".join(t.hashtags)
            output += f" {hashtags}"
        if config.Show_cashtags:
            cashtags = ",".join(t.cashtags)
            output += f" {cashtags}"
        if config.Stats:
            output += f" | {t.replies_count} replies {t.retweets_count} retweets {t.likes_count} likes"
        if config.Translate:
            output += f" {t.translate} {t.trans_src} {t.trans_dest}"
    return output


def User(_format, u):
    """Render user `u` via template `_format`, or the default pipe-separated line."""
    if _format:
        logme.debug(__name__ + ':User:Format')
        output = _format.replace("{id}", str(u.id))
        output = output.replace("{name}", u.name)
        output = output.replace("{username}", u.username)
        output = output.replace("{bio}", u.bio)
        output = output.replace("{location}", u.location)
        output = output.replace("{url}", u.url)
        output = output.replace("{join_date}", u.join_date)
        output = output.replace("{join_time}", u.join_time)
        output = output.replace("{tweets}", str(u.tweets))
        output = output.replace("{following}", str(u.following))
        output = output.replace("{followers}", str(u.followers))
        output = output.replace("{likes}", str(u.likes))
        output = output.replace("{media}", str(u.media_count))
        output = output.replace("{private}", str(u.is_private))
        output = output.replace("{verified}", str(u.is_verified))
        output = output.replace("{avatar}", u.avatar)
        if u.background_image:
            output = output.replace("{background_image}", u.background_image)
        else:
            output = output.replace("{background_image}", "")
    else:
        logme.debug(__name__ + ':User:notFormat')
        output = f"{u.id} | {u.name} | @{u.username} | Private: "
        output += f"{u.is_private} | Verified: {u.is_verified} |"
        output += f" Bio: {u.bio} | Location: {u.location} | Url: "
        output += f"{u.url} | Joined: {u.join_date} {u.join_time} "
        output += f"| Tweets: {u.tweets} | Following: {u.following}"
        output += f" | Followers: {u.followers} | Likes: {u.likes} "
        output += f"| Media: {u.media_count} | Avatar: {u.avatar}"

    return output


# --- twint/get.py ---
from async_timeout import timeout
from datetime import datetime
from bs4 import BeautifulSoup
import sys
import socket
import aiohttp
from fake_useragent import UserAgent
import asyncio
import concurrent.futures
import random
from json import loads, dumps
from aiohttp_socks import ProxyConnector, ProxyType
from urllib.parse import quote

from . import url
from .output import Tweets, Users
from .token import TokenExpiryException

import logging as logme

# Set by get_connector() when an HTTP proxy is configured; passed to aiohttp
# per request (HTTP proxies use the `proxy=` argument, not a connector).
httpproxy = None

# Fallback desktop user agents used when fake_useragent cannot be reached.
user_agent_list = [
    'Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1)',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
    'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows NT 6.2; WOW64; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)',
    'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)',
    'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET '
    'CLR 3.5.30729)',
]


# function to convert python `dict` to json and then encode it to be passed in the url as a parameter
# some urls require this format
def dict_to_url(dct):
    return quote(dumps(dct))


def get_connector(config):
    """Build an aiohttp connector for the configured SOCKS/Tor proxy.

    HTTP proxies are handled via the module-level `httpproxy` URL instead of
    a connector. Exits the process on inconsistent proxy settings.
    """
    logme.debug(__name__ + ':get_connector')
    _connector = None
    if config.Proxy_host:
        if config.Proxy_host.lower() == "tor":
            _connector = ProxyConnector(
                host='127.0.0.1',
                port=9050,
                rdns=True)
        elif config.Proxy_port and config.Proxy_type:
            if config.Proxy_type.lower() == "socks5":
                _type = ProxyType.SOCKS5
            elif config.Proxy_type.lower() == "socks4":
                _type = ProxyType.SOCKS4
            elif config.Proxy_type.lower() == "http":
                global httpproxy
                httpproxy = "http://" + config.Proxy_host + ":" + str(config.Proxy_port)
                return _connector
            else:
                logme.critical("get_connector:proxy-type-error")
                print("Error: Proxy types allowed are: http, socks5 and socks4. No https.")
                sys.exit(1)
            _connector = ProxyConnector(
                proxy_type=_type,
                host=config.Proxy_host,
                port=config.Proxy_port,
                rdns=True)
        else:
            logme.critical(__name__ + ':get_connector:proxy-port-type-error')
            print("Error: Please specify --proxy-host, --proxy-port, and --proxy-type")
            sys.exit(1)
    else:
        if config.Proxy_port or config.Proxy_type:
            logme.critical(__name__ + ':get_connector:proxy-host-arg-error')
            print("Error: Please specify --proxy-host, --proxy-port, and --proxy-type")
            sys.exit(1)

    return _connector


async def RequestUrl(config, init, headers=None):
    """Build the URL for the configured scrape mode and fetch it.

    headers: optional extra (name, value) pairs merged with the token
    headers. run.Twint.Feed already calls this function with a
    headers=[("User-Agent", ...)] keyword, which previously raised a
    TypeError because the parameter did not exist.
    """
    logme.debug(__name__ + ':RequestUrl')
    _connector = get_connector(config)
    _serialQuery = ""
    params = []
    _url = ""
    _headers = [("authorization", config.Bearer_token), ("x-guest-token", config.Guest_token)]
    if headers:
        _headers.extend(headers)

    # TODO : do this later
    if config.Profile:
        logme.debug(__name__ + ':RequestUrl:Profile')
        _url, params, _serialQuery = url.SearchProfile(config, init)
    elif config.TwitterSearch:
        logme.debug(__name__ + ':RequestUrl:TwitterSearch')
        _url, params, _serialQuery = await url.Search(config, init)
    else:
        if config.Following:
            logme.debug(__name__ + ':RequestUrl:Following')
            _url = await url.Following(config.Username, init)
        elif config.Followers:
            logme.debug(__name__ + ':RequestUrl:Followers')
            _url = await url.Followers(config.Username, init)
        else:
            logme.debug(__name__ + ':RequestUrl:Favorites')
            _url = await url.Favorites(config.Username, init)
        _serialQuery = _url

    response = await Request(_url, params=params, connector=_connector, headers=_headers)

    if config.Debug:
        print(_serialQuery, file=open("twint-request_urls.log", "a", encoding="utf-8"))

    return response


def ForceNewTorIdentity(config):
    """Ask the local Tor control port for a fresh circuit (SIGNAL NEWNYM)."""
    logme.debug(__name__ + ':ForceNewTorIdentity')
    try:
        tor_c = socket.create_connection(('127.0.0.1', config.Tor_control_port))
        tor_c.send('AUTHENTICATE "{}"\r\nSIGNAL NEWNYM\r\n'.format(config.Tor_control_password).encode())
        response = tor_c.recv(1024)
        if response != b'250 OK\r\n250 OK\r\n':
            sys.stderr.write('Unexpected response from Tor control port: {}\n'.format(response))
            logme.critical(__name__ + ':ForceNewTorIdentity:unexpectedResponse')
    except Exception as e:
        logme.debug(__name__ + ':ForceNewTorIdentity:errorConnectingTor')
        sys.stderr.write('Error connecting to Tor control port: {}\n'.format(repr(e)))
        sys.stderr.write('If you want to rotate Tor ports automatically - enable Tor control port\n')


async def Request(_url, connector=None, params=None, headers=None):
    """Open a throwaway ClientSession and GET `_url`, returning the body text."""
    logme.debug(__name__ + ':Request:Connector')
    async with aiohttp.ClientSession(connector=connector, headers=headers) as session:
        return await Response(session, _url, params)


async def Response(session, _url, params=None):
    """GET `_url` with a 120 s timeout; raise TokenExpiryException on HTTP 429."""
    logme.debug(__name__ + ':Response')
    with timeout(120):
        async with session.get(_url, ssl=True, params=params, proxy=httpproxy) as response:
            resp = await response.text()
            if response.status == 429:  # 429 implies Too many requests i.e. Rate Limit Exceeded
                raise TokenExpiryException(loads(resp)['errors'][0]['message'])
            return resp


async def RandomUserAgent(wa=None):
    """Return a user-agent string: a fixed Chrome UA when `wa`, else a random one."""
    logme.debug(__name__ + ':RandomUserAgent')
    try:
        if wa:
            return "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36"
        return UserAgent(verify_ssl=False, use_cache_server=False).random
    except Exception:
        # fake_useragent may fail to fetch its data; fall back to the static list
        return random.choice(user_agent_list)


async def Username(_id, bearer_token, guest_token):
    """Resolve a numeric user id to a screen name via the GraphQL API."""
    logme.debug(__name__ + ':Username')
    _dct = {'userId': _id, 'withHighlightedLabel': False}
    _url = "https://api.twitter.com/graphql/B9FuNQVmyx32rdbIPEZKag/UserByRestId?variables={}".format(dict_to_url(_dct))
    _headers = {
        'authorization': bearer_token,
        'x-guest-token': guest_token,
    }
    r = await Request(_url, headers=_headers)
    j_r = loads(r)
    username = j_r['data']['user']['legacy']['screen_name']
    return username


async def Tweet(url, config, conn):
    """Fetch a single tweet page and feed its tweet divs to output.Tweets."""
    logme.debug(__name__ + ':Tweet')
    try:
        response = await Request(url)
        soup = BeautifulSoup(response, "html.parser")
        tweets = soup.find_all("div", "tweet")
        await Tweets(tweets, config, conn, url)
    except Exception as e:
        logme.critical(__name__ + ':Tweet:' + str(e))


async def User(username, config, conn, user_id=False):
    """Fetch a user profile; return the rest_id when `user_id`, else store it."""
    logme.debug(__name__ + ':User')
    _dct = {'screen_name': username, 'withHighlightedLabel': False}
    _url = 'https://api.twitter.com/graphql/jMaTS-_Ea8vh9rpKggJbCQ/UserByScreenName?variables={}'\
        .format(dict_to_url(_dct))
    _headers = {
        'authorization': config.Bearer_token,
        'x-guest-token': config.Guest_token,
    }
    try:
        response = await Request(_url, headers=_headers)
        j_r = loads(response)
        if user_id:
            try:
                _id = j_r['data']['user']['rest_id']
                return _id
            except KeyError as e:
                logme.critical(__name__ + ':User:' + str(e))
                return
        await Users(j_r, config, conn)
    except Exception as e:
        logme.critical(__name__ + ':User:' + str(e))
        raise


def Limit(Limit, count):
    """Return True once `count` reaches the user-configured limit (if any)."""
    logme.debug(__name__ + ':Limit')
    if Limit is not None and count >= int(Limit):
        return True


async def Multi(feed, config, conn):
    """Fetch every tweet/user page referenced by `feed` concurrently.

    Returns the number of feed entries handled. The old implementation
    awaited User()/Tweet() and passed their None result to
    loop.run_in_executor, which made asyncio.gather fail with
    "'NoneType' object is not callable" (as its own comment admitted);
    scheduling the coroutines directly fixes that.
    """
    logme.debug(__name__ + ':Multi')
    count = 0
    try:
        futures = []
        for tweet in feed:
            count += 1
            if config.Favorites or config.Profile_full:
                logme.debug(__name__ + ':Multi:Favorites-profileFull')
                link = tweet.find("a")["href"]
                url = f"https://twitter.com{link}&lang=en"
            elif config.User_full:
                logme.debug(__name__ + ':Multi:userFull')
                username = tweet.find("a")["name"]
                url = f"http://twitter.com/{username}?lang=en"
            else:
                logme.debug(__name__ + ':Multi:else-url')
                link = tweet.find("a", "tweet-timestamp js-permalink js-nav js-tooltip")["href"]
                url = f"https://twitter.com{link}?lang=en"

            if config.User_full:
                logme.debug(__name__ + ':Multi:user-full-Run')
                futures.append(User(url, config, conn))
            else:
                logme.debug(__name__ + ':Multi:notUser-full-Run')
                futures.append(Tweet(url, config, conn))
        logme.debug(__name__ + ':Multi:asyncioGather')
        await asyncio.gather(*futures)
    except Exception as e:
        logme.critical(__name__ + ':Multi:' + str(e))

    return count
# --- twint/output.py ---
from datetime import datetime

from . import format, get
from .tweet import Tweet
from .user import User
from .storage import db, elasticsearch, write, panda

import logging as logme

follows_list = []
tweets_list = []
users_list = []

# Usernames seen so far when lowercasing authors
# (was `{''}` followed by pop(); a plain empty set is equivalent and clearer).
author_list = set()

# used by Pandas
_follows_object = {}


def _formatDateTime(datetimestamp):
    """Convert 'YYYY-MM-DD[ HH:MM:SS]' to an integer unix timestamp."""
    try:
        return int(datetime.strptime(datetimestamp, "%Y-%m-%d %H:%M:%S").timestamp())
    except ValueError:
        return int(datetime.strptime(datetimestamp, "%Y-%m-%d").timestamp())


def _clean_follow_list():
    """Reset the Pandas follow accumulator."""
    logme.debug(__name__ + ':clean_follow_list')
    global _follows_object
    _follows_object = {}


def clean_lists():
    """Reset the module-level result accumulators."""
    logme.debug(__name__ + ':clean_lists')
    global follows_list
    global tweets_list
    global users_list
    follows_list = []
    tweets_list = []
    users_list = []


def datecheck(datetimestamp, config):
    """Return True when `datetimestamp` falls inside config.Since/Until."""
    logme.debug(__name__ + ':datecheck')
    if config.Since:
        logme.debug(__name__ + ':datecheck:SinceTrue')
        d = _formatDateTime(datetimestamp)
        s = _formatDateTime(config.Since)
        if d < s:
            return False
    if config.Until:
        logme.debug(__name__ + ':datecheck:UntilTrue')
        d = _formatDateTime(datetimestamp)
        s = _formatDateTime(config.Until)
        if d > s:
            return False
    logme.debug(__name__ + ':datecheck:dateRangeFalse')
    return True


# TODO In this method we need to delete the quoted tweets, because twitter also sends the quoted tweets in the
# `tweets` list along with the other tweets
def is_tweet(tw):
    """Return True when `tw` carries a data-item-id attribute (i.e. is a tweet)."""
    try:
        tw["data-item-id"]
        logme.debug(__name__ + ':is_tweet:True')
        return True
    except Exception:
        logme.critical(__name__ + ':is_tweet:False')
        return False


def _output(obj, output, config, **extra):
    """Route one tweet/user/username to the configured sinks (file/ES/stdout)."""
    logme.debug(__name__ + ':_output')
    if config.Lowercase:
        if isinstance(obj, str):
            logme.debug(__name__ + ':_output:Lowercase:username')
            obj = obj.lower()
        elif obj.__class__.__name__ == "user":
            logme.debug(__name__ + ':_output:Lowercase:user')
            pass
        elif obj.__class__.__name__ == "tweet":
            logme.debug(__name__ + ':_output:Lowercase:tweet')
            obj.username = obj.username.lower()
            author_list.update({obj.username})
            for dct in obj.mentions:
                for key, val in dct.items():
                    dct[key] = val.lower()
            for i in range(len(obj.hashtags)):
                obj.hashtags[i] = obj.hashtags[i].lower()
            for i in range(len(obj.cashtags)):
                obj.cashtags[i] = obj.cashtags[i].lower()
        else:
            logme.info('_output:Lowercase:hiddenTweetFound')
            print("[x] Hidden tweet found, account suspended due to violation of TOS")
            return
    if config.Output is not None:
        if config.Store_csv:
            try:
                write.Csv(obj, config)
                logme.debug(__name__ + ':_output:CSV')
            except Exception as e:
                logme.critical(__name__ + ':_output:CSV:Error:' + str(e))
                print(str(e) + " [x] output._output")
        elif config.Store_json:
            write.Json(obj, config)
            logme.debug(__name__ + ':_output:JSON')
        else:
            write.Text(output, config.Output)
            logme.debug(__name__ + ':_output:Text')

    if config.Elasticsearch:
        logme.debug(__name__ + ':_output:Elasticsearch')
        print("", end=".", flush=True)
    else:
        if not config.Hide_output:
            try:
                print(output.replace('\n', ' '))
            except UnicodeEncodeError:
                logme.critical(__name__ + ':_output:UnicodeEncodeError')
                print("unicode error [x] output._output")


async def checkData(tweet, config, conn):
    """Validate, date-filter, store and emit a single raw tweet."""
    logme.debug(__name__ + ':checkData')
    tweet = Tweet(tweet, config)
    if not tweet.datestamp:
        logme.critical(__name__ + ':checkData:hiddenTweetFound')
        print("[x] Hidden tweet found, account suspended due to violation of TOS")
        return
    if datecheck(tweet.datestamp + " " + tweet.timestamp, config):
        output = format.Tweet(config, tweet)
        if config.Database:
            logme.debug(__name__ + ':checkData:Database')
            db.tweets(conn, tweet, config)
        if config.Pandas:
            logme.debug(__name__ + ':checkData:Pandas')
            panda.update(tweet, config)
        if config.Store_object:
            logme.debug(__name__ + ':checkData:Store_object')
            if hasattr(config.Store_object_tweets_list, 'append'):
                config.Store_object_tweets_list.append(tweet)
            else:
                tweets_list.append(tweet)
        if config.Elasticsearch:
            logme.debug(__name__ + ':checkData:Elasticsearch')
            elasticsearch.Tweet(tweet, config)
        _output(tweet, output, config)
    # else:
    #     logme.critical(__name__+':checkData:copyrightedTweet')


async def Tweets(tweets, config, conn, url=''):
    """Dispatch scraped tweets to checkData.

    url: optional source URL; get.Tweet already passes a 4th positional
    argument, which previously raised a TypeError because this parameter
    did not exist.
    """
    logme.debug(__name__ + ':Tweets')
    if config.Favorites or config.Location:
        logme.debug(__name__ + ':Tweets:fav+full+loc')
        for tw in tweets:
            await checkData(tw, config, conn)
    elif config.TwitterSearch or config.Profile:
        logme.debug(__name__ + ':Tweets:TwitterSearch')
        await checkData(tweets, config, conn)
    else:
        logme.debug(__name__ + ':Tweets:else')
        if int(tweets["data-user-id"]) == config.User_id or config.Retweets:
            await checkData(tweets, config, conn)


async def Users(u, config, conn):
    """Store and emit one user profile."""
    logme.debug(__name__ + ':User')
    global users_list

    user = User(u)
    output = format.User(config.Format, user)

    if config.Database:
        logme.debug(__name__ + ':User:Database')
        db.user(conn, config, user)

    if config.Elasticsearch:
        logme.debug(__name__ + ':User:Elasticsearch')
        # ES needs ISO-ish date/time; restore the original strings afterwards.
        _save_date = user.join_date
        _save_time = user.join_time
        user.join_date = str(datetime.strptime(user.join_date, "%d %b %Y")).split()[0]
        user.join_time = str(datetime.strptime(user.join_time, "%I:%M %p")).split()[1]
        elasticsearch.UserProfile(user, config)
        user.join_date = _save_date
        user.join_time = _save_time

    if config.Store_object:
        logme.debug(__name__ + ':User:Store_object')
        if hasattr(config.Store_object_follow_list, 'append'):
            config.Store_object_follow_list.append(user)
        elif hasattr(config.Store_object_users_list, 'append'):
            config.Store_object_users_list.append(user)
        else:
            users_list.append(user)  # twint.user.user

    if config.Pandas:
        logme.debug(__name__ + ':User:Pandas+user')
        panda.update(user, config)

    _output(user, output, config)


async def Username(username, config, conn):
    """Store and emit one followed/following username."""
    logme.debug(__name__ + ':Username')
    global _follows_object
    global follows_list
    follow_var = config.Following * "following" + config.Followers * "followers"

    if config.Database:
        logme.debug(__name__ + ':Username:Database')
        db.follow(conn, config.Username, config.Followers, username)

    if config.Elasticsearch:
        logme.debug(__name__ + ':Username:Elasticsearch')
        elasticsearch.Follow(username, config)

    if config.Store_object:
        if hasattr(config.Store_object_follow_list, 'append'):
            config.Store_object_follow_list.append(username)
        else:
            follows_list.append(username)  # twint.user.user

    if config.Pandas:
        logme.debug(__name__ + ':Username:object+pandas')
        try:
            _ = _follows_object[config.Username][follow_var]
        except KeyError:
            _follows_object.update({config.Username: {follow_var: []}})
        _follows_object[config.Username][follow_var].append(username)
        if config.Pandas_au:
            logme.debug(__name__ + ':Username:object+pandas+au')
            panda.update(_follows_object[config.Username], config)
    _output(username, username, config)


# --- twint/run.py (portion visible in this chunk) ---
import sys, os, datetime
from asyncio import get_event_loop, TimeoutError, ensure_future, new_event_loop, set_event_loop

from . import datelock, feed, get, output, verbose, storage
from .token import TokenExpiryException
from . import token
from .storage import db
from .feed import NoMoreTweetsException

import logging as logme

import time

bearer = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs' \
         '%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'


class Twint:
    def __init__(self, config):
        """Prepare a scrape run: resume point, guest token, DB, date bounds."""
        logme.debug(__name__ + ':Twint:__init__')
        if config.Resume is not None and (config.TwitterSearch or config.Followers or config.Following):
            logme.debug(__name__ + ':Twint:__init__:Resume')
            self.init = self.get_resume(config.Resume)
        else:
            self.init = -1

        config.deleted = []
        self.feed: list = [-1]
        self.count = 0
        self.user_agent = ""
        self.config = config
        self.config.Bearer_token = bearer
        # TODO might have to make some adjustments for it to work with multi-treading
        # USAGE : to get a new guest token simply do `self.token.refresh()`
        self.token = token.Token(config)
        self.token.refresh()
        self.conn = db.Conn(config.Database)
        self.d = datelock.Set(self.config.Until, self.config.Since)
        verbose.Elastic(config.Elasticsearch)

        if self.config.Store_object:
            logme.debug(__name__ + ':Twint:__init__:clean_follow_list')
            output._clean_follow_list()

        if self.config.Pandas_clean:
            logme.debug(__name__ + ':Twint:__init__:pandas_clean')
            storage.panda.clean()

    def get_resume(self, resumeFile):
        """Return the last saved cursor from `resumeFile`, or '-1' if absent."""
        if not os.path.exists(resumeFile):
            return '-1'
        with open(resumeFile, 'r') as rFile:
            _init = rFile.readlines()[-1].strip('\n')
            return _init

    async def Feed(self):
        """Fetch one page of results into self.feed, with retry/backoff."""
        logme.debug(__name__ + ':Twint:Feed')
        consecutive_errors_count = 0
        while True:
            # this will receive a JSON string, parse it into a `dict` and do the required stuff
            try:
                response = await get.RequestUrl(self.config, self.init)
            except TokenExpiryException as e:
                logme.debug(__name__ + 'Twint:Feed:' + str(e))
                self.token.refresh()
                response = await get.RequestUrl(self.config, self.init)

            if self.config.Debug:
                print(response, file=open("twint-last-request.log", "w", encoding="utf-8"))

            self.feed = []
            try:
                if self.config.Favorites:
                    self.feed, self.init = feed.MobileFav(response)
                    favorite_err_cnt = 0
                    if len(self.feed) == 0 and len(self.init) == 0:
                        while (len(self.feed) == 0 or len(self.init) == 0) and favorite_err_cnt < 5:
                            self.user_agent = await get.RandomUserAgent(wa=False)
                            response = await get.RequestUrl(self.config, self.init,
                                                            headers=[("User-Agent", self.user_agent)])
                            self.feed, self.init = feed.MobileFav(response)
                            favorite_err_cnt += 1
                            time.sleep(1)
                        if favorite_err_cnt == 5:
                            print("Favorite page could not be fetched")
                    if not self.count % 40:
                        time.sleep(5)
                elif self.config.Followers or self.config.Following:
                    self.feed, self.init = feed.Follow(response)
                    if not self.count % 40:
                        time.sleep(5)
                elif self.config.Profile or self.config.TwitterSearch:
                    try:
                        self.feed, self.init = feed.parse_tweets(self.config, response)
                    except NoMoreTweetsException as e:
                        logme.debug(__name__ + ':Twint:Feed:' + str(e))
                        print('[!] ' + str(e) + ' Scraping will stop now.')
                        print('found {} deleted tweets in this search.'.format(len(self.config.deleted)))
                        break
                break
            except TimeoutError as e:
                if self.config.Proxy_host.lower() == "tor":
                    print("[?] Timed out, changing Tor identity...")
                    if self.config.Tor_control_password is None:
                        logme.critical(__name__ + ':Twint:Feed:tor-password')
                        sys.stderr.write("Error: config.Tor_control_password must be set for proxy auto-rotation!\r\n")
                        sys.stderr.write(
                            "Info: What is it? See https://stem.torproject.org/faq.html#can-i-interact-with-tors"
                            "-controller-interface-directly\r\n")
                        break
                    else:
                        get.ForceNewTorIdentity(self.config)
                        continue
                else:
                    logme.critical(__name__ + ':Twint:Feed:' + str(e))
                    print(str(e))
                    break
            except Exception as e:
                if self.config.Profile or self.config.Favorites:
                    print("[!] Twitter does not return more data, scrape stops here.")
                    break

                logme.critical(__name__ + ':Twint:Feed:noData' + str(e))
                # Sometimes Twitter says there is no data. But it's a lie.
                # raise
                consecutive_errors_count += 1
                if consecutive_errors_count < self.config.Retries_count:
                    # skip to the next iteration if wait time does not satisfy limit constraints
                    delay = round(consecutive_errors_count ** self.config.Backoff_exponent, 1)

                    # if the delay is less than users set min wait time then replace delay
                    if self.config.Min_wait_time > delay:
                        delay = self.config.Min_wait_time

                    sys.stderr.write('sleeping for {} secs\n'.format(delay))
                    time.sleep(delay)
                    self.user_agent = await get.RandomUserAgent(wa=True)
                    continue
                logme.critical(__name__ + ':Twint:Feed:Tweets_known_error:' + str(e))
                sys.stderr.write(str(e) + " [x] run.Feed")
                sys.stderr.write(
                    "[!] if you get this error but you know for sure that more tweets exist, please open an issue and "
                    "we will investigate it!")
                break
        if self.config.Resume:
            print(self.init, file=open(self.config.Resume, "a", encoding="utf-8"))

    async def follow(self):
        """Scrape followers/following pages and emit each username."""
        await self.Feed()
        if self.config.User_full:
            logme.debug(__name__ + ':Twint:follow:userFull')
            self.count += await get.Multi(self.feed, self.config, self.conn)
        else:
            logme.debug(__name__ + ':Twint:follow:notUserFull')
            for user in self.feed:
                self.count += 1
                username = user.find("a")["name"]
                await output.Username(username, self.config, self.conn)

    async def favorite(self):
        """Scrape a favorites page into config.favorited_tweets_list."""
        logme.debug(__name__ + ':Twint:favorite')
        await self.Feed()
        favorited_tweets_list = []
        for tweet in self.feed:
            tweet_dict = {}
            self.count += 1
            try:
                tweet_dict['data-item-id'] = tweet.find("div", {"class": "tweet-text"})['data-id']
                t_url = tweet.find("span", {"class": "metadata"}).find("a")["href"]
                tweet_dict['data-conversation-id'] = t_url.split('?')[0].split('/')[-1]
                tweet_dict['username'] = tweet.find("div", {"class": "username"}).text.replace('\n', '').replace(' ',
                                                                                                                 '')
                tweet_dict['tweet'] = tweet.find("div", {"class": "tweet-text"}).find("div", {"class": "dir-ltr"}).text
                date_str = tweet.find("td", {"class": "timestamp"}).find("a").text
                # test_dates = ["1m", "2h", "Jun 21, 2019", "Mar 12", "28 Jun 19"]
                if len(date_str) <= 3 and (date_str[-1] == "m" or date_str[-1] == "h"):  # 25m 1h
                    tweet_dict['date'] = str(datetime.date.today())
                elif ',' in date_str:  # Aug 21, 2019
                    sp = date_str.replace(',', '').split(' ')
                    date_str_formatted = sp[1] + ' ' + sp[0] + ' ' + sp[2]
                    tweet_dict['date'] = datetime.datetime.strptime(date_str_formatted, "%d %b %Y").strftime("%Y-%m-%d")
                elif len(date_str.split(' ')) == 3:  # 28 Jun 19
                    sp = date_str.split(' ')
                    if len(sp[2]) == 2:
                        sp[2] = '20' + sp[2]
                    date_str_formatted = sp[0] + ' ' + sp[1] + ' ' + sp[2]
                    tweet_dict['date'] = datetime.datetime.strptime(date_str_formatted, "%d %b %Y").strftime("%Y-%m-%d")
                else:  # Aug 21
                    sp = date_str.split(' ')
                    date_str_formatted = sp[1] + ' ' + sp[0] + ' ' + str(datetime.date.today().year)
                    tweet_dict['date'] = datetime.datetime.strptime(date_str_formatted, "%d %b %Y").strftime("%Y-%m-%d")

                favorited_tweets_list.append(tweet_dict)

            except Exception as e:
                logme.critical(__name__ + ':Twint:favorite:favorite_field_lack')
                # date_str may be unbound if parsing failed before it was read,
                # so only report the underlying error here.
                print("[!] Favorite tweet could not be parsed: " + str(e))

        try:
            self.config.favorited_tweets_list += favorited_tweets_list
        except AttributeError:
            self.config.favorited_tweets_list = favorited_tweets_list

    async def profile(self):
        """Scrape a profile timeline and emit each tweet."""
        await self.Feed()
        logme.debug(__name__ + ':Twint:profile')
        for tweet in self.feed:
            self.count += 1
            await output.Tweets(tweet, self.config, self.conn)

    async def tweets(self):
        """Scrape search results and emit each tweet."""
        await self.Feed()
        # TODO : need to take care of this later
        if self.config.Location:
            logme.debug(__name__ + ':Twint:tweets:location')
            self.count += await get.Multi(self.feed, self.config, self.conn)
        else:
            logme.debug(__name__ + ':Twint:tweets:notLocation')
            for tweet in self.feed:
                self.count += 1
                await output.Tweets(tweet, self.config, self.conn)

    async def main(self, callback=None):
        # NOTE(review): the remainder of this method lies beyond the reviewed
        # chunk; only its first statement is visible here.
        task = ensure_future(self.run())  # Might be changed to create_task in 3.7+.
231 | 232 | if callback: 233 | task.add_done_callback(callback) 234 | 235 | await task 236 | 237 | async def run(self): 238 | if self.config.TwitterSearch: 239 | self.user_agent = await get.RandomUserAgent(wa=True) 240 | else: 241 | self.user_agent = await get.RandomUserAgent() 242 | 243 | if self.config.User_id is not None and self.config.Username is None: 244 | logme.debug(__name__ + ':Twint:main:user_id') 245 | self.config.Username = await get.Username(self.config.User_id, self.config.Bearer_token, 246 | self.config.Guest_token) 247 | 248 | if self.config.Username is not None and self.config.User_id is None: 249 | logme.debug(__name__ + ':Twint:main:username') 250 | 251 | self.config.User_id = await get.User(self.config.Username, self.config, self.conn, True) 252 | if self.config.User_id is None: 253 | raise ValueError("Cannot find twitter account with name = " + self.config.Username) 254 | 255 | # TODO : will need to modify it to work with the new endpoints 256 | if self.config.TwitterSearch and self.config.Since and self.config.Until: 257 | logme.debug(__name__ + ':Twint:main:search+since+until') 258 | while self.d.since < self.d.until: 259 | self.config.Since = datetime.datetime.strftime(self.d.since, "%Y-%m-%d %H:%M:%S") 260 | self.config.Until = datetime.datetime.strftime(self.d.until, "%Y-%m-%d %H:%M:%S") 261 | if len(self.feed) > 0: 262 | await self.tweets() 263 | else: 264 | logme.debug(__name__ + ':Twint:main:gettingNewTweets') 265 | break 266 | 267 | if get.Limit(self.config.Limit, self.count): 268 | break 269 | elif self.config.Lookup: 270 | await self.Lookup() 271 | else: 272 | logme.debug(__name__ + ':Twint:main:not-search+since+until') 273 | while True: 274 | if len(self.feed) > 0: 275 | if self.config.Followers or self.config.Following: 276 | logme.debug(__name__ + ':Twint:main:follow') 277 | await self.follow() 278 | elif self.config.Favorites: 279 | logme.debug(__name__ + ':Twint:main:favorites') 280 | await self.favorite() 281 | elif 
self.config.Profile: 282 | logme.debug(__name__ + ':Twint:main:profile') 283 | await self.profile() 284 | elif self.config.TwitterSearch: 285 | logme.debug(__name__ + ':Twint:main:twitter-search') 286 | await self.tweets() 287 | else: 288 | logme.debug(__name__ + ':Twint:main:no-more-tweets') 289 | break 290 | 291 | # logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main+CallingGetLimit2') 292 | if get.Limit(self.config.Limit, self.count): 293 | logme.debug(__name__ + ':Twint:main:reachedLimit') 294 | break 295 | 296 | if self.config.Count: 297 | verbose.Count(self.count, self.config) 298 | 299 | async def Lookup(self): 300 | logme.debug(__name__ + ':Twint:Lookup') 301 | 302 | try: 303 | if self.config.User_id is not None and self.config.Username is None: 304 | logme.debug(__name__ + ':Twint:Lookup:user_id') 305 | self.config.Username = await get.Username(self.config.User_id, self.config.Bearer_token, 306 | self.config.Guest_token) 307 | await get.User(self.config.Username, self.config, db.Conn(self.config.Database)) 308 | 309 | except Exception as e: 310 | logme.exception(__name__ + ':Twint:Lookup:Unexpected exception occurred.') 311 | raise 312 | 313 | 314 | def run(config, callback=None): 315 | logme.debug(__name__ + ':run') 316 | try: 317 | get_event_loop() 318 | except RuntimeError as e: 319 | if "no current event loop" in str(e): 320 | set_event_loop(new_event_loop()) 321 | else: 322 | logme.exception(__name__ + ':run:Unexpected exception while handling an expected RuntimeError.') 323 | raise 324 | except Exception as e: 325 | logme.exception( 326 | __name__ + ':run:Unexpected exception occurred while attempting to get or create a new event loop.') 327 | raise 328 | 329 | get_event_loop().run_until_complete(Twint(config).main(callback)) 330 | 331 | 332 | def Favorites(config): 333 | logme.debug(__name__ + ':Favorites') 334 | config.Favorites = True 335 | config.Following = False 336 | config.Followers = False 337 | config.Profile = False 338 | 
config.TwitterSearch = False 339 | run(config) 340 | if config.Pandas_au: 341 | storage.panda._autoget("tweet") 342 | 343 | 344 | def Followers(config): 345 | logme.debug(__name__ + ':Followers') 346 | config.Followers = True 347 | config.Following = False 348 | config.Profile = False 349 | config.Favorites = False 350 | config.TwitterSearch = False 351 | run(config) 352 | if config.Pandas_au: 353 | storage.panda._autoget("followers") 354 | if config.User_full: 355 | storage.panda._autoget("user") 356 | if config.Pandas_clean and not config.Store_object: 357 | # storage.panda.clean() 358 | output._clean_follow_list() 359 | 360 | 361 | def Following(config): 362 | logme.debug(__name__ + ':Following') 363 | config.Following = True 364 | config.Followers = False 365 | config.Profile = False 366 | config.Favorites = False 367 | config.TwitterSearch = False 368 | run(config) 369 | if config.Pandas_au: 370 | storage.panda._autoget("following") 371 | if config.User_full: 372 | storage.panda._autoget("user") 373 | if config.Pandas_clean and not config.Store_object: 374 | # storage.panda.clean() 375 | output._clean_follow_list() 376 | 377 | 378 | def Lookup(config): 379 | logme.debug(__name__ + ':Lookup') 380 | config.Profile = False 381 | config.Lookup = True 382 | config.Favorites = False 383 | config.FOllowing = False 384 | config.Followers = False 385 | config.TwitterSearch = False 386 | run(config) 387 | if config.Pandas_au: 388 | storage.panda._autoget("user") 389 | 390 | 391 | def Profile(config): 392 | logme.debug(__name__ + ':Profile') 393 | config.Profile = True 394 | config.Favorites = False 395 | config.Following = False 396 | config.Followers = False 397 | config.TwitterSearch = False 398 | run(config) 399 | if config.Pandas_au: 400 | storage.panda._autoget("tweet") 401 | 402 | 403 | def Search(config, callback=None): 404 | logme.debug(__name__ + ':Search') 405 | config.TwitterSearch = True 406 | config.Favorites = False 407 | config.Following = False 408 | 
config.Followers = False 409 | config.Profile = False 410 | run(config, callback) 411 | if config.Pandas_au: 412 | storage.panda._autoget("tweet") 413 | -------------------------------------------------------------------------------- /twint/storage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twintproject/twint/e7c8a0c764f6879188e5c21e25fb6f1f856a7221/twint/storage/__init__.py -------------------------------------------------------------------------------- /twint/storage/db.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import sys 3 | import time 4 | import hashlib 5 | 6 | from datetime import datetime 7 | 8 | def Conn(database): 9 | if database: 10 | print("[+] Inserting into Database: " + str(database)) 11 | conn = init(database) 12 | if isinstance(conn, str): # error 13 | print(conn) 14 | sys.exit(1) 15 | else: 16 | conn = "" 17 | 18 | return conn 19 | 20 | def init(db): 21 | try: 22 | conn = sqlite3.connect(db) 23 | cursor = conn.cursor() 24 | 25 | table_users = """ 26 | CREATE TABLE IF NOT EXISTS 27 | users( 28 | id integer not null, 29 | id_str text not null, 30 | name text, 31 | username text not null, 32 | bio text, 33 | location text, 34 | url text, 35 | join_date text not null, 36 | join_time text not null, 37 | tweets integer, 38 | following integer, 39 | followers integer, 40 | likes integer, 41 | media integer, 42 | private integer not null, 43 | verified integer not null, 44 | profile_image_url text not null, 45 | background_image text, 46 | hex_dig text not null, 47 | time_update integer not null, 48 | CONSTRAINT users_pk PRIMARY KEY (id, hex_dig) 49 | ); 50 | """ 51 | cursor.execute(table_users) 52 | 53 | table_tweets = """ 54 | CREATE TABLE IF NOT EXISTS 55 | tweets ( 56 | id integer not null, 57 | id_str text not null, 58 | tweet text default '', 59 | language text default '', 60 | conversation_id text not 
null, 61 | created_at integer not null, 62 | date text not null, 63 | time text not null, 64 | timezone text not null, 65 | place text default '', 66 | replies_count integer, 67 | likes_count integer, 68 | retweets_count integer, 69 | user_id integer not null, 70 | user_id_str text not null, 71 | screen_name text not null, 72 | name text default '', 73 | link text, 74 | mentions text, 75 | hashtags text, 76 | cashtags text, 77 | urls text, 78 | photos text, 79 | thumbnail text, 80 | quote_url text, 81 | video integer, 82 | geo text, 83 | near text, 84 | source text, 85 | time_update integer not null, 86 | `translate` text default '', 87 | trans_src text default '', 88 | trans_dest text default '', 89 | PRIMARY KEY (id) 90 | ); 91 | """ 92 | cursor.execute(table_tweets) 93 | 94 | table_retweets = """ 95 | CREATE TABLE IF NOT EXISTS 96 | retweets( 97 | user_id integer not null, 98 | username text not null, 99 | tweet_id integer not null, 100 | retweet_id integer not null, 101 | retweet_date integer, 102 | CONSTRAINT retweets_pk PRIMARY KEY(user_id, tweet_id), 103 | CONSTRAINT user_id_fk FOREIGN KEY(user_id) REFERENCES users(id), 104 | CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id) 105 | ); 106 | """ 107 | cursor.execute(table_retweets) 108 | 109 | table_reply_to = """ 110 | CREATE TABLE IF NOT EXISTS 111 | replies( 112 | tweet_id integer not null, 113 | user_id integer not null, 114 | username text not null, 115 | CONSTRAINT replies_pk PRIMARY KEY (user_id, tweet_id), 116 | CONSTRAINT tweet_id_fk FOREIGN KEY (tweet_id) REFERENCES tweets(id) 117 | ); 118 | """ 119 | cursor.execute(table_reply_to) 120 | 121 | table_favorites = """ 122 | CREATE TABLE IF NOT EXISTS 123 | favorites( 124 | user_id integer not null, 125 | tweet_id integer not null, 126 | CONSTRAINT favorites_pk PRIMARY KEY (user_id, tweet_id), 127 | CONSTRAINT user_id_fk FOREIGN KEY (user_id) REFERENCES users(id), 128 | CONSTRAINT tweet_id_fk FOREIGN KEY (tweet_id) REFERENCES tweets(id) 
129 | ); 130 | """ 131 | cursor.execute(table_favorites) 132 | 133 | table_followers = """ 134 | CREATE TABLE IF NOT EXISTS 135 | followers ( 136 | id integer not null, 137 | follower_id integer not null, 138 | CONSTRAINT followers_pk PRIMARY KEY (id, follower_id), 139 | CONSTRAINT id_fk FOREIGN KEY(id) REFERENCES users(id), 140 | CONSTRAINT follower_id_fk FOREIGN KEY(follower_id) REFERENCES users(id) 141 | ); 142 | """ 143 | cursor.execute(table_followers) 144 | 145 | table_following = """ 146 | CREATE TABLE IF NOT EXISTS 147 | following ( 148 | id integer not null, 149 | following_id integer not null, 150 | CONSTRAINT following_pk PRIMARY KEY (id, following_id), 151 | CONSTRAINT id_fk FOREIGN KEY(id) REFERENCES users(id), 152 | CONSTRAINT following_id_fk FOREIGN KEY(following_id) REFERENCES users(id) 153 | ); 154 | """ 155 | cursor.execute(table_following) 156 | 157 | table_followers_names = """ 158 | CREATE TABLE IF NOT EXISTS 159 | followers_names ( 160 | user text not null, 161 | time_update integer not null, 162 | follower text not null, 163 | PRIMARY KEY (user, follower) 164 | ); 165 | """ 166 | cursor.execute(table_followers_names) 167 | 168 | table_following_names = """ 169 | CREATE TABLE IF NOT EXISTS 170 | following_names ( 171 | user text not null, 172 | time_update integer not null, 173 | follows text not null, 174 | PRIMARY KEY (user, follows) 175 | ); 176 | """ 177 | cursor.execute(table_following_names) 178 | 179 | return conn 180 | except Exception as e: 181 | return str(e) 182 | 183 | def fTable(Followers): 184 | if Followers: 185 | table = "followers_names" 186 | else: 187 | table = "following_names" 188 | 189 | return table 190 | 191 | def uTable(Followers): 192 | if Followers: 193 | table = "followers" 194 | else: 195 | table = "following" 196 | 197 | return table 198 | 199 | def follow(conn, Username, Followers, User): 200 | try: 201 | time_ms = round(time.time()*1000) 202 | cursor = conn.cursor() 203 | entry = (User, time_ms, Username,) 204 | 
table = fTable(Followers) 205 | query = f"INSERT INTO {table} VALUES(?,?,?)" 206 | cursor.execute(query, entry) 207 | conn.commit() 208 | except sqlite3.IntegrityError: 209 | pass 210 | 211 | def get_hash_id(conn, id): 212 | cursor = conn.cursor() 213 | cursor.execute('SELECT hex_dig FROM users WHERE id = ? LIMIT 1', (id,)) 214 | resultset = cursor.fetchall() 215 | return resultset[0][0] if resultset else -1 216 | 217 | def user(conn, config, User): 218 | try: 219 | time_ms = round(time.time()*1000) 220 | cursor = conn.cursor() 221 | user = [int(User.id), User.id, User.name, User.username, User.bio, User.location, User.url,User.join_date, User.join_time, User.tweets, User.following, User.followers, User.likes, User.media_count, User.is_private, User.is_verified, User.avatar, User.background_image] 222 | 223 | hex_dig = hashlib.sha256(','.join(str(v) for v in user).encode()).hexdigest() 224 | entry = tuple(user) + (hex_dig,time_ms,) 225 | old_hash = get_hash_id(conn, User.id) 226 | 227 | if old_hash == -1 or old_hash != hex_dig: 228 | query = f"INSERT INTO users VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" 229 | cursor.execute(query, entry) 230 | else: 231 | pass 232 | 233 | if config.Followers or config.Following: 234 | table = uTable(config.Followers) 235 | query = f"INSERT INTO {table} VALUES(?,?)" 236 | cursor.execute(query, (config.User_id, int(User.id))) 237 | 238 | conn.commit() 239 | except sqlite3.IntegrityError: 240 | pass 241 | 242 | def tweets(conn, Tweet, config): 243 | try: 244 | time_ms = round(time.time()*1000) 245 | cursor = conn.cursor() 246 | entry = (Tweet.id, 247 | Tweet.id_str, 248 | Tweet.tweet, 249 | Tweet.lang, 250 | Tweet.conversation_id, 251 | Tweet.datetime, 252 | Tweet.datestamp, 253 | Tweet.timestamp, 254 | Tweet.timezone, 255 | Tweet.place, 256 | Tweet.replies_count, 257 | Tweet.likes_count, 258 | Tweet.retweets_count, 259 | Tweet.user_id, 260 | Tweet.user_id_str, 261 | Tweet.username, 262 | Tweet.name, 263 | Tweet.link, 264 | 
",".join(Tweet.mentions), 265 | ",".join(Tweet.hashtags), 266 | ",".join(Tweet.cashtags), 267 | ",".join(Tweet.urls), 268 | ",".join(Tweet.photos), 269 | Tweet.thumbnail, 270 | Tweet.quote_url, 271 | Tweet.video, 272 | Tweet.geo, 273 | Tweet.near, 274 | Tweet.source, 275 | time_ms, 276 | Tweet.translate, 277 | Tweet.trans_src, 278 | Tweet.trans_dest) 279 | cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry) 280 | 281 | if config.Favorites: 282 | query = 'INSERT INTO favorites VALUES(?,?)' 283 | cursor.execute(query, (config.User_id, Tweet.id)) 284 | 285 | if Tweet.retweet: 286 | query = 'INSERT INTO retweets VALUES(?,?,?,?,?)' 287 | _d = datetime.timestamp(datetime.strptime(Tweet.retweet_date, "%Y-%m-%d %H:%M:%S")) 288 | cursor.execute(query, (int(Tweet.user_rt_id), Tweet.user_rt, Tweet.id, int(Tweet.retweet_id), _d)) 289 | 290 | if Tweet.reply_to: 291 | for reply in Tweet.reply_to: 292 | query = 'INSERT INTO replies VALUES(?,?,?)' 293 | cursor.execute(query, (Tweet.id, int(reply['user_id']), reply['username'])) 294 | 295 | conn.commit() 296 | except sqlite3.IntegrityError: 297 | pass 298 | -------------------------------------------------------------------------------- /twint/storage/elasticsearch.py: -------------------------------------------------------------------------------- 1 | ## TODO - Fix Weekday situation 2 | from elasticsearch import Elasticsearch, helpers 3 | from geopy.geocoders import Nominatim 4 | from datetime import datetime 5 | import contextlib 6 | import sys 7 | 8 | _index_tweet_status = False 9 | _index_follow_status = False 10 | _index_user_status = False 11 | _is_near_def = False 12 | _is_location_def = False 13 | _near = {} 14 | _location = {} 15 | 16 | geolocator = Nominatim(user_agent="twint-1.2") 17 | 18 | class RecycleObject(object): 19 | def write(self, junk): pass 20 | def flush(self): pass 21 | 22 | def getLocation(place, **options): 23 | location = 
geolocator.geocode(place,timeout=1000) 24 | if location: 25 | if options.get("near"): 26 | global _near 27 | _near = {"lat": location.latitude, "lon": location.longitude} 28 | return True 29 | elif options.get("location"): 30 | global _location 31 | _location = {"lat": location.latitude, "lon": location.longitude} 32 | return True 33 | return {"lat": location.latitude, "lon": location.longitude} 34 | else: 35 | return {} 36 | 37 | def handleIndexResponse(response): 38 | try: 39 | if response["status"] == 400: 40 | return True 41 | except KeyError: 42 | pass 43 | if response["acknowledged"]: 44 | print("[+] Index \"" + response["index"] + "\" created!") 45 | else: 46 | print("[x] error index creation :: storage.elasticsearch.handleIndexCreation") 47 | if response["shards_acknowledged"]: 48 | print("[+] Shards acknowledged, everything is ready to be used!") 49 | return True 50 | else: 51 | print("[x] error with shards :: storage.elasticsearch.HandleIndexCreation") 52 | return False 53 | 54 | def createIndex(config, instance, **scope): 55 | if scope.get("scope") == "tweet": 56 | tweets_body = { 57 | "mappings": { 58 | "properties": { 59 | "id": {"type": "long"}, 60 | "conversation_id": {"type": "long"}, 61 | "created_at": {"type": "text"}, 62 | "date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"}, 63 | "timezone": {"type": "keyword"}, 64 | "place": {"type": "keyword"}, 65 | "location": {"type": "keyword"}, 66 | "tweet": {"type": "text"}, 67 | "lang": {"type": "keyword"}, 68 | "hashtags": {"type": "keyword", "normalizer": "hashtag_normalizer"}, 69 | "cashtags": {"type": "keyword", "normalizer": "hashtag_normalizer"}, 70 | "user_id_str": {"type": "keyword"}, 71 | "username": {"type": "keyword", "normalizer": "hashtag_normalizer"}, 72 | "name": {"type": "text"}, 73 | "profile_image_url": {"type": "text"}, 74 | "day": {"type": "integer"}, 75 | "hour": {"type": "integer"}, 76 | "link": {"type": "text"}, 77 | "retweet": {"type": "text"}, 78 | "essid": {"type": 
"keyword"}, 79 | "nlikes": {"type": "integer"}, 80 | "nreplies": {"type": "integer"}, 81 | "nretweets": {"type": "integer"}, 82 | "quote_url": {"type": "text"}, 83 | "video": {"type":"integer"}, 84 | "thumbnail": {"type":"text"}, 85 | "search": {"type": "text"}, 86 | "near": {"type": "text"}, 87 | "geo_near": {"type": "geo_point"}, 88 | "geo_tweet": {"type": "geo_point"}, 89 | "photos": {"type": "text"}, 90 | "user_rt_id": {"type": "keyword"}, 91 | "mentions": {"type": "keyword", "normalizer": "hashtag_normalizer"}, 92 | "source": {"type": "keyword"}, 93 | "user_rt": {"type": "keyword"}, 94 | "retweet_id": {"type": "keyword"}, 95 | "reply_to": { 96 | "type": "nested", 97 | "properties": { 98 | "user_id": {"type": "keyword"}, 99 | "username": {"type": "keyword"} 100 | } 101 | }, 102 | "retweet_date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss", "ignore_malformed": True}, 103 | "urls": {"type": "keyword"}, 104 | "translate": {"type": "text"}, 105 | "trans_src": {"type": "keyword"}, 106 | "trans_dest": {"type": "keyword"}, 107 | } 108 | }, 109 | "settings": { 110 | "number_of_shards": 1, 111 | "analysis": { 112 | "normalizer": { 113 | "hashtag_normalizer": { 114 | "type": "custom", 115 | "char_filter": [], 116 | "filter": ["lowercase", "asciifolding"] 117 | } 118 | } 119 | } 120 | } 121 | } 122 | with nostdout(): 123 | resp = instance.indices.create(index=config.Index_tweets, body=tweets_body, ignore=400) 124 | return handleIndexResponse(resp) 125 | elif scope.get("scope") == "follow": 126 | follow_body = { 127 | "mappings": { 128 | "properties": { 129 | "user": {"type": "keyword"}, 130 | "follow": {"type": "keyword"}, 131 | "essid": {"type": "keyword"} 132 | } 133 | }, 134 | "settings": { 135 | "number_of_shards": 1 136 | } 137 | } 138 | with nostdout(): 139 | resp = instance.indices.create(index=config.Index_follow, body=follow_body, ignore=400) 140 | return handleIndexResponse(resp) 141 | elif scope.get("scope") == "user": 142 | user_body = { 143 | "mappings": 
{ 144 | "properties": { 145 | "id": {"type": "keyword"}, 146 | "name": {"type": "keyword"}, 147 | "username": {"type": "keyword"}, 148 | "bio": {"type": "text"}, 149 | "location": {"type": "keyword"}, 150 | "url": {"type": "text"}, 151 | "join_datetime": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"}, 152 | "tweets": {"type": "integer"}, 153 | "following": {"type": "integer"}, 154 | "followers": {"type": "integer"}, 155 | "likes": {"type": "integer"}, 156 | "media": {"type": "integer"}, 157 | "private": {"type": "integer"}, 158 | "verified": {"type": "integer"}, 159 | "avatar": {"type": "text"}, 160 | "background_image": {"type": "text"}, 161 | "session": {"type": "keyword"}, 162 | "geo_user": {"type": "geo_point"} 163 | } 164 | }, 165 | "settings": { 166 | "number_of_shards": 1 167 | } 168 | } 169 | with nostdout(): 170 | resp = instance.indices.create(index=config.Index_users, body=user_body, ignore=400) 171 | return handleIndexResponse(resp) 172 | else: 173 | print("[x] error index pre-creation :: storage.elasticsearch.createIndex") 174 | return False 175 | 176 | @contextlib.contextmanager 177 | def nostdout(): 178 | savestdout = sys.stdout 179 | sys.stdout = RecycleObject() 180 | yield 181 | sys.stdout = savestdout 182 | 183 | def weekday(day): 184 | weekdays = { 185 | "Monday": 1, 186 | "Tuesday": 2, 187 | "Wednesday": 3, 188 | "Thursday": 4, 189 | "Friday": 5, 190 | "Saturday": 6, 191 | "Sunday": 7, 192 | } 193 | 194 | return weekdays[day] 195 | 196 | def Tweet(Tweet, config): 197 | global _index_tweet_status 198 | global _is_near_def 199 | date_obj = datetime.strptime(Tweet.datetime, "%Y-%m-%d %H:%M:%S %Z") 200 | 201 | actions = [] 202 | 203 | try: 204 | retweet = Tweet.retweet 205 | except AttributeError: 206 | retweet = None 207 | 208 | dt = f"{Tweet.datestamp} {Tweet.timestamp}" 209 | 210 | j_data = { 211 | "_index": config.Index_tweets, 212 | "_id": str(Tweet.id) + "_raw_" + config.Essid, 213 | "_source": { 214 | "id": str(Tweet.id), 215 | 
"conversation_id": Tweet.conversation_id, 216 | "created_at": Tweet.datetime, 217 | "date": dt, 218 | "timezone": Tweet.timezone, 219 | "place": Tweet.place, 220 | "tweet": Tweet.tweet, 221 | "language": Tweet.lang, 222 | "hashtags": Tweet.hashtags, 223 | "cashtags": Tweet.cashtags, 224 | "user_id_str": Tweet.user_id_str, 225 | "username": Tweet.username, 226 | "name": Tweet.name, 227 | "day": date_obj.weekday(), 228 | "hour": date_obj.hour, 229 | "link": Tweet.link, 230 | "retweet": retweet, 231 | "essid": config.Essid, 232 | "nlikes": int(Tweet.likes_count), 233 | "nreplies": int(Tweet.replies_count), 234 | "nretweets": int(Tweet.retweets_count), 235 | "quote_url": Tweet.quote_url, 236 | "video": Tweet.video, 237 | "search": str(config.Search), 238 | "near": config.Near 239 | } 240 | } 241 | if retweet is not None: 242 | j_data["_source"].update({"user_rt_id": Tweet.user_rt_id}) 243 | j_data["_source"].update({"user_rt": Tweet.user_rt}) 244 | j_data["_source"].update({"retweet_id": Tweet.retweet_id}) 245 | j_data["_source"].update({"retweet_date": Tweet.retweet_date}) 246 | if Tweet.reply_to: 247 | j_data["_source"].update({"reply_to": Tweet.reply_to}) 248 | if Tweet.photos: 249 | _photos = [] 250 | for photo in Tweet.photos: 251 | _photos.append(photo) 252 | j_data["_source"].update({"photos": _photos}) 253 | if Tweet.thumbnail: 254 | j_data["_source"].update({"thumbnail": Tweet.thumbnail}) 255 | if Tweet.mentions: 256 | _mentions = [] 257 | for mention in Tweet.mentions: 258 | _mentions.append(mention) 259 | j_data["_source"].update({"mentions": _mentions}) 260 | if Tweet.urls: 261 | _urls = [] 262 | for url in Tweet.urls: 263 | _urls.append(url) 264 | j_data["_source"].update({"urls": _urls}) 265 | if config.Near or config.Geo: 266 | if not _is_near_def: 267 | __geo = "" 268 | __near = "" 269 | if config.Geo: 270 | __geo = config.Geo 271 | if config.Near: 272 | __near = config.Near 273 | _is_near_def = getLocation(__near + __geo, near=True) 274 | if _near: 275 
| j_data["_source"].update({"geo_near": _near}) 276 | if Tweet.place: 277 | _t_place = getLocation(Tweet.place) 278 | if _t_place: 279 | j_data["_source"].update({"geo_tweet": getLocation(Tweet.place)}) 280 | if Tweet.source: 281 | j_data["_source"].update({"source": Tweet.Source}) 282 | if config.Translate: 283 | j_data["_source"].update({"translate": Tweet.translate}) 284 | j_data["_source"].update({"trans_src": Tweet.trans_src}) 285 | j_data["_source"].update({"trans_dest": Tweet.trans_dest}) 286 | 287 | actions.append(j_data) 288 | 289 | es = Elasticsearch(config.Elasticsearch, verify_certs=config.Skip_certs) 290 | if not _index_tweet_status: 291 | _index_tweet_status = createIndex(config, es, scope="tweet") 292 | with nostdout(): 293 | helpers.bulk(es, actions, chunk_size=2000, request_timeout=200) 294 | actions = [] 295 | 296 | def Follow(user, config): 297 | global _index_follow_status 298 | actions = [] 299 | 300 | if config.Following: 301 | _user = config.Username 302 | _follow = user 303 | else: 304 | _user = user 305 | _follow = config.Username 306 | j_data = { 307 | "_index": config.Index_follow, 308 | "_id": _user + "_" + _follow + "_" + config.Essid, 309 | "_source": { 310 | "user": _user, 311 | "follow": _follow, 312 | "essid": config.Essid 313 | } 314 | } 315 | actions.append(j_data) 316 | 317 | es = Elasticsearch(config.Elasticsearch, verify_certs=config.Skip_certs) 318 | if not _index_follow_status: 319 | _index_follow_status = createIndex(config, es, scope="follow") 320 | with nostdout(): 321 | helpers.bulk(es, actions, chunk_size=2000, request_timeout=200) 322 | actions = [] 323 | 324 | def UserProfile(user, config): 325 | global _index_user_status 326 | global _is_location_def 327 | actions = [] 328 | 329 | j_data = { 330 | "_index": config.Index_users, 331 | "_id": user.id + "_" + user.join_date + "_" + user.join_time + "_" + config.Essid, 332 | "_source": { 333 | "id": user.id, 334 | "name": user.name, 335 | "username": user.username, 336 | 
"bio": user.bio, 337 | "location": user.location, 338 | "url": user.url, 339 | "join_datetime": user.join_date + " " + user.join_time, 340 | "tweets": user.tweets, 341 | "following": user.following, 342 | "followers": user.followers, 343 | "likes": user.likes, 344 | "media": user.media_count, 345 | "private": user.is_private, 346 | "verified": user.is_verified, 347 | "avatar": user.avatar, 348 | "background_image": user.background_image, 349 | "session": config.Essid 350 | } 351 | } 352 | if config.Location: 353 | if not _is_location_def: 354 | _is_location_def = getLocation(user.location, location=True) 355 | if _location: 356 | j_data["_source"].update({"geo_user": _location}) 357 | actions.append(j_data) 358 | 359 | es = Elasticsearch(config.Elasticsearch, verify_certs=config.Skip_certs) 360 | if not _index_user_status: 361 | _index_user_status = createIndex(config, es, scope="user") 362 | with nostdout(): 363 | helpers.bulk(es, actions, chunk_size=2000, request_timeout=200) 364 | actions = [] 365 | -------------------------------------------------------------------------------- /twint/storage/panda.py: -------------------------------------------------------------------------------- 1 | import datetime, pandas as pd, warnings 2 | from time import strftime, localtime 3 | from twint.tweet import Tweet_formats 4 | 5 | Tweets_df = None 6 | Follow_df = None 7 | User_df = None 8 | 9 | _object_blocks = { 10 | "tweet": [], 11 | "user": [], 12 | "following": [], 13 | "followers": [] 14 | } 15 | 16 | weekdays = { 17 | "Monday": 1, 18 | "Tuesday": 2, 19 | "Wednesday": 3, 20 | "Thursday": 4, 21 | "Friday": 5, 22 | "Saturday": 6, 23 | "Sunday": 7, 24 | } 25 | 26 | _type = "" 27 | 28 | def _concat(df, _type): 29 | if df is None: 30 | df = pd.DataFrame(_object_blocks[_type]) 31 | else: 32 | _df = pd.DataFrame(_object_blocks[_type]) 33 | df = pd.concat([df, _df], sort=True) 34 | return df 35 | 36 | def _autoget(_type): 37 | global Tweets_df 38 | global Follow_df 39 | global 
User_df 40 | 41 | if _type == "tweet": 42 | Tweets_df = _concat(Tweets_df, _type) 43 | elif _type == "followers" or _type == "following": 44 | Follow_df = _concat(Follow_df, _type) 45 | elif _type == "user": 46 | User_df = _concat(User_df, _type) 47 | else: 48 | error("[x] Wrong type of object passed") 49 | 50 | 51 | def update(object, config): 52 | global _type 53 | 54 | #try: 55 | # _type = ((object.__class__.__name__ == "tweet")*"tweet" + 56 | # (object.__class__.__name__ == "user")*"user") 57 | #except AttributeError: 58 | # _type = config.Following*"following" + config.Followers*"followers" 59 | if object.__class__.__name__ == "tweet": 60 | _type = "tweet" 61 | elif object.__class__.__name__ == "user": 62 | _type = "user" 63 | elif object.__class__.__name__ == "dict": 64 | _type = config.Following*"following" + config.Followers*"followers" 65 | 66 | if _type == "tweet": 67 | Tweet = object 68 | datetime_ms = datetime.datetime.strptime(Tweet.datetime, Tweet_formats['datetime']).timestamp() * 1000 69 | day = weekdays[strftime("%A", localtime(datetime_ms/1000))] 70 | dt = f"{object.datestamp} {object.timestamp}" 71 | _data = { 72 | "id": str(Tweet.id), 73 | "conversation_id": Tweet.conversation_id, 74 | "created_at": datetime_ms, 75 | "date": dt, 76 | "timezone": Tweet.timezone, 77 | "place": Tweet.place, 78 | "tweet": Tweet.tweet, 79 | "language": Tweet.lang, 80 | "hashtags": Tweet.hashtags, 81 | "cashtags": Tweet.cashtags, 82 | "user_id": Tweet.user_id, 83 | "user_id_str": Tweet.user_id_str, 84 | "username": Tweet.username, 85 | "name": Tweet.name, 86 | "day": day, 87 | "hour": strftime("%H", localtime(datetime_ms/1000)), 88 | "link": Tweet.link, 89 | "urls": Tweet.urls, 90 | "photos": Tweet.photos, 91 | "video": Tweet.video, 92 | "thumbnail": Tweet.thumbnail, 93 | "retweet": Tweet.retweet, 94 | "nlikes": int(Tweet.likes_count), 95 | "nreplies": int(Tweet.replies_count), 96 | "nretweets": int(Tweet.retweets_count), 97 | "quote_url": Tweet.quote_url, 98 | 
"search": str(config.Search), 99 | "near": Tweet.near, 100 | "geo": Tweet.geo, 101 | "source": Tweet.source, 102 | "user_rt_id": Tweet.user_rt_id, 103 | "user_rt": Tweet.user_rt, 104 | "retweet_id": Tweet.retweet_id, 105 | "reply_to": Tweet.reply_to, 106 | "retweet_date": Tweet.retweet_date, 107 | "translate": Tweet.translate, 108 | "trans_src": Tweet.trans_src, 109 | "trans_dest": Tweet.trans_dest 110 | } 111 | _object_blocks[_type].append(_data) 112 | elif _type == "user": 113 | user = object 114 | try: 115 | background_image = user.background_image 116 | except: 117 | background_image = "" 118 | _data = { 119 | "id": user.id, 120 | "name": user.name, 121 | "username": user.username, 122 | "bio": user.bio, 123 | "url": user.url, 124 | "join_datetime": user.join_date + " " + user.join_time, 125 | "join_date": user.join_date, 126 | "join_time": user.join_time, 127 | "tweets": user.tweets, 128 | "location": user.location, 129 | "following": user.following, 130 | "followers": user.followers, 131 | "likes": user.likes, 132 | "media": user.media_count, 133 | "private": user.is_private, 134 | "verified": user.is_verified, 135 | "avatar": user.avatar, 136 | "background_image": background_image, 137 | } 138 | _object_blocks[_type].append(_data) 139 | elif _type == "followers" or _type == "following": 140 | _data = { 141 | config.Following*"following" + config.Followers*"followers" : 142 | {config.Username: object[_type]} 143 | } 144 | _object_blocks[_type] = _data 145 | else: 146 | print("Wrong type of object passed!") 147 | 148 | 149 | def clean(): 150 | global Tweets_df 151 | global Follow_df 152 | global User_df 153 | _object_blocks["tweet"].clear() 154 | _object_blocks["following"].clear() 155 | _object_blocks["followers"].clear() 156 | _object_blocks["user"].clear() 157 | Tweets_df = None 158 | Follow_df = None 159 | User_df = None 160 | 161 | def save(_filename, _dataframe, **options): 162 | if options.get("dataname"): 163 | _dataname = options.get("dataname") 164 | 
else: 165 | _dataname = "twint" 166 | 167 | if not options.get("type"): 168 | with warnings.catch_warnings(): 169 | warnings.simplefilter("ignore") 170 | _store = pd.HDFStore(_filename + ".h5") 171 | _store[_dataname] = _dataframe 172 | _store.close() 173 | elif options.get("type") == "Pickle": 174 | with warnings.catch_warnings(): 175 | warnings.simplefilter("ignore") 176 | _dataframe.to_pickle(_filename + ".pkl") 177 | else: 178 | print("""Please specify: filename, DataFrame, DataFrame name and type 179 | (HDF5, default, or Pickle)""") 180 | 181 | def read(_filename, **options): 182 | if not options.get("dataname"): 183 | _dataname = "twint" 184 | else: 185 | _dataname = options.get("dataname") 186 | 187 | if not options.get("type"): 188 | _store = pd.HDFStore(_filename + ".h5") 189 | _df = _store[_dataname] 190 | return _df 191 | elif options.get("type") == "Pickle": 192 | _df = pd.read_pickle(_filename + ".pkl") 193 | return _df 194 | else: 195 | print("""Please specify: DataFrame, DataFrame name (twint as default), 196 | filename and type (HDF5, default, or Pickle""") 197 | -------------------------------------------------------------------------------- /twint/storage/write.py: -------------------------------------------------------------------------------- 1 | from . 
def outputExt(objType, fType):
    """Return the default filename suffix ("/<type>s.<ext>") for *objType*."""
    if objType == "str":
        objType = "username"
    outExt = f"/{objType}s.{fType}"

    return outExt

def addExt(base, objType, fType):
    """If *base* has no extension, treat it as a directory (creating it if
    needed) and append the default filename for *objType*/*fType*."""
    if len(base.split('.')) == 1:
        createDirIfMissing(base)
        base += outputExt(objType, fType)

    return base

def Text(entry, f):
    """Append *entry* (newlines flattened to spaces) as one line to file *f*."""
    # BUG FIX: the handle was opened inline via print(file=open(...)) and
    # never closed — a resource leak; `with` guarantees close/flush.
    with open(f, "a", encoding="utf-8") as output:
        print(entry.replace('\n', ' '), file=output)

def Type(config):
    """Map the run configuration to the object type being written."""
    if config.User_full:
        _type = "user"
    elif config.Followers or config.Following:
        _type = "username"
    else:
        _type = "tweet"

    return _type

def struct(obj, custom, _type):
    """Return (fieldnames, row) for *obj*; *custom* restricts the columns."""
    if custom:
        fieldnames = custom
        row = {}
        for f in fieldnames:
            row[f] = meta.Data(obj, _type)[f]
    else:
        fieldnames = meta.Fieldnames(_type)
        row = meta.Data(obj, _type)

    return fieldnames, row

def createDirIfMissing(dirname):
    """Create *dirname* (and parents) if it does not already exist."""
    if not os.path.exists(dirname):
        os.makedirs(dirname)

def Csv(obj, config):
    """Append *obj* as one CSV row, writing the header on first creation."""
    _obj_type = obj.__class__.__name__
    if _obj_type == "str":
        _obj_type = "username"
    fieldnames, row = struct(obj, config.Custom[_obj_type], _obj_type)

    base = addExt(config.Output, _obj_type, "csv")
    # Tab-separated output when the config carries a Tabs flag.
    dialect = 'excel-tab' if 'Tabs' in config.__dict__ else 'excel'

    if not (os.path.exists(base)):
        with open(base, "w", newline='', encoding="utf-8") as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames, dialect=dialect)
            writer.writeheader()

    with open(base, "a", newline='', encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames, dialect=dialect)
        writer.writerow(row)

def Json(obj, config):
    """Append *obj* as one JSON object per line (JSON Lines format)."""
    _obj_type = obj.__class__.__name__
    if _obj_type == "str":
        _obj_type = "username"
    null, data = struct(obj, config.Custom[_obj_type], _obj_type)

    base = addExt(config.Output, _obj_type, "json")

    with open(base, "a", newline='', encoding="utf-8") as json_file:
        json.dump(data, json_file, ensure_ascii=False)
        json_file.write("\n")
def userData(u):
    """Flatten a scraped user object into a CSV/JSON-ready dict."""
    return {
        "id": int(u.id),
        "name": u.name,
        "username": u.username,
        "bio": u.bio,
        "location": u.location,
        "url": u.url,
        "join_date": u.join_date,
        "join_time": u.join_time,
        "tweets": int(u.tweets),
        "following": int(u.following),
        "followers": int(u.followers),
        "likes": int(u.likes),
        "media": int(u.media_count),
        "private": u.is_private,
        "verified": u.is_verified,
        "profile_image_url": u.avatar,
        "background_image": u.background_image,
    }

def userFieldnames():
    """Column order for user records."""
    return [
        "id",
        "name",
        "username",
        "bio",
        "location",
        "url",
        "join_date",
        "join_time",
        "tweets",
        "following",
        "followers",
        "likes",
        "media",
        "private",
        "verified",
        "profile_image_url",
        "background_image",
    ]

def usernameData(u):
    """Wrap a bare username string as a record."""
    return {"username": u}

def usernameFieldnames():
    """Column order for username records."""
    return ["username"]

def Data(obj, _type):
    """Serialize *obj* according to *_type*; anything unknown is a tweet."""
    if _type == "user":
        return userData(obj)
    if _type == "username":
        return usernameData(obj)
    return tweetData(obj)

def Fieldnames(_type):
    """Column order matching Data() for *_type*; anything unknown is a tweet."""
    if _type == "user":
        return userFieldnames()
    if _type == "username":
        return usernameFieldnames()
    return tweetFieldnames()
class TokenExpiryException(Exception):
    """Raised when the guest token is rejected by Twitter as expired."""
    def __init__(self, msg):
        super().__init__(msg)


class RefreshTokenException(Exception):
    """Raised when a fresh guest token could not be obtained."""
    def __init__(self, msg):
        super().__init__(msg)


class Token:
    """Fetches the twitter.com homepage and extracts a guest token from it."""

    def __init__(self, config):
        self._session = requests.Session()
        self._session.headers.update({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0'})
        self.config = config
        self._retries = 5
        self._timeout = 10
        self.url = 'https://twitter.com'

    def _request(self):
        """GET self.url with retries and exponential back-off.

        Returns the response on success; raises RefreshTokenException after
        all retries fail (and clears config.Guest_token).
        """
        for attempt in range(self._retries + 1):
            # The request is newly prepared on each retry because of potential cookie updates.
            req = self._session.prepare_request(requests.Request('GET', self.url))
            logme.debug(f'Retrieving {req.url}')
            # BUG FIX: `success` was only assigned on the no-exception path, so a
            # failed first attempt crashed with UnboundLocalError at `if success:`
            # below instead of retrying. Initialize it per attempt.
            success = False
            try:
                r = self._session.send(req, allow_redirects=True, timeout=self._timeout)
            except requests.exceptions.RequestException as exc:
                if attempt < self._retries:
                    retrying = ', retrying'
                    level = logme.WARNING
                else:
                    retrying = ''
                    level = logme.ERROR
                logme.log(level, f'Error retrieving {req.url}: {exc!r}{retrying}')
            else:
                success, msg = (True, None)
                msg = f': {msg}' if msg else ''

            if success:
                logme.debug(f'{req.url} retrieved successfully{msg}')
                return r
            if attempt < self._retries:
                # TODO : might wanna tweak this back-off timer
                sleep_time = 2.0 * 2 ** attempt
                logme.info(f'Waiting {sleep_time:.0f} seconds')
                time.sleep(sleep_time)
            else:
                msg = f'{self._retries + 1} requests to {self.url} failed, giving up.'
                logme.fatal(msg)
                self.config.Guest_token = None
                raise RefreshTokenException(msg)

    def refresh(self):
        """Fetch the homepage and store the guest token on config.Guest_token.

        Raises RefreshTokenException if the token cannot be found in the HTML.
        """
        logme.debug('Retrieving guest token')
        res = self._request()
        match = re.search(r'\("gt=(\d+);', res.text)
        if match:
            logme.debug('Found guest token in HTML')
            self.config.Guest_token = str(match.group(1))
        else:
            self.config.Guest_token = None
            raise RefreshTokenException('Could not find the Guest token in HTML')
def getText(tw):
    """Return the tweet's full text, space-padding links and flattening newlines.

    Padding "http"/"pic.twitter" keeps URLs from fusing with the preceding
    word in plain-text output; newlines are flattened so each tweet stays on
    one output line.
    """
    logme.debug(__name__ + ':getText')
    text = tw['full_text']
    text = text.replace("http", " http")
    text = text.replace("pic.twitter", " pic.twitter")
    text = text.replace("\n", " ")

    return text


def Tweet(tw, config):
    """Build a `tweet` object from one raw adaptive-search JSON entry *tw*.

    Missing optional sections (entities, media, retweet data, quoted tweet)
    are tolerated via targeted KeyError handling; config supplies the
    search-context fields (near/geo/source) and optional translation.
    """
    logme.debug(__name__ + ':Tweet')
    t = tweet()
    t.id = int(tw['id_str'])
    t.id_str = tw["id_str"]
    t.conversation_id = tw["conversation_id_str"]

    # parsing date to user-friendly format
    _dt = tw['created_at']
    _dt = datetime.strptime(_dt, '%a %b %d %H:%M:%S %z %Y')
    # Convert the UTC timestamp to the machine's local timezone before formatting.
    _dt = utc_to_local(_dt)
    t.datetime = str(_dt.strftime(Tweet_formats['datetime']))
    # date is of the format year,
    t.datestamp = _dt.strftime(Tweet_formats['datestamp'])
    t.timestamp = _dt.strftime(Tweet_formats['timestamp'])
    t.user_id = int(tw["user_id_str"])
    t.user_id_str = tw["user_id_str"]
    t.username = tw["user_data"]['screen_name']
    t.name = tw["user_data"]['name']
    t.place = tw['geo'] if 'geo' in tw and tw['geo'] else ""
    # NOTE(review): this is the scraping machine's UTC offset, not the
    # tweet author's timezone — confirm downstream consumers expect that.
    t.timezone = strftime("%z", localtime())
    t.mentions = _get_mentions(tw)
    t.reply_to = _get_reply_to(tw)
    try:
        t.urls = [_url['expanded_url'] for _url in tw['entities']['urls']]
    except KeyError:
        t.urls = []
    try:
        # Only direct photo attachments (expanded URL contains '/photo/').
        t.photos = [_img['media_url_https'] for _img in tw['entities']['media'] if _img['type'] == 'photo' and
                    _img['expanded_url'].find('/photo/') != -1]
    except KeyError:
        t.photos = []
    try:
        # Boolean-as-int flag: 1 when any extended media is attached.
        t.video = 1 if len(tw['extended_entities']['media']) else 0
    except KeyError:
        t.video = 0
    try:
        t.thumbnail = tw['extended_entities']['media'][0]['media_url_https']
    except KeyError:
        t.thumbnail = ''
    t.tweet = getText(tw)
    t.lang = tw['lang']
    try:
        t.hashtags = [hashtag['text'] for hashtag in tw['entities']['hashtags']]
    except KeyError:
        t.hashtags = []
    try:
        t.cashtags = [cashtag['text'] for cashtag in tw['entities']['symbols']]
    except KeyError:
        t.cashtags = []
    t.replies_count = tw['reply_count']
    t.retweets_count = tw['retweet_count']
    t.likes_count = tw['favorite_count']
    t.link = f"https://twitter.com/{t.username}/status/{t.id}"
    try:
        # 'retweet_data' is only attached for retweets; its absence raises
        # KeyError and falls into the non-retweet defaults below.
        if 'user_rt_id' in tw['retweet_data']:
            t.retweet = True
            t.retweet_id = tw['retweet_data']['retweet_id']
            t.retweet_date = tw['retweet_data']['retweet_date']
            t.user_rt = tw['retweet_data']['user_rt']
            t.user_rt_id = tw['retweet_data']['user_rt_id']
    except KeyError:
        t.retweet = False
        t.retweet_id = ''
        t.retweet_date = ''
        t.user_rt = ''
        t.user_rt_id = ''
    try:
        t.quote_url = tw['quoted_status_permalink']['expanded'] if tw['is_quote_status'] else ''
    except KeyError:
        # means that the quoted tweet have been deleted
        # NOTE(review): 0 here vs '' above is a type inconsistency — confirm
        # downstream formatters tolerate an int in this field.
        t.quote_url = 0
    t.near = config.Near if config.Near else ""
    t.geo = config.Geo if config.Geo else ""
    t.source = config.Source if config.Source else ""
    t.translate = ''
    t.trans_src = ''
    t.trans_dest = ''
    if config.Translate:
        try:
            ts = translator.translate(text=t.tweet, dest=config.TranslateDest)
            t.translate = ts.text
            t.trans_src = ts.src
            t.trans_dest = ts.dest
        # ref. https://github.com/SuniTheFish/ChainTranslator/blob/master/ChainTranslator/__main__.py#L31
        except ValueError as e:
            logme.debug(__name__ + ':Tweet:translator.translate:' + str(e))
            raise Exception("Invalid destination language: {} / Tweet: {}".format(config.TranslateDest, t.tweet))
    return t
async def Search(config, init):
    """Build the adaptive-search request for one page of results.

    Translates the run configuration into Twitter's advanced-search operators
    (from:/to:/since:/until:/filter:... appended to `q`) plus the fixed query
    parameters the endpoint expects. *init* is the pagination cursor ('-1'
    for the first page). Returns (url, params, serialized_query).
    """
    logme.debug(__name__ + ':Search')
    url = base
    tweet_count = 100
    q = ""
    params = [
        # ('include_blocking', '1'),
        # ('include_blocked_by', '1'),
        # ('include_followed_by', '1'),
        # ('include_want_retweets', '1'),
        # ('include_mute_edge', '1'),
        # ('include_can_dm', '1'),
        ('include_can_media_tag', '1'),
        # ('skip_status', '1'),
        # ('include_cards', '1'),
        ('include_ext_alt_text', 'true'),
        ('include_quote_count', 'true'),
        ('include_reply_count', '1'),
        ('tweet_mode', 'extended'),
        ('include_entities', 'true'),
        ('include_user_entities', 'true'),
        ('include_ext_media_availability', 'true'),
        ('send_error_codes', 'true'),
        ('simple_quoted_tweet', 'true'),
        ('count', tweet_count),
        # ('query_source', 'typed_query'),
        # ('pc', '1'),
        ('cursor', str(init)),
        ('spelling_corrections', '1'),
        ('ext', 'mediaStats%2ChighlightedLabel'),
        ('tweet_search_mode', 'live'),  # this can be handled better, maybe take an argument and set it then
    ]
    # 'f=tweets' requests the chronological feed instead of "Top" results.
    if not config.Popular_tweets:
        params.append(('f', 'tweets'))
    if config.Lang:
        params.append(("l", config.Lang))
        params.append(("lang", "en"))
    # NOTE(review): Query and Username both map to a from: operator, so
    # setting both produces two from: clauses — confirm that is intended.
    if config.Query:
        q += f" from:{config.Query}"
    if config.Username:
        q += f" from:{config.Username}"
    if config.Geo:
        # geocode: does not tolerate spaces in "lat,lon,radius".
        config.Geo = config.Geo.replace(" ", "")
        q += f" geocode:{config.Geo}"
    if config.Search:

        q += f" {config.Search}"
    if config.Year:
        # Year N means "tweets before N": until the first day of that year.
        q += f" until:{config.Year}-1-1"
    if config.Since:
        q += f" since:{_formatDate(config.Since)}"
    if config.Until:
        q += f" until:{_formatDate(config.Until)}"
    if config.Email:
        q += ' "mail" OR "email" OR'
        q += ' "gmail" OR "e-mail"'
    if config.Phone:
        q += ' "phone" OR "call me" OR "text me"'
    if config.Verified:
        q += " filter:verified"
    if config.To:
        q += f" to:{config.To}"
    if config.All:
        q += f" to:{config.All} OR from:{config.All} OR @{config.All}"
    if config.Near:
        q += f' near:"{config.Near}"'
    if config.Images:
        q += " filter:images"
    if config.Videos:
        q += " filter:videos"
    if config.Media:
        q += " filter:media"
    if config.Replies:
        q += " filter:replies"
    # although this filter can still be used, but I found it broken in my preliminary testing, needs more testing
    if config.Native_retweets:
        q += " filter:nativeretweets"
    if config.Min_likes:
        q += f" min_faves:{config.Min_likes}"
    if config.Min_retweets:
        q += f" min_retweets:{config.Min_retweets}"
    if config.Min_replies:
        q += f" min_replies:{config.Min_replies}"
    if config.Links == "include":
        q += " filter:links"
    elif config.Links == "exclude":
        q += " exclude:links"
    if config.Source:
        q += f" source:\"{config.Source}\""
    if config.Members_list:
        q += f" list:{config.Members_list}"
    if config.Filter_retweets:
        q += f" exclude:nativeretweets exclude:retweets"
    # A custom query, when given, replaces everything built above.
    if config.Custom_query:
        q = config.Custom_query

    q = q.strip()
    params.append(("q", q))
    _serialQuery = _sanitizeQuery(url, params)
    return url, params, _serialQuery
class user:
    """Plain attribute container for scraped profile data (filled by User())."""
    type = "user"

    def __init__(self):
        pass


User_formats = {
    'join_date': '%Y-%m-%d',
    'join_time': '%H:%M:%S %Z'
}


# ur object must be a json from the endpoint https://api.twitter.com/graphql
def User(ur):
    """Parse a GraphQL user payload into a `user` object.

    Raises KeyError with an explanatory message when the payload is missing
    the expected 'data'/'user' envelope.
    """
    logme.debug(__name__ + ':User')
    # BUG FIX: was `and`, which never caught a payload missing only 'user'
    # and crashed with a bare KeyError on ur['data'] when 'data' was missing.
    if 'data' not in ur or 'user' not in ur['data']:
        msg = 'malformed json! cannot be parsed to get user data'
        logme.fatal(msg)
        raise KeyError(msg)
    _usr = user()
    _usr.id = ur['data']['user']['rest_id']
    _legacy = ur['data']['user']['legacy']
    _usr.name = _legacy['name']
    _usr.username = _legacy['screen_name']
    _usr.bio = _legacy['description']
    _usr.location = _legacy['location']
    _usr.url = _legacy['url']
    # parsing date to user-friendly format
    _dt = datetime.datetime.strptime(_legacy['created_at'], '%a %b %d %H:%M:%S %z %Y')
    # date is of the format year,
    _usr.join_date = _dt.strftime(User_formats['join_date'])
    _usr.join_time = _dt.strftime(User_formats['join_time'])

    # :type `int`
    _usr.tweets = int(_legacy['statuses_count'])
    _usr.following = int(_legacy['friends_count'])
    _usr.followers = int(_legacy['followers_count'])
    _usr.likes = int(_legacy['favourites_count'])
    _usr.media_count = int(_legacy['media_count'])

    _usr.is_private = _legacy['protected']
    _usr.is_verified = _legacy['verified']
    _usr.avatar = _legacy['profile_image_url_https']
    # BUG FIX: 'profile_banner_url' is absent for users without a banner
    # (panda.py already guards this attribute); .get() yields None instead
    # of aborting the whole parse with a KeyError.
    _usr.background_image = _legacy.get('profile_banner_url')
    # TODO : future implementation
    # legacy_extended_profile is also available in some cases which can be used to get DOB of user
    return _usr
def Elastic(elasticsearch):
    """Announce the Elasticsearch instance being indexed to, if one is set."""
    if not elasticsearch:
        return
    print("[+] Indexing to Elasticsearch @ " + str(elasticsearch))