├── .gitignore
├── LICENCE.md
├── MANIFEST.in
├── README.md
├── requirements.txt
├── setup.py
├── test.py
└── tweeds
    ├── __init__.py
    ├── main.py
    ├── query.py
    └── tweet_scrape.py

/.gitignore:
--------------------------------------------------------------------------------
__pycache__/
*.py[cod]
*$py.class
*.json
*.csv

.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

env/

--------------------------------------------------------------------------------
/LICENCE.md:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 Achyuth Jois M

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include README.md

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Due to some changes in the Twitter API interface, we can't use this tool anymore!!
# Tweeds - Twitter Scraping Tool

Scrape tweets from any Twitter user profile. Twitter API alternative to scrape Twitter hashtags, threads, images, videos, statistics,
and Twitter history. Export data in JSON and CSV format. This project enables you to extract large amounts of data from Twitter.
It lets you do much more than the Twitter API, because it doesn't have rate limits and you don't even need a **Twitter account, a registered app,
or a Twitter API key.**

### Don't forget to star this project! 🌟 😍

## 💡 Features

- No API key required
- No rate limits
- No authentication required
- Get tweets from a specific user
- JSON and CSV export for further data analysis
- And the best part: it's open source 😉

## ✔️ Requirements

- Python >= 3.9

## ⚙ Installation

```bash
pip install tweeds
```

## 💃 Usage

```Bash
achyuthjoism-MBP ~ % tweeds

████████╗░██╗░░░░░░░██╗███████╗███████╗██████╗░░██████╗
╚══██╔══╝░██║░░██╗░░██║██╔════╝██╔════╝██╔══██╗██╔════╝
░░░██║░░░░╚██╗████╗██╔╝█████╗░░█████╗░░██║░░██║╚█████╗░
░░░██║░░░░░████╔═████║░██╔══╝░░██╔══╝░░██║░░██║░╚═══██╗
░░░██║░░░░░╚██╔╝░╚██╔╝░███████╗███████╗██████╔╝██████╔╝
░░░╚═╝░░░░░░╚═╝░░░╚═╝░░╚══════╝╚══════╝╚═════╝░╚═════╝░

                                        by Achyuth Jois M

usage: tweeds [-h] [-u U] [-s S] [--since SINCE] [--until UNTIL] [--limit LIMIT] [--near NEAR] [--geocode GEOCODE]
              [--year YEAR] [--today] [--verified] [--links] [--videos] [--images] [--media] [--minLikes MINLIKES]
              [--minRetweets MINRETWEETS] [--minReplies MINREPLIES] [--json JSON] [--csv CSV] [--silent]

Scrape Twitter user tweets

options:
  -h, --help            show this help message and exit
  -u U                  User's Tweets you want to scrape.
  -s S                  Search for Tweets containing this word or phrase.
  --since SINCE         Filter Tweets sent since date (Example: "2017-12-27 20:30:15" or 2017-12-27).
  --until UNTIL         Filter Tweets sent until date (Example: "2017-12-27 20:30:15" or 2017-12-27).
  --limit LIMIT         Number of Tweets to pull
  --near NEAR           Find tweets near a particular location
  --geocode GEOCODE     Search for geocoded Tweets.
  --year YEAR           Filter Tweets since the start of the specified year.
  --today               Filter Tweets from today
  --verified            Display Tweets only from verified users (Use with -s).
  --links               Exclude tweets containing one or more links.
  --videos              Display only Tweets with videos.
  --images              Display only Tweets with images.
  --media               Display Tweets with only images or videos.
  --minLikes MINLIKES   Minimum likes for the tweet
  --minRetweets MINRETWEETS
                        Minimum retweets for the tweet
  --minReplies MINREPLIES
                        Minimum replies for the tweet
  --json JSON           File to write the JSON output to.
  --csv CSV             File to write the CSV output to.
  --silent              Don't print the tweets (only works when writing an output file!)

achyuthjoism-MBP ~ %
```

## 📙 Example

### CLI Example

Some simple examples to help you understand the basics (a combined example follows the list):

- `tweeds -u username` - Scrape all the Tweets of a _user_ (doesn't include **retweets** but includes **replies**).
- `tweeds -u username -s pineapple` - Scrape all Tweets from the _user_'s timeline containing _pineapple_.
- `tweeds -s pineapple` - Collect every Tweet containing _pineapple_ from everyone's Tweets.
- `tweeds -u username --since "2015-12-20 20:30:15"` - Collect Tweets that were tweeted since 2015-12-20 20:30:15.
- `tweeds -u username --since 2015-12-20` - Collect Tweets that were tweeted since 2015-12-20 00:00:00.
- `tweeds -s "Rocking Star Yash" --verified` - Display Tweets by verified users that Tweeted about Rocking Star Yash.
- `tweeds --geocode "48.880048,2.385939,1km" --csv paris.csv --limit 10` - Scrape Tweets from a radius of 1km around a place in Paris and export them to a CSV file.
- `tweeds -u username --images` - Scrape Tweets from a user containing only images.
- `tweeds -u username --videos` - Scrape Tweets from a user containing only videos.
- `tweeds -u username --media` - Scrape Tweets from a user containing images or videos.
- `tweeds -u username --links` - Scrape Tweets from a user, excluding Tweets that contain links.
- `tweeds -u username --json file.json` - Scrape Tweets and save them to a JSON file.
- `tweeds -u username --csv file.csv` - Scrape Tweets and save them to a CSV file.
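
These flags compose, so date, popularity, and export options can all be combined in a single run. A sketch with a hypothetical search term and output name:

```bash
# Popular "KGF" Tweets from 2023 onwards, exported quietly to kgf.csv.
tweeds -s "KGF" --since 2023-01-01 --minLikes 100 --csv kgf.csv --silent
```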

### Module Example

Tweeds can also be used as a module, which allows custom formatting.
Example:

```python
import tweeds

query = tweeds.Query()

query.search = "Yash Boss"
query.limit = 10
query.verified = True

tweeds.search(query)
```
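
Every CLI flag has a matching field on `Query`, so exports work from the module too. A minimal sketch (the username and output path are hypothetical) that writes the results to JSON instead of printing them:

```python
import tweeds

query = tweeds.Query()

query.username = "example_user"  # hypothetical handle
query.limit = 5
query.json = "tweets.json"       # search() writes this file because the name ends in .json
query.silent = True              # suppress terminal output while exporting

tweeds.search(query)
```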

**Have fun 🥰💞**

## 📮 Details

### Obvious disclaimer

This tool is for educational purposes only; I am not responsible for its use.

### Less obvious disclaimer

This project is under the [MIT Licence](https://choosealicense.com/licenses/mit/), and you have to respect it.\
**Use it only for personal projects, criminal investigations, pentesting, or open-source projects.**

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
snscrape==0.5.0.20230113
pandas==1.5.3

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages

long_description = """Scrape tweets from any Twitter user profile. Twitter API alternative to scrape Twitter hashtags, threads, images, videos, statistics,
and Twitter history. Export data in JSON and CSV format. This project enables you to extract large amounts of data from Twitter.
It lets you do much more than the Twitter API, because it doesn't have rate limits and you don't even need a **Twitter account, a registered app,
or a Twitter API key.**"""

setup(
    name='tweeds',
    version='2.1.2.3',
    author='Achyuth Jois M',
    author_email='developer.arjm@gmail.com',
    packages=find_packages(include=['tweeds', 'tweeds.*']),
    install_requires=["snscrape==0.5.0.20230113", "pandas==1.5.3"],
    license='MIT',
    license_files=('LICENCE.md',),  # one-element tuple, matching the actual file name
    url='https://github.com/achyuthjoism/Twitter-OSINT',
    description='An advanced Twitter scraping & OSINT tool.',
    long_description=long_description,
    entry_points={
        'console_scripts': [
            'tweeds = tweeds.main:main',
        ]
    },
)

--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
from tweeds import main

main.main()

--------------------------------------------------------------------------------
/tweeds/__init__.py:
--------------------------------------------------------------------------------
from tweeds.query import Query
from tweeds.tweet_scrape import search

--------------------------------------------------------------------------------
/tweeds/main.py:
--------------------------------------------------------------------------------
import argparse as ap
import sys
from tweeds.query import Query
from tweeds.tweet_scrape import search


def banner():
    print("""
████████╗░██╗░░░░░░░██╗███████╗███████╗██████╗░░██████╗
╚══██╔══╝░██║░░██╗░░██║██╔════╝██╔════╝██╔══██╗██╔════╝
░░░██║░░░░╚██╗████╗██╔╝█████╗░░█████╗░░██║░░██║╚█████╗░
░░░██║░░░░░████╔═████║░██╔══╝░░██╔══╝░░██║░░██║░╚═══██╗
░░░██║░░░░░╚██╔╝░╚██╔╝░███████╗███████╗██████╔╝██████╔╝
░░░╚═╝░░░░░░╚═╝░░░╚═╝░░╚══════╝╚══════╝╚═════╝░╚═════╝░

                                        by Achyuth Jois M
    """)


def config(args: ap.Namespace) -> Query:
    """Copy the parsed CLI arguments onto a Query object."""
    c = Query()
    c.search = args.s
    c.username = args.u
    c.limit = args.limit
    c.json = args.json
    c.since = args.since
    c.until = args.until
    c.near = args.near
    c.minLikes = args.minLikes
    c.csv = args.csv
    c.minReplies = args.minReplies
    c.minRetweets = args.minRetweets
    c.silent = args.silent
    c.verified = args.verified
    c.geoCode = args.geocode
    c.links = args.links
    c.videos = args.videos
    c.images = args.images
    c.media = args.media
    c.year = args.year
    c.today = args.today

    return c


def process_args(args: ap.Namespace):
    q = config(args)
    search(q)


def main():
    # Print the banner here rather than at import time, so importing
    # tweeds.main as a module stays quiet.
    banner()

    parse = ap.ArgumentParser(description="Scrape Twitter user tweets")

    parse.add_argument("-u", help="User's Tweets you want to scrape.")
    parse.add_argument(
        "-s", help="Search for Tweets containing this word or phrase.")
    parse.add_argument(
        '--since', type=str, help="Filter Tweets sent since date (Example: \"2017-12-27 20:30:15\" or 2017-12-27)."
    )
    parse.add_argument(
        '--until', type=str, help="Filter Tweets sent until date (Example: \"2017-12-27 20:30:15\" or 2017-12-27)."
    )
    parse.add_argument(
        '--limit', type=int, help="Number of Tweets to pull"
    )
    parse.add_argument(
        '--near', type=str, help="Find tweets near a particular location"
    )
    parse.add_argument(
        '--geocode', type=str, help="Search for geocoded Tweets."
    )
    parse.add_argument(
        '--year', type=int, help="Filter Tweets since the start of the specified year."
    )
    parse.add_argument(
        '--today', help="Filter Tweets from today", action='store_true'
    )
    parse.add_argument(
        '--verified', help="Display Tweets only from verified users (Use with -s).", action='store_true'
    )
    parse.add_argument(
        '--links', help="Exclude tweets containing one or more links.", action='store_true'
    )
    parse.add_argument(
        "--videos", help="Display only Tweets with videos.", action='store_true'
    )
    parse.add_argument(
        "--images", help="Display only Tweets with images.", action='store_true'
    )
    parse.add_argument(
        "--media", help="Display Tweets with only images or videos.", action='store_true'
    )

    parse.add_argument(
        '--minLikes', type=int, help="Minimum likes for the tweet"
    )
    parse.add_argument(
        '--minRetweets', type=int, help="Minimum retweets for the tweet"
    )
    parse.add_argument(
        '--minReplies', type=int, help="Minimum replies for the tweet"
    )
    parse.add_argument(
        '--json', type=str, help="File to write the JSON output to.")
    parse.add_argument(
        '--csv', type=str, help="File to write the CSV output to."
    )
    parse.add_argument(
        '--silent', help="Don't print the tweets (only works when writing an output file!)", action='store_true'
    )

    # Fall back to --help when the command is run with no arguments.
    args = parse.parse_args(args=None if sys.argv[1:] else ['--help'])

    process_args(args)

--------------------------------------------------------------------------------
/tweeds/query.py:
--------------------------------------------------------------------------------
import dataclasses
from typing import Optional


@dataclasses.dataclass
class Query:
    """
    Search parameters for a Twitter query, mirroring the CLI flags.
    Every field has a default, so the dataclass-generated __init__
    works with no arguments.
    """
    username: Optional[str] = None
    search: Optional[str] = None
    since: Optional[str] = None
    until: Optional[str] = None
    json: Optional[str] = None
    limit: Optional[int] = None
    near: Optional[str] = None
    minLikes: Optional[int] = None
    csv: Optional[str] = None
    minReplies: Optional[int] = None
    minRetweets: Optional[int] = None
    silent: bool = False
    verified: bool = False
    geoCode: Optional[str] = None
    links: bool = False
    videos: bool = False
    images: bool = False
    media: bool = False
    year: Optional[int] = None
    today: bool = False

--------------------------------------------------------------------------------
/tweeds/tweet_scrape.py:
--------------------------------------------------------------------------------
import snscrape.modules.twitter as api
import pandas as pd
import json
from tweeds.query import Query
from datetime import date


def make_json(data, jsonFilePath):
    """Dump the collected tweets to a JSON file."""
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonf.write(json.dumps(data, indent=4))


def printRes(tweet: api.Tweet):
    print(f"{tweet.id} {tweet.date} <{tweet.user.username}> {tweet.rawContent} \n")


def toOBJ(tweet: api.Tweet) -> dict:
    return {
        "id": tweet.id,
        "date": tweet.date.strftime('%Y/%m/%d'),
        "username": tweet.user.username,
        "content": tweet.rawContent,
        "likes": tweet.likeCount,
        "retweet": tweet.retweetCount,
        "reply": tweet.replyCount,
        "user": {
            "username": tweet.user.username,
            "followers": tweet.user.followersCount,
            "displayName": tweet.user.displayname,
            "id": tweet.user.id
        },
        "url": tweet.url
    }


def search(q: Query) -> None:
    """Build a Twitter search query from q, then print and/or export the results."""
    query = ""

    if q.search:
        query += f"{q.search} "
    if q.username:
        query += f"(from:{q.username}) "
    if q.today:
        today = date.today().strftime("%Y-%m-%d")
        query += f"since:{today} "
    if q.year:
        query += f"since:{q.year}-01-01 "
    if q.until:
        query += f"until:{q.until} "
    if q.since:
        query += f"since:{q.since} "
    if q.minLikes:
        query += f"min_faves:{q.minLikes} "
    if q.minReplies:
        query += f"min_replies:{q.minReplies} "
    if q.minRetweets:
        query += f"min_retweets:{q.minRetweets} "
    if q.near:
        query += f"near:{q.near} "
    if q.geoCode:
        query += f"geocode:{q.geoCode} "
    if q.verified:
        query += "filter:verified "
    if q.media:
        query += "filter:media "
    if q.videos and not q.media and not q.images:
        query += "filter:native_video "
    if q.images and not q.media and not q.videos:
        query += "filter:images "
    if q.links:
        query += "-filter:links "

    jsonObj = {}
    csvObj = []

    for i, tweet in enumerate(api.TwitterSearchScraper(query).get_items()):
        if q.limit and i == q.limit:
            break
        jsonObj[tweet.id] = toOBJ(tweet)
        csvObj.append(
            [tweet.id, tweet.date, tweet.rawContent, tweet.url,
             tweet.likeCount, tweet.retweetCount, tweet.replyCount,
             tweet.sourceLabel[12:]]  # strip the leading "Twitter for "
        )
        # Print unless --silent was combined with a JSON/CSV export.
        if not (q.silent and (q.csv or q.json)):
            printRes(tweet)

    if q.json:
        if q.json.endswith(".json"):
            make_json(jsonObj, q.json)
            print("Output saved in JSON!")

    if q.csv:
        df = pd.DataFrame(
            csvObj, columns=["ID", "Date", "Tweet", "URL", "Likes", "Retweet", "Replies", "Source"])
        df.to_csv(q.csv)
        print("Output saved in CSV!")

--------------------------------------------------------------------------------