├── MANIFEST.in
├── .gitignore
├── pyproject.toml
├── src
│   └── twitter_scraper_without_api
│       ├── __init__.py
│       ├── scraping_utilities.py
│       ├── driver_utils.py
│       ├── driver_initialisation.py
│       ├── element_finder.py
│       └── twitter_scraper.py
├── requirements.txt
├── setup.py
├── .github
│   └── workflows
│       └── python-app.yml
└── README.MD

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include README.MD
include LICENSE
include requirements.txt

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
firefox-geckodriver/*
geckodriver.log
src/twitter_scraper_without_api/__pycache__/*

--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[build-system]
requires = ["setuptools>=42"]
build-backend = "setuptools.build_meta"

--------------------------------------------------------------------------------
/src/twitter_scraper_without_api/__init__.py:
--------------------------------------------------------------------------------
from .scraping_utilities import *
from .driver_utils import *
from .driver_initialisation import *
from .element_finder import *
from .twitter_scraper import *

--------------------------------------------------------------------------------
/src/twitter_scraper_without_api/scraping_utilities.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

from inspect import currentframe
import re


class Scraping_utilities:
    """Static text-parsing helpers shared by the element finders."""

    @staticmethod
    def __parse_name(string):
        """Return the display name, dropping anything after the first '('."""
        try:
            return string.split("(")[0].strip()
        except Exception as ex:
            print("Error on line no. {}: {}".format(currentframe().f_lineno, ex))

    @staticmethod
    def __extract_digits(string):
        """Return the first run of digits in the string as an int."""
        try:
            return int(re.search(r'\d+', string).group(0))
        except Exception as ex:
            print("Error on line no. {}: {}".format(currentframe().f_lineno, ex))
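
Both helpers above are double-underscore (name-mangled) static methods, so code outside the class reaches them through the mangled names, exactly as element_finder.py does later in the package. A minimal sketch with illustrative inputs (not part of the package):

    from twitter_scraper_without_api.scraping_utilities import Scraping_utilities

    # "27 replies" -> 27; the regex grabs only the first run of digits
    count = Scraping_utilities._Scraping_utilities__extract_digits("27 replies")
    # "Jane Doe (@jane)" -> "Jane Doe"
    name = Scraping_utilities._Scraping_utilities__parse_name("Jane Doe (@jane)")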
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
async-generator==1.10
attrs==21.4.0
beautifulsoup4==4.11.1
build==0.7.0
certifi==2022.5.18.1
cffi==1.15.0
charset-normalizer==2.0.12
colorama==0.4.4
cryptography==37.0.2
distlib==0.3.4
distro==1.7.0
filelock==3.7.0
h11==0.13.0
idna==3.3
importlib-metadata==4.11.3
mozdownload==1.26.0
mozfile==2.1.0
mozinfo==1.2.2
numpy==1.21.6
outcome==1.1.0
packaging==21.3
pandas==1.3.5
pep517==0.12.0
platformdirs==2.5.2
platinfo==0.15.0
progressbar2==4.0.0
py-firefox-driver-manager==0.0.4
pycparser==2.21
pyOpenSSL==22.0.0
pyparsing==3.0.9
PySocks==1.7.1
python-dateutil==2.8.2
python-utils==3.3.0
pytz==2022.1
pywin32-ctypes==0.2.0
redo==2.0.3
requests==2.27.1
selenium==4.1.5
six==1.16.0
sniffio==1.2.0
sortedcontainers==2.4.0
soupsieve==2.3.2.post1
style==1.1.0
tomli==2.0.1
treeherder-client==5.0.0
trio==0.20.0
trio-websocket==0.9.2
typing_extensions==4.2.0
update==0.0.1
urllib3==1.26.9
virtualenv==20.14.1
wsproto==1.1.0
zipp==3.8.0

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup
import os


def read_file(filename):
    with open(os.path.join(os.path.dirname(__file__), filename)) as file:
        return file.read()


lib_folder = os.path.dirname(os.path.realpath(__file__))
requirement_path = os.path.join(lib_folder, 'requirements.txt')
install_requires = []  # populated from requirements.txt, e.g. ["selenium==4.1.5", ...]
if os.path.isfile(requirement_path):
    with open(requirement_path, encoding='utf-8') as f:
        install_requires = f.read().splitlines()

setup(
    name='twitter_scraper_without_api',
    version='0.0.6',
    license='MIT',
    author='Hamed',
    author_email='hamed.minaei@gmail.com',
    description='twitter_scraper without API',
    long_description=read_file('README.MD'),
    long_description_content_type="text/markdown",
    url="https://github.com/HamedMinaeizaeim/twitter_scraper",
    project_urls={
        "Bug Tracker": "https://github.com/HamedMinaeizaeim/twitter_scraper/issues",
    },
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    install_requires=install_requires,
    packages=['twitter_scraper_without_api'],
    package_dir={'': 'src'},
    python_requires=">=3.6",
)

--------------------------------------------------------------------------------
/.github/workflows/python-app.yml:
--------------------------------------------------------------------------------
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Python application

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

permissions:
  contents: read

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python 3.10
      uses: actions/setup-python@v3
      with:
        python-version: "3.10"
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install flake8 pytest
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    #- name: Test with pytest
    #  run: |
    #    pytest

  deploy:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python
      uses: actions/setup-python@v3
      with:
        python-version: '3.x'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install build
    - name: Build package
      run: python -m build
    - name: Publish package
      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
      with:
        user: __token__
        password: ${{ secrets.PYPI_API_TOKEN }}

--------------------------------------------------------------------------------
/src/twitter_scraper_without_api/driver_utils.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
import time
from inspect import currentframe
from random import randint

try:
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.by import By
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.keys import Keys
except Exception as ex:
    frameinfo = currentframe()
    print("Error on line no. {} : {}".format(frameinfo.f_lineno, ex))

frameinfo = currentframe()


class Utilities:
    """This class contains all the methods related to driver behaviour,
    like scrolling and waiting for elements to appear. It contains only
    static methods, each of which accepts a driver instance as an argument."""

    @staticmethod
    def __wait_until_tweets_appear(driver):
        try:
            WebDriverWait(driver, 10).until(EC.presence_of_element_located(
                (By.CSS_SELECTOR, '[data-testid="tweet"]')))
        except WebDriverException:
            print("Tweets did not appear!")

    @staticmethod
    def __scroll_down(driver):
        try:
            # find_element_by_css_selector still exists in the pinned
            # selenium==4.1.5; it was removed in Selenium 4.3 (see note below)
            body = driver.find_element_by_css_selector('body')
            for _ in range(3):
                body.send_keys(Keys.PAGE_DOWN)
        except Exception as ex:
            print("Error on line no. {} : {}".format(frameinfo.f_lineno, ex))

    @staticmethod
    def __wait_until_completion(driver):
        """waits until the page has finished loading"""
        try:
            state = ""
            while state != "complete":
                time.sleep(randint(3, 5))
                state = driver.execute_script("return document.readyState")
        except Exception as ex:
            print(ex)
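
Utilities (and the finder methods later in the package) rely on the legacy find_element_by_* API, which was deprecated in Selenium 4.0 and removed in 4.3 — presumably why requirements.txt pins selenium==4.1.5. A hedged sketch of the By-based equivalent of __scroll_down, should that pin ever be lifted:

    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys

    def scroll_down(driver, pages=3):
        """Modern-locator rewrite; assumes a live WebDriver instance."""
        body = driver.find_element(By.CSS_SELECTOR, 'body')
        for _ in range(pages):
            body.send_keys(Keys.PAGE_DOWN)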

--------------------------------------------------------------------------------
/README.MD:
--------------------------------------------------------------------------------
Twitter scraper selenium

Python's package to scrape Twitter's front-end easily with selenium.

[![PyPI license](https://img.shields.io/pypi/l/ansicolortags.svg)](https://opensource.org/licenses/MIT) [![Python >=3.6.9](https://img.shields.io/badge/python-3.6+-blue.svg)](https://www.python.org/downloads/release/python-360/)
[![Maintenance](https://img.shields.io/badge/Maintained-Yes-green.svg)](https://github.com/shaikhsajid1111/facebook_page_scraper/graphs/commit-activity)

# Twitter_scraper_without_API

This package extracts information from Twitter without using the official Twitter API, which imposes rate limits and costs. You can search by keyword and time frame (in minutes), and you can extract an unlimited number of tweets.

## Prerequisites

- Python 3.6+
- Firefox browser

## Installation

You can install from source using

    git clone https://github.com/HamedMinaeizaeim/twitter_scraper_without_API.git

and then run

    python setup.py install

or you can run

    pip install -r requirements.txt

Alternatively, you can install from **PyPI**:

    pip install twitter_scraper_without_api

## How to use

To use this library, just import the TwitterScraper class and specify your search keyword. By default, it returns all tweets posted within the last minute; set last_n_mins to extract tweets from the last n minutes instead. Here is the code to do that:

    from twitter_scraper_without_api import TwitterScraper
    twitter = TwitterScraper('bitcoin')
    twitter.last_n_mins = 3
    twitter.fetch_data()

## Export options

You can export the data as JSON, a pandas DataFrame, or CSV:

    df = twitter.store_data('dataFrame')
    csv = twitter.store_data('csv')
    json = twitter.store_data('json')

## Privacy

There is no privacy issue with this library: the search is based on publicly available information.
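
The driver layer in the next file can also be used on its own when you want a ready-made headless Firefox session. A minimal sketch (not in the README):

    from twitter_scraper_without_api.driver_initialisation import DriverInitilizer

    driver = DriverInitilizer().set_driver_for_browser()  # fetches geckodriver if needed
    driver.get("https://twitter.com/explore")
    driver.quit()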
--------------------------------------------------------------------------------
/src/twitter_scraper_without_api/driver_initialisation.py:
--------------------------------------------------------------------------------
from selenium import webdriver
# import Firefox Options to set browser capabilities
from selenium.webdriver.firefox.options import Options as FirefoxOptions
# the driver manager downloads the geckodriver binary for the browser
from py_firefox_driver_manager import GeckoFireFoxdriverManager


class DriverInitilizer:

    def __init__(self, proxy=None):
        self.proxy = proxy

    def set_properties(self, browser_option):
        browser_option.add_argument('--headless')  # runs browser in headless mode
        browser_option.add_argument('--no-sandbox')
        browser_option.add_argument("--disable-dev-shm-usage")
        browser_option.add_argument('--ignore-certificate-errors')
        browser_option.add_argument('--disable-gpu')
        browser_option.add_argument('--log-level=3')
        browser_option.add_argument('--disable-notifications')
        browser_option.add_argument('--disable-popup-blocking')
        return browser_option

    def setup_profile(self):
        """Set up a Firefox profile that downloads files without prompting,
        skips images and stylesheets, and raises script/response timeouts.

        :return profile: the configured FirefoxProfile
        """
        profile = webdriver.FirefoxProfile()
        # profile.set_preference("browser.download.dir", self.file_location)
        profile.set_preference("browser.download.folderList", 2)
        profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
                               "application/csv,application/excel,application/vnd.msexcel,application/vnd.ms-excel,text/anytext,text/comma-separated-values,text/csv,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/octet-stream")
        profile.set_preference("browser.download.manager.showWhenStarting", False)
        profile.set_preference("browser.helperApps.neverAsk.openFile",
                               "application/csv,application/excel,application/vnd.msexcel,application/vnd.ms-excel,text/anytext,text/comma-separated-values,text/csv,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/octet-stream")
        profile.set_preference("browser.helperApps.alwaysAsk.force", False)
        profile.set_preference("browser.download.manager.useWindow", False)
        profile.set_preference("browser.download.manager.focusWhenStarting", False)
        profile.set_preference("browser.download.manager.alertOnEXEOpen", False)
        profile.set_preference("browser.download.manager.showAlertOnComplete", False)
        profile.set_preference("browser.download.manager.closeWhenDone", True)
        profile.set_preference("pdfjs.disabled", True)
        profile.set_preference('permissions.default.stylesheet', 2)
        profile.set_preference('permissions.default.image', 2)
        profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', 'false')
        profile.set_preference("http.response.timeout", 500)
        profile.set_preference("dom.max_script_run_time", 500)
        return profile

    def set_driver_for_browser(self):
        """Return a headless Firefox driver instance, routed through the
        configured proxy when one was supplied."""
        browser_option = FirefoxOptions()
        if self.proxy is not None:
            options = {
                'https': 'https://{}'.format(self.proxy.replace(" ", "")),
                'http': 'http://{}'.format(self.proxy.replace(" ", "")),
                'no_proxy': 'localhost, 127.0.0.1'
            }
            # NOTE: seleniumwire_options is only understood by selenium-wire's
            # Firefox driver, not plain selenium (see the note after this file)
            return webdriver.Firefox(executable_path=GeckoFireFoxdriverManager().install_geckodriver(),
                                     options=self.set_properties(browser_option), seleniumwire_options=options)

        return webdriver.Firefox(executable_path=GeckoFireFoxdriverManager().install_geckodriver(),
                                 options=self.set_properties(browser_option))
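
The proxy branch above passes seleniumwire_options, a keyword that plain selenium's webdriver.Firefox does not accept; it appears to assume the selenium-wire package, which is not listed in requirements.txt. A hedged sketch of how selenium-wire expects that mapping, with a hypothetical proxy host:

    # pip install selenium-wire
    from seleniumwire import webdriver as wire_webdriver

    proxy = "203.0.113.7:3128"  # hypothetical host:port
    driver = wire_webdriver.Firefox(seleniumwire_options={
        'proxy': {
            'https': 'https://{}'.format(proxy),
            'http': 'http://{}'.format(proxy),
            'no_proxy': 'localhost,127.0.0.1'
        }
    })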
--------------------------------------------------------------------------------
/src/twitter_scraper_without_api/element_finder.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

from selenium.common.exceptions import NoSuchElementException
from .scraping_utilities import Scraping_utilities
from dateutil.parser import parse


class Finder:
    """
    This class contains all the static methods that accept a webdriver
    instance or a tweet WebElement, locate elements within it, and return
    what was found.
    Methods follow the convention:

    @staticmethod
    def __method_name(parameters):
    """

    @staticmethod
    def __fetch_all_tweets(driver):
        try:
            return driver.find_elements_by_css_selector('[data-testid="tweet"]')
        except Exception as ex:
            print("Error at method __fetch_all_tweets: {}".format(ex))

    @staticmethod
    def __find_replies(tweet):
        try:
            replies_element = tweet.find_element_by_css_selector('[data-testid="reply"]')
            replies = replies_element.get_attribute("aria-label")
            return Scraping_utilities._Scraping_utilities__extract_digits(replies)
        except Exception as ex:
            print("Error at method __find_replies: {}".format(ex))
            return ""

    @staticmethod
    def __find_shares(tweet):
        try:
            shares_element = tweet.find_element_by_css_selector('[data-testid="retweet"]')
            shares = shares_element.get_attribute("aria-label")
            return Scraping_utilities._Scraping_utilities__extract_digits(shares)
        except Exception as ex:
            print("Error at method __find_shares: {}".format(ex))
            return ""

    @staticmethod
    def __find_status(tweet):
        try:
            anchor = tweet.find_element_by_css_selector("a.r-bcqeeo.r-3s2u2q.r-qvutc0")
            return (anchor.get_attribute("href").split("/"), anchor.get_attribute("href"))
        except Exception as ex:
            print("Error at method __find_status: {}".format(ex))
            # callers unpack a (parts, href) pair, so return an empty one
            return ([], "")

    @staticmethod
    def __find_all_anchor_tags(tweet):
        try:
            return tweet.find_elements_by_tag_name('a')
        except Exception as ex:
            print("Error at method __find_all_anchor_tags: {}".format(ex))

    @staticmethod
    def __find_timestamp(tweet):
        try:
            timestamp = tweet.find_element_by_tag_name(
                "time").get_attribute("datetime")
            # posted_time = parse(timestamp).isoformat()
            return timestamp
        except Exception as ex:
            print("Error at method __find_timestamp: {}".format(ex))

    @staticmethod
    def __find_content(tweet):
        try:
            content_element = tweet.find_element_by_css_selector('div[lang]')
            return content_element.text
        except NoSuchElementException:
            return ""
        except Exception as ex:
            print("Error at method __find_content: {}".format(ex))

    @staticmethod
    def __find_like(tweet):
        try:
            like_element = tweet.find_element_by_css_selector('[data-testid="like"]')
            likes = like_element.get_attribute("aria-label")
            return Scraping_utilities._Scraping_utilities__extract_digits(likes)
        except Exception as ex:
            print("Error at method __find_like: {}".format(ex))

    @staticmethod
    def __find_images(tweet):
        try:
            image_elements = tweet.find_elements_by_css_selector(
                'div[data-testid="tweetPhoto"]')
            images = []
            for image_div in image_elements:
                href = image_div.find_element_by_tag_name("img").get_attribute("src")
                images.append(href)
            return images
        except Exception as ex:
            print("Error at method __find_images: {}".format(ex))

    @staticmethod
    def __find_videos(tweet):
        try:
            video_elements = tweet.find_elements_by_css_selector(
                'div[data-testid="videoPlayer"]')
            videos = []
            for video_div in video_elements:
                href = video_div.find_element_by_tag_name("video").get_attribute("src")
                videos.append(href)
            return videos
        except Exception as ex:
            print("Error at method __find_videos: {}".format(ex))

    @staticmethod
    def __is_retweet(tweet):
        try:
            tweet.find_element_by_css_selector('div.r-92ng3h.r-qvutc0')
            return True
        except NoSuchElementException:
            return False
        except Exception as ex:
            print("Error at method __is_retweet: {}".format(ex))
            return False

    @staticmethod
    def __find_name_from_post(tweet, is_retweet=False):
        try:
            name = "NA"
            anchors = Finder.__find_all_anchor_tags(tweet)
            if len(anchors) > 2:
                if is_retweet:
                    name = anchors[2].text.strip()
                else:
                    name = anchors[1].text.split("\n")[0]
            return name
        except Exception as ex:
            print("Error at method __find_name_from_post: {}".format(ex))

    @staticmethod
    def __find_external_link(tweet):
        try:
            card = tweet.find_element_by_css_selector('[data-testid="card.wrapper"]')
            href = card.find_element_by_tag_name('a')
            return href.get_attribute("href")
        except NoSuchElementException:
            return ""
        except Exception as ex:
            print("Error at method __find_external_link: {}".format(ex))
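
Like Scraping_utilities, every Finder method is name-mangled, which is how the scraper in the next file calls them. A minimal sketch of pulling a few fields from the current results page (assumes a driver already showing a search page):

    tweets = Finder._Finder__fetch_all_tweets(driver)
    for tweet in tweets[:3]:
        print(Finder._Finder__find_content(tweet),
              Finder._Finder__find_like(tweet),
              Finder._Finder__find_timestamp(tweet))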
--------------------------------------------------------------------------------
/src/twitter_scraper_without_api/twitter_scraper.py:
--------------------------------------------------------------------------------
import re
import time
from datetime import datetime, timedelta
from urllib.parse import quote

import dateutil.parser
import pandas as pd
import pytz

from .element_finder import Finder
from .driver_initialisation import DriverInitilizer
from .driver_utils import Utilities


class TwitterScraper:

    def __init__(self, keyword):
        self.keyword = keyword
        self.since = self.set_since()
        self.until = self.set_until()
        self.url = "https://twitter.com/search?q={}%20until%3A{}%20since%3A{}&src=typed_query&f=live".format(
            quote(keyword), self.until, self.since)
        self.driver = self.setup_driver()
        self.retry = 10
        self.data = {}
        self._last_n_mins = 1

    def __repr__(self):
        return "TwitterScraper({!r})".format(self.keyword)

    def __str__(self):
        return "TwitterScraper for keyword '{}'".format(self.keyword)

    @property
    def last_n_mins(self):
        return self._last_n_mins

    @last_n_mins.setter
    def last_n_mins(self, value):
        if str(value).isnumeric():
            self._last_n_mins = int(value)
        else:
            print("last_n_mins must be a numeric value in minutes - falling back to the default of 1 minute")
            self._last_n_mins = 1

    @staticmethod
    def str_to_datetime(str_datetime):
        """Convert Twitter's ISO timestamp (UTC) to Pacific/Auckland time."""
        datetime_old_zone = dateutil.parser.isoparse(str_datetime)
        nz_datetime_time = datetime_old_zone.replace(tzinfo=pytz.utc).astimezone(pytz.timezone("Pacific/Auckland"))
        return nz_datetime_time

    @staticmethod
    def convert_json_to_dataframe(json_data):
        frames = [pd.json_normalize(json_data[key]) for key in json_data]
        return pd.concat(frames)

    def set_since(self):
        yesterday = datetime.now() - timedelta(days=1)
        return yesterday.strftime('%Y-%m-%d')

    def set_until(self):
        tomorrow = datetime.now() + timedelta(days=1)
        return tomorrow.strftime('%Y-%m-%d')

    def __check_tweets_presence(self, tweet_list):
        if len(tweet_list) <= 0:
            self.retry -= 1

    def __check_retry(self):
        return self.retry <= 0

    def setup_driver(self):
        firefox = DriverInitilizer()
        driver = firefox.set_driver_for_browser()
        driver.set_page_load_timeout(6000)
        driver.get(self.url)
        return driver

    def obtain_info_from_tweet(self, tweet):
        name = Finder._Finder__find_name_from_post(tweet)
        status, tweet_url = Finder._Finder__find_status(tweet)
        replies = Finder._Finder__find_replies(tweet)
        retweets = Finder._Finder__find_shares(tweet)
        username = tweet_url.split("/")[3]
        status = status[-1]
        is_retweet = Finder._Finder__is_retweet(tweet)
        posted_time = Finder._Finder__find_timestamp(tweet)
        posted_time = TwitterScraper.str_to_datetime(posted_time)
        content = Finder._Finder__find_content(tweet)
        likes = Finder._Finder__find_like(tweet)
        images = Finder._Finder__find_images(tweet)
        videos = Finder._Finder__find_videos(tweet)
        hashtags = re.findall(r"#(\w+)", content)
        mentions = re.findall(r"@(\w+)", content)
        profile_picture = "https://twitter.com/{}/photo".format(username)
        link = Finder._Finder__find_external_link(tweet)
        return link, profile_picture, mentions, hashtags,\
            videos, images, likes, content, posted_time,\
            is_retweet, status, username, retweets, replies,\
            tweet_url, name

    def update_tweet_data(self, link, profile_picture, mentions, hashtags,
                          videos, images, likes, content, posted_time,
                          is_retweet, status, username, retweets, replies,
                          tweet_url, name):
        self.data[status] = {
            "tweet_id": status,
            "username": username,
            "name": name,
            "profile_picture": profile_picture,
            "replies": replies,
            "retweets": retweets,
            "likes": likes,
            "is_retweet": is_retweet,
            "posted_time": posted_time,
            "content": content,
            "hashtags": hashtags,
            "mentions": mentions,
            "images": images,
            "videos": videos,
            "tweet_url": tweet_url,
            "link": link
        }

    def fetch_data(self):
        already_fetched_posts = []
        time.sleep(4)
        present_tweets = Finder._Finder__fetch_all_tweets(self.driver)
        self.__check_tweets_presence(present_tweets)
        already_fetched_posts.extend(present_tweets)
        latest_time_now = datetime.now(pytz.timezone("Pacific/Auckland"))
        ref_date_time = latest_time_now - timedelta(minutes=self._last_n_mins)

        while (latest_time_now - ref_date_time).total_seconds() > 0:

            for tweet in present_tweets:
                link, profile_picture, mentions, hashtags, \
                    videos, images, likes, content, posted_time, \
                    is_retweet, status, username, retweets, replies, \
                    tweet_url, name = self.obtain_info_from_tweet(tweet)
                self.update_tweet_data(link, profile_picture, mentions, hashtags,
                                       videos, images, likes, content, posted_time,
                                       is_retweet, status, username, retweets, replies,
                                       tweet_url, name)

                # track the oldest timestamp seen so far, so the loop stops
                # once tweets older than the requested window appear
                if (posted_time - latest_time_now).total_seconds() < 0:
                    latest_time_now = posted_time

            Utilities._Utilities__scroll_down(self.driver)
            Utilities._Utilities__wait_until_completion(self.driver)
            Utilities._Utilities__wait_until_tweets_appear(self.driver)
            present_tweets = Finder._Finder__fetch_all_tweets(self.driver)
            present_tweets = [post for post in present_tweets if post not in already_fetched_posts]
            self.__check_tweets_presence(present_tweets)
            already_fetched_posts.extend(present_tweets)
            if self.__check_retry() is True:
                break
        self.driver.quit()

    def store_data(self, format='Json'):
        if format.lower() == 'json':
            return self.data
        elif format.lower() == 'dataframe':
            return TwitterScraper.convert_json_to_dataframe(self.data)
        elif format.lower() == 'csv':
            df = TwitterScraper.convert_json_to_dataframe(self.data)
            return df.to_csv()
        else:
            print("store_data does not support that format; use 'json', 'dataFrame' or 'csv'")

--------------------------------------------------------------------------------
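
Putting the pieces together, an end-to-end sketch that scrapes roughly five minutes of tweets and writes them to disk (the output file name is illustrative):

    from twitter_scraper_without_api import TwitterScraper

    twitter = TwitterScraper('bitcoin')
    twitter.last_n_mins = 5
    twitter.fetch_data()
    df = twitter.store_data('dataFrame')
    df.to_csv('bitcoin_tweets.csv', index=False)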