├── MANIFEST.in
├── mescrappy
├── scrappy
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-37.pyc
│ │ └── scrapper.cpython-37.pyc
│ └── scrapper.py
├── __pycache__
│ ├── __main__.cpython-37.pyc
│ └── scrapper.cpython-37.pyc
├── __main__.py
└── test.py
├── mescrappy.egg-info
├── top_level.txt
├── dependency_links.txt
├── SOURCES.txt
└── PKG-INFO
├── requirements.txt
├── dist
├── mescrappy-1.0.0.tar.gz
└── mescrappy-1.0.0-py3-none-any.whl
├── Annotation 2021-08-17 204528.png
├── pyproject.toml
├── .gitignore
├── setup.cfg
├── LICENCE
└── README.md
/MANIFEST.in:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/mescrappy/scrappy/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/mescrappy.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/mescrappy.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | autopep8==1.5.7
2 | pycodestyle==2.7.0
3 | selenium==3.141.0
4 | toml==0.10.2
5 | urllib3==1.26.6
6 |
--------------------------------------------------------------------------------
/dist/mescrappy-1.0.0.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MerlinEmris/youtube_srapping_with_python/HEAD/dist/mescrappy-1.0.0.tar.gz
--------------------------------------------------------------------------------
/Annotation 2021-08-17 204528.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MerlinEmris/youtube_srapping_with_python/HEAD/Annotation 2021-08-17 204528.png
--------------------------------------------------------------------------------
/dist/mescrappy-1.0.0-py3-none-any.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MerlinEmris/youtube_srapping_with_python/HEAD/dist/mescrappy-1.0.0-py3-none-any.whl
--------------------------------------------------------------------------------
/mescrappy/__pycache__/__main__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MerlinEmris/youtube_srapping_with_python/HEAD/mescrappy/__pycache__/__main__.cpython-37.pyc
--------------------------------------------------------------------------------
/mescrappy/__pycache__/scrapper.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MerlinEmris/youtube_srapping_with_python/HEAD/mescrappy/__pycache__/scrapper.cpython-37.pyc
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 | "selenium==3.141.0",
4 | "setuptools>=54",
5 | "wheel"
6 | ]
7 | build-backend = "setuptools.build_meta"
--------------------------------------------------------------------------------
/mescrappy/scrappy/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MerlinEmris/youtube_srapping_with_python/HEAD/mescrappy/scrappy/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/mescrappy/scrappy/__pycache__/scrapper.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MerlinEmris/youtube_srapping_with_python/HEAD/mescrappy/scrappy/__pycache__/scrapper.cpython-37.pyc
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /.idea
3 | /shelf/
4 | /workspace.xml
5 | # Datasource local storage ignored files
6 | /dataSources/
7 | /dataSources.local.xml
8 | # Editor-based HTTP Client requests
9 | /httpRequests/
10 |
--------------------------------------------------------------------------------
/mescrappy.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
1 | LICENCE
2 | MANIFEST.in
3 | README.md
4 | pyproject.toml
5 | setup.cfg
6 | mescrappy.egg-info/PKG-INFO
7 | mescrappy.egg-info/SOURCES.txt
8 | mescrappy.egg-info/dependency_links.txt
9 | mescrappy.egg-info/top_level.txt
--------------------------------------------------------------------------------
/mescrappy/__main__.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from scrappy.scrapper import youtube_video_data_scrapper as yvds
3 |
4 |
if __name__ == "__main__":
    # Command-line entry point: the first argument is the video URL and the
    # second is the path to the browser driver executable. Any further
    # arguments are ignored.
    if len(sys.argv) < 3:
        # sys.exit() with a string writes it to stderr and exits with
        # status 1, which is clearer than a bare IndexError on sys.argv.
        sys.exit(f"usage: {sys.argv[0]} <video_url> <driver_path>")
    url, driver = sys.argv[1:3]
    print(url, driver)
    print(yvds(url, driver))
12 |
--------------------------------------------------------------------------------
/mescrappy/test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrappy.scrapper import youtube_video_data_scrapper
3 |
4 |
class TestScrapper(unittest.TestCase):
    """Checks the scrapper's output for one fixed YouTube watch URL."""

    def test_scrap(self):
        """The 'title' entry of the scraped data equals the expected title."""
        url = 'https://www.youtube.com/watch?v=TFMnICdHiyM'
        driver = r"C:\Users\ME\projects\for_github\chromedriver_win32\chromedriver.exe"
        data = youtube_video_data_scrapper(url=url, driver=driver)
        # assertEqual rather than assertAlmostEqual: the latter subtracts its
        # arguments when they differ, which raises TypeError for strings
        # instead of producing a readable assertion failure.
        self.assertEqual(
            data['title'],
            "Xiaomi Mi 11 Lite vs Samsung A52: SIMILAR BUT ONLY ONE WINNER! Let's Find Out!",
        )
11 |
12 |
13 | if __name__ == '__main__':
14 | unittest.main()
15 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = mescrappy
3 | version = 1.0.0
4 | author = Merdan Chariyarov
5 | author_email = merdanchariyarov@gmail.com
6 | description = python selenium youtube scrapper
7 | long_description = file: README.md
8 | long_description_content_type = text/markdown
9 | url = https://github.com/MerlinEmris/youtube_srapping_with_python
10 | classifiers =
11 | Programming Language :: Python :: 3
12 | License :: OSI Approved :: MIT License
13 | Operating System :: OS Independent
14 |
15 | [options]
16 | packages = find:
17 | python_requires = >=3.7
18 | include_package_data = True
--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
1 |
2 | MIT License
3 |
4 | Copyright (c) 2021 Merdan Chariyarov
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
--------------------------------------------------------------------------------
/mescrappy/scrappy/scrapper.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.common.by import By
3 | from selenium.webdriver.support.ui import WebDriverWait
4 | from selenium.webdriver.support import expected_conditions as EC
5 |
6 |
def youtube_video_data_scrapper(url: str, driver: str) -> dict:
    """
    youtube video data scrapper

    Starts a Chrome session from the given driver executable, loads ``url``
    and reads the text of a fixed set of page elements located by XPath.

    :param url: video url starting with https://
    :param driver: path to browser driver for selenium
    :return: dict with the keys 'title', 'description', 'owner', 'date',
        'views', 'like' and 'dislike', holding the text read from the page
    """
    # Keep the WebDriver under its own name instead of rebinding the
    # ``driver`` parameter, which holds the executable path.
    browser = webdriver.Chrome(driver)
    try:
        browser.get(url)

        def text_of(xpath):
            # Text content of the first element matching ``xpath``.
            return browser.find_element_by_xpath(xpath).text

        title = text_of(
            '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[6]/div[2]/ytd-video-primary-info-renderer/div/h1/yt-formatted-string')
        description = text_of(
            '//*[@id="description"]/yt-formatted-string')
        owner = text_of(
            '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[7]/div[2]/ytd-video-secondary-info-renderer/div/div/ytd-video-owner-renderer/div[1]/ytd-channel-name/div/div/yt-formatted-string/a')
        # Keep only what precedes the word 'views', then trim surrounding
        # whitespace and any leading/trailing commas.
        views = text_of(
            '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[6]/div[2]/ytd-video-primary-info-renderer/div/div/div[1]/div[1]/ytd-video-view-count-renderer/span[1]').split('views')[0].strip().strip(',')
        date = text_of(
            '//*[@id="info-strings"]/yt-formatted-string')
        like = text_of(
            '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[6]/div[2]/ytd-video-primary-info-renderer/div/div/div[3]/div/ytd-menu-renderer/div/ytd-toggle-button-renderer[1]/a/yt-formatted-string')
        dislike = text_of(
            '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[6]/div[2]/ytd-video-primary-info-renderer/div/div/div[3]/div/ytd-menu-renderer/div/ytd-toggle-button-renderer[2]/a/yt-formatted-string')

        return {
            'title': title,
            "description": description,
            'owner': owner,
            'date': date,
            "views": views,
            'like': like,
            'dislike': dislike
        }
    finally:
        # Always end the browser session, including when a lookup above
        # raises; quit() shuts down the whole driver session rather than
        # only closing the current window.
        browser.quit()
44 |
--------------------------------------------------------------------------------
/mescrappy.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 2.1
2 | Name: mescrappy
3 | Version: 1.0.0
4 | Summary: python selenium youtube scrapper
5 | Home-page: https://github.com/MerlinEmris/youtube_srapping_with_python
6 | Author: Merdan Chariyarov
7 | Author-email: merdanchariyarov@gmail.com
8 | License: MIT
9 | Platform: Independent
10 | Classifier: Programming Language :: Python :: 3
11 | Classifier: License :: OSI Approved :: MIT License
12 | Classifier: Operating System :: OS Independent
13 | Requires-Python: >=3.7
14 | Description-Content-Type: text/markdown
15 | License-File: LICENCE
16 |
17 | # mescrappy - Python + Selenium **Youtube** scrapper
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
6 |
7 |
8 |
9 |
10 |