├── .env
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   └── workflows
│       └── docker-image.yml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Dockerfile
├── FUNDING.yml
├── LICENSE
├── README copy.md
├── README.md
├── SECURITY.md
├── app.py
├── contents.json
├── data.json
├── geckodriver.log
├── requirements.txt
├── results
│   ├── berita terbaru_scroll-2_20230203_172012.txt
│   └── jpu_scroll-1_20230204_193840.txt
├── sample_output.png
└── yt_scraper_sroll
    └── __init__.py
/.env:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhohirpradana/Python-Youtube-Search-Scraper/503036505aa64e966b3f2032341dd954fd5ccefa/.env
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Desktop (please complete the following information):**
27 | - OS: [e.g. iOS]
28 | - Browser [e.g. chrome, safari]
29 | - Version [e.g. 22]
30 |
31 | **Smartphone (please complete the following information):**
32 | - Device: [e.g. iPhone6]
33 | - OS: [e.g. iOS8.1]
34 | - Browser [e.g. stock browser, safari]
35 | - Version [e.g. 22]
36 |
37 | **Additional context**
38 | Add any other context about the problem here.
39 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/workflows/docker-image.yml:
--------------------------------------------------------------------------------
1 | name: Docker Image CI
2 |
3 | on:
4 |   push:
5 |     branches: [ "master" ]
6 |   pull_request:
7 |     branches: [ "master" ]
8 |
9 | jobs:
10 |
11 |   build:
12 |
13 |     runs-on: ubuntu-latest
14 |
15 |     steps:
16 |     - uses: actions/checkout@v3
17 |     - name: Build the Docker image
18 |       run: docker build . --file Dockerfile --tag my-image-name:$(date +%s)
19 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # .env
2 | __pycache__
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, religion, or sexual identity
10 | and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 | and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the
26 | overall community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or
31 | advances of any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email
35 | address, without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 | professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement at
63 | [INSERT CONTACT METHOD].
64 | All complaints will be reviewed and investigated promptly and fairly.
65 |
66 | All community leaders are obligated to respect the privacy and security of the
67 | reporter of any incident.
68 |
69 | ## Enforcement Guidelines
70 |
71 | Community leaders will follow these Community Impact Guidelines in determining
72 | the consequences for any action they deem in violation of this Code of Conduct:
73 |
74 | ### 1. Correction
75 |
76 | **Community Impact**: Use of inappropriate language or other behavior deemed
77 | unprofessional or unwelcome in the community.
78 |
79 | **Consequence**: A private, written warning from community leaders, providing
80 | clarity around the nature of the violation and an explanation of why the
81 | behavior was inappropriate. A public apology may be requested.
82 |
83 | ### 2. Warning
84 |
85 | **Community Impact**: A violation through a single incident or series
86 | of actions.
87 |
88 | **Consequence**: A warning with consequences for continued behavior. No
89 | interaction with the people involved, including unsolicited interaction with
90 | those enforcing the Code of Conduct, for a specified period of time. This
91 | includes avoiding interactions in community spaces as well as external channels
92 | like social media. Violating these terms may lead to a temporary or
93 | permanent ban.
94 |
95 | ### 3. Temporary Ban
96 |
97 | **Community Impact**: A serious violation of community standards, including
98 | sustained inappropriate behavior.
99 |
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 |
106 | ### 4. Permanent Ban
107 |
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior, harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 |
112 | **Consequence**: A permanent ban from any sort of public interaction within
113 | the community.
114 |
115 | ## Attribution
116 |
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.0, available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 |
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 |
124 | [homepage]: https://www.contributor-covenant.org
125 |
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 | When contributing to this repository, please first discuss the change you wish to make via issue, email, or any other method with the owners of this repository before making a change.
3 |
4 | Please note we have a code of conduct, please follow it in all your interactions with the project.
5 |
6 | ## Pull Request Process
7 | 1. Ensure any install or build dependencies are removed before the end of the layer when doing a build.
8 | 2. Update the README.md with details of changes to the interface; this includes new environment variables, exposed ports, useful file locations and container parameters.
9 | 3. Increase the version numbers in any example files and the README.md to the new version that this Pull Request would represent. The versioning scheme we use is [SemVer](https://semver.org/).
10 | 4. You may merge the Pull Request once you have the sign-off of two other developers, or, if you do not have permission to do that, you may request the second reviewer to merge it for you.
11 | ## Code of Conduct
12 | ### Our Pledge
13 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
14 |
15 | ### Our Standards
16 | Examples of behavior that contributes to creating a positive environment include:
17 |
18 | Using welcoming and inclusive language
19 | Being respectful of differing viewpoints and experiences
20 | Gracefully accepting constructive criticism
21 | Focusing on what is best for the community
22 | Showing empathy towards other community members
23 | Examples of unacceptable behavior by participants include:
24 |
25 | The use of sexualized language or imagery and unwelcome sexual attention or advances
26 | Trolling, insulting/derogatory comments, and personal or political attacks
27 | Public or private harassment
28 | Publishing others' private information, such as a physical or electronic address, without explicit permission
29 | Other conduct which could reasonably be considered inappropriate in a professional setting
30 | ### Our Responsibilities
31 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
32 |
33 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
34 |
35 | ### Scope
36 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
37 |
38 | ### Enforcement
39 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at [INSERT EMAIL ADDRESS]. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
40 |
41 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
42 |
43 | ### Attribution
44 | This Code of Conduct is adapted from the Contributor Covenant, version 1.4, available at http://contributor-covenant.org/version/1/4
45 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Image for the Flask scraper API: Alpine + Chromium/chromedriver + Python dependencies.
2 | FROM alpine
3 |
4 | WORKDIR /app
5 |
6 | # Chromium and chromedriver come from the Alpine edge repositories, as before.
7 | # NOTE(review): replacing the repository list with edge on a non-edge base image can
8 | # mix incompatible library versions - consider `FROM alpine:edge` or a pinned release.
9 | RUN echo "https://dl-cdn.alpinelinux.org/alpine/edge/community" > /etc/apk/repositories \
10 |  && echo "https://dl-cdn.alpinelinux.org/alpine/edge/main" >> /etc/apk/repositories
11 |
12 | # Single layer; --no-cache refreshes the index without leaving it in the image.
13 | RUN apk add --no-cache chromium chromium-chromedriver python3 py3-pip
14 |
15 | # Current Alpine marks the system Python as externally managed (PEP 668), so a plain
16 | # `pip install` is rejected. Install into a virtual environment and put it first on
17 | # PATH so the `python3` used by CMD sees the installed packages.
18 | ENV VIRTUAL_ENV=/opt/venv
19 | RUN python3 -m venv "$VIRTUAL_ENV"
20 | ENV PATH="$VIRTUAL_ENV/bin:$PATH"
21 |
22 | # Copy only the requirements first so this layer stays cached when just the code changes.
23 | COPY requirements.txt requirements.txt
24 | RUN pip install --no-cache-dir -r requirements.txt
25 |
26 | COPY . .
27 |
28 | # Listen on all interfaces so the published container port is reachable.
29 | ENV FLASK_RUN_HOST=0.0.0.0
30 |
31 | CMD ["python3", "-m", "flask", "run"]
32 |
--------------------------------------------------------------------------------
/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [dhohirpradana]
2 | custom: ["https://mediasiana.id"]
3 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Dhohir Pradana
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README copy.md:
--------------------------------------------------------------------------------
1 | # khayangan-blockchain
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Python-Youtube-Search-Scraper
2 |
3 | Scrape YouTube search results (video URL, title, view count, and upload age) for as many result-page scrolls as you want.
4 |
5 | ![Sample output](sample_output.png)
6 |
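7 | [![Docker Image CI](https://github.com/dhohirpradana/Python-Youtube-Search-Scraper/actions/workflows/docker-image.yml/badge.svg)](https://github.com/dhohirpradana/Python-Youtube-Search-Scraper/actions/workflows/docker-image.yml)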
8 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Supported Versions
4 |
5 | Use this section to tell people about which versions of your project are
6 | currently being supported with security updates.
7 |
8 | | Version | Supported |
9 | | ------- | ------------------ |
10 | | 5.1.x | :white_check_mark: |
11 | | 5.0.x | :x: |
12 | | 4.0.x | :white_check_mark: |
13 | | < 4.0 | :x: |
14 |
15 | ## Reporting a Vulnerability
16 |
17 | Use this section to tell people how to report a vulnerability.
18 |
19 | Tell them where to go, how often they can expect to get an update on a
20 | reported vulnerability, what to expect if the vulnerability is accepted or
21 | declined, etc.
22 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | import os
2 | from sys import stderr
3 |
4 | from flask import Flask, jsonify, request
5 | # from flask_cors import CORS
6 |
7 | from yt_scraper_sroll import handler as yt_scraper_sroll_handler
8 |
9 | app = Flask(__name__)
10 | # CORS(app, resources={r"/*": {"origins": "*"}})
11 |
12 | @app.route('/')
13 | def hello_geek():
14 | return '
Hello from Flask'
15 |
16 | @app.route('/youtube_scraper_scroll', methods=['POST'])
17 | def youtube_scraper_scroll():
18 | return yt_scraper_sroll_handler(request, jsonify)
19 |
20 | if __name__ == "__main__":
21 | app.run(debug=True)
--------------------------------------------------------------------------------
/geckodriver.log:
--------------------------------------------------------------------------------
1 | 1675419593741 geckodriver INFO Listening on 127.0.0.1:51761
2 | 1675419596820 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "--headless" "--window-size=1920,1080" "--d ... 2" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\dhohi\\AppData\\Local\\Temp\\rust_mozprofileWZukZw"
3 | *** You are running in headless mode.
4 | 1675419597487 Marionette INFO Marionette enabled
5 | Dynamically enable window occlusion 0
6 | 1675419597497 Marionette INFO Listening on port 51770
7 | Read port: 51770
8 | WebDriver BiDi listening on ws://127.0.0.1:51762
9 | 1675419597714 RemoteAgent WARN TLS certificate errors will be ignored for this session
10 | [GFX1-]: RenderCompositorSWGL failed mapping default framebuffer, no dt
11 | console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\dhohi\\AppData\\Local\\Temp\\rust_mozprofileWZukZw\\search.json.mozlz4", (void 0)))
12 | DevTools listening on ws://127.0.0.1:51762/devtools/browser/d1c995a1-f126-44e1-8468-8fe6d2a3528c
13 | 1675419616011 Marionette WARN Ignoring event 'DOMContentLoaded' because document has an invalid readyState of 'complete'.
14 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0"
15 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0"
16 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0"
17 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0"
18 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0"
19 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0"
20 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0"
21 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0"
22 | 1675420107561 addons.xpi ERROR System addon update list error Error: Failed downloading XML, status: 0, reason: error
23 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | certifi==2022.6.15
2 | charset-normalizer==2.1.1
3 | click==8.0.3
4 | Flask==2.0.2
5 | idna==3.3
6 | itsdangerous==2.0.1
7 | Jinja2==3.0.2
8 | MarkupSafe==2.0.1
9 | requests==2.28.1
10 | urllib3==1.26.12
11 | Werkzeug==2.0.2
12 | python-dotenv==0.21.0
13 | # flask_cors==3.0.3
14 | selenium==4.8.0
--------------------------------------------------------------------------------
/results/berita terbaru_scroll-2_20230203_172012.txt:
--------------------------------------------------------------------------------
1 | https://www.youtube.com/watch?v=OPpvl5nWn7I¦¦Hancur Lebur, Insiden Bom Bunuh Diri di Masjid | Kabar Hari Ini tvOne¦¦73 rb x ditonton¦¦1 hari yang lalu
2 | https://www.youtube.com/watch?v=lH7J8HuWcQo¦¦Pemilu 2024, Nama Anies, Ganjar, dan Prabowo, Siapa Kandidat Capres Terkuat?¦¦53 rb x ditonton¦¦2 hari yang lalu
3 | https://www.youtube.com/watch?v=Ufsb8S6EvKk¦¦Tabrakan Audi Ungkap Perselingkuhan Polisi | Kabar Hari Ini tvOne¦¦64 rb x ditonton¦¦2 hari yang lalu
4 | https://www.youtube.com/watch?v=Ufsb8S6EvKk¦¦Tabrakan Audi Ungkap Perselingkuhan Polisi | Kabar Hari Ini tvOne¦¦64 rb x ditonton¦¦2 hari yang lalu
5 | https://www.youtube.com/watch?v=XefSqRWCiZs¦¦KEJUTAN! Haaland Bakal ke Barcelona 😱 Marselino Gabung Klub Belgia 😱 Eks Pelatih Timnas Meninggal 😢¦¦145 rb x ditonton¦¦1 hari yang lalu
6 | https://www.youtube.com/watch?v=10IF7COGwQ8¦¦BREAKING NEWS - Sidang Anak Buah Sambo: Hendra, Agus dan Arif Sampaikan Nota Pembelaan Kasus OOJ¦¦104 rb x ditonton¦¦3 hari yang lalu
7 | https://www.youtube.com/watch?v=y286wY5F7Uc¦¦Jaksa Usap Mata Berkali-kali Saat Bacakan Replik Richard Eliezer¦¦219 rb x ditonton¦¦Streaming 2 hari yang lalu
8 | https://www.youtube.com/watch?v=gHk4_hEBcMM¦¦BREAKING NEWS - Presiden Jokowi Hadiri Puncak Perayaan HUT ke-8 PSI¦¦15 rb x ditonton¦¦1 hari yang lalu
9 | https://www.youtube.com/watch?v=C7TTKv_wEQM¦¦Hadapi Invasi Rusia, Ukraina Dapat Pinjaman Senjata dari Sejumlah Negara | Kabar Hari Ini tvOne¦¦3,2 rb x ditonton¦¦2 hari yang lalu
10 | https://www.youtube.com/watch?v=inEJJHSlnp8¦¦6 Tersangka Kasus Peredaran Narkoba yang Melibatkan Teddy Minahasa Hari Ini Diadili¦¦182 rb x ditonton¦¦2 hari yang lalu
11 | https://www.youtube.com/watch?v=zKbaHgFh0fo¦¦PERANG PECAH! Pasukan Tengkorak 'Tusuk' KKB Di Intan Jaya¦¦161 rb x ditonton¦¦3 hari yang lalu
12 | https://www.youtube.com/watch?v=MwT4L_NNQZc¦¦Reaksi Bharada E seusai Seluruh Pledoi Ditolak JPU, Kuasa Hukum: Janji Ikuti Proses dengan Baik¦¦156 rb x ditonton¦¦10 hari yang lalu
13 | https://www.youtube.com/watch?v=wfao0X0KvbI¦¦Ferdy Sambo Diprediksi akan 'Buka-bukaan' jika Akhirnya Divonis Mati dalam Kasus Kematian Brigadir J¦¦189 rb x ditonton¦¦Streaming 4 hari yang lalu
14 | https://www.youtube.com/watch?v=yNKvkPJl-tg¦¦Live Streaming tvOne 24 Jam¦¦54 rb x ditonton¦¦3 hari yang lalu
15 | https://www.youtube.com/watch?v=Pzd-cHHh8i4¦¦🔴Jaksa Cium Niat Jahat Pengacara Sambo yang akan Limpahkan Semua Kesalahan pada Richard Eliezer¦¦35 rb x ditonton¦¦6 hari yang lalu
16 | https://www.youtube.com/watch?v=Za5-fvwbPJI¦¦LIVE STREAMING 24 JAM - KOMPASTV¦¦6,6 rb x ditonton¦¦1 hari yang lalu
17 | https://www.youtube.com/watch?v=eUdoTrPdmuk¦¦Arema FC Bubar? Ini 7 Konsekuensi Jika Putuskan Bubar di Tengah Liga 1 Bergulir, Klub Lain Terdampak¦¦59 rb x ditonton¦¦3 hari yang lalu
18 | https://www.youtube.com/watch?v=Jx3fCgNWM4c¦¦Baku Tembak Pasukan Israel & Warga Palestina, Kamp Pengungsian Rusak Berat | Kabar Hari Ini tvOne¦¦157 rb x ditonton¦¦3 hari yang laluhttps://www.youtube.com/watch?v=OPpvl5nWn7I¦¦Hancur Lebur, Insiden Bom Bunuh Diri di Masjid | Kabar Hari Ini tvOne¦¦73 rb x ditonton¦¦1 hari yang lalu
19 | https://www.youtube.com/watch?v=lH7J8HuWcQo¦¦Pemilu 2024, Nama Anies, Ganjar, dan Prabowo, Siapa Kandidat Capres Terkuat?¦¦53 rb x ditonton¦¦2 hari yang lalu
20 | https://www.youtube.com/watch?v=Ufsb8S6EvKk¦¦Tabrakan Audi Ungkap Perselingkuhan Polisi | Kabar Hari Ini tvOne¦¦64 rb x ditonton¦¦2 hari yang lalu
21 | https://www.youtube.com/watch?v=Ufsb8S6EvKk¦¦Tabrakan Audi Ungkap Perselingkuhan Polisi | Kabar Hari Ini tvOne¦¦64 rb x ditonton¦¦2 hari yang lalu
22 | https://www.youtube.com/watch?v=XefSqRWCiZs¦¦KEJUTAN! Haaland Bakal ke Barcelona 😱 Marselino Gabung Klub Belgia 😱 Eks Pelatih Timnas Meninggal 😢¦¦145 rb x ditonton¦¦1 hari yang lalu
23 | https://www.youtube.com/watch?v=10IF7COGwQ8¦¦BREAKING NEWS - Sidang Anak Buah Sambo: Hendra, Agus dan Arif Sampaikan Nota Pembelaan Kasus OOJ¦¦104 rb x ditonton¦¦3 hari yang lalu
24 | https://www.youtube.com/watch?v=y286wY5F7Uc¦¦Jaksa Usap Mata Berkali-kali Saat Bacakan Replik Richard Eliezer¦¦219 rb x ditonton¦¦Streaming 2 hari yang lalu
25 | https://www.youtube.com/watch?v=gHk4_hEBcMM¦¦BREAKING NEWS - Presiden Jokowi Hadiri Puncak Perayaan HUT ke-8 PSI¦¦15 rb x ditonton¦¦1 hari yang lalu
26 | https://www.youtube.com/watch?v=C7TTKv_wEQM¦¦Hadapi Invasi Rusia, Ukraina Dapat Pinjaman Senjata dari Sejumlah Negara | Kabar Hari Ini tvOne¦¦3,2 rb x ditonton¦¦2 hari yang lalu
27 | https://www.youtube.com/watch?v=inEJJHSlnp8¦¦6 Tersangka Kasus Peredaran Narkoba yang Melibatkan Teddy Minahasa Hari Ini Diadili¦¦182 rb x ditonton¦¦2 hari yang lalu
28 | https://www.youtube.com/watch?v=zKbaHgFh0fo¦¦PERANG PECAH! Pasukan Tengkorak 'Tusuk' KKB Di Intan Jaya¦¦161 rb x ditonton¦¦3 hari yang lalu
29 | https://www.youtube.com/watch?v=MwT4L_NNQZc¦¦Reaksi Bharada E seusai Seluruh Pledoi Ditolak JPU, Kuasa Hukum: Janji Ikuti Proses dengan Baik¦¦156 rb x ditonton¦¦10 hari yang lalu
30 | https://www.youtube.com/watch?v=wfao0X0KvbI¦¦Ferdy Sambo Diprediksi akan 'Buka-bukaan' jika Akhirnya Divonis Mati dalam Kasus Kematian Brigadir J¦¦189 rb x ditonton¦¦Streaming 4 hari yang lalu
31 | https://www.youtube.com/watch?v=yNKvkPJl-tg¦¦Live Streaming tvOne 24 Jam¦¦54 rb x ditonton¦¦3 hari yang lalu
32 | https://www.youtube.com/watch?v=Pzd-cHHh8i4¦¦🔴Jaksa Cium Niat Jahat Pengacara Sambo yang akan Limpahkan Semua Kesalahan pada Richard Eliezer¦¦35 rb x ditonton¦¦6 hari yang lalu
33 | https://www.youtube.com/watch?v=Za5-fvwbPJI¦¦LIVE STREAMING 24 JAM - KOMPASTV¦¦6,6 rb x ditonton¦¦1 hari yang lalu
34 | https://www.youtube.com/watch?v=eUdoTrPdmuk¦¦Arema FC Bubar? Ini 7 Konsekuensi Jika Putuskan Bubar di Tengah Liga 1 Bergulir, Klub Lain Terdampak¦¦59 rb x ditonton¦¦3 hari yang lalu
35 | https://www.youtube.com/watch?v=Jx3fCgNWM4c¦¦Baku Tembak Pasukan Israel & Warga Palestina, Kamp Pengungsian Rusak Berat | Kabar Hari Ini tvOne¦¦157 rb x ditonton¦¦3 hari yang lalu
36 | https://www.youtube.com/watch?v=OPpvl5nWn7I¦¦Hancur Lebur, Insiden Bom Bunuh Diri di Masjid | Kabar Hari Ini tvOne¦¦13 rb x ditonton¦¦2 hari yang lalu
37 | https://www.youtube.com/watch?v=lH7J8HuWcQo¦¦Pemilu 2024, Nama Anies, Ganjar, dan Prabowo, Siapa Kandidat Capres Terkuat?¦¦12 rb x ditonton¦¦3 hari yang lalu
38 | https://www.youtube.com/watch?v=Ufsb8S6EvKk¦¦Tabrakan Audi Ungkap Perselingkuhan Polisi | Kabar Hari Ini tvOne¦¦4,6 rb x ditonton¦¦3 hari yang lalu
39 | https://www.youtube.com/watch?v=Ufsb8S6EvKk¦¦Tabrakan Audi Ungkap Perselingkuhan Polisi | Kabar Hari Ini tvOne¦¦9,5 rb x ditonton¦¦2 hari yang lalu
40 | https://www.youtube.com/watch?v=XefSqRWCiZs¦¦KEJUTAN! Haaland Bakal ke Barcelona 😱 Marselino Gabung Klub Belgia 😱 Eks Pelatih Timnas Meninggal 😢¦¦12 rb x ditonton¦¦2 hari yang lalu
41 | https://www.youtube.com/watch?v=10IF7COGwQ8¦¦BREAKING NEWS - Sidang Anak Buah Sambo: Hendra, Agus dan Arif Sampaikan Nota Pembelaan Kasus OOJ¦¦363 rb x ditonton¦¦8 hari yang lalu
42 | https://www.youtube.com/watch?v=y286wY5F7Uc¦¦Jaksa Usap Mata Berkali-kali Saat Bacakan Replik Richard Eliezer¦¦73 rb x ditonton¦¦1 hari yang lalu
43 | https://www.youtube.com/watch?v=gHk4_hEBcMM¦¦BREAKING NEWS - Presiden Jokowi Hadiri Puncak Perayaan HUT ke-8 PSI¦¦53 rb x ditonton¦¦2 hari yang lalu
44 | https://www.youtube.com/watch?v=C7TTKv_wEQM¦¦Hadapi Invasi Rusia, Ukraina Dapat Pinjaman Senjata dari Sejumlah Negara | Kabar Hari Ini tvOne¦¦64 rb x ditonton¦¦2 hari yang lalu
45 | https://www.youtube.com/watch?v=inEJJHSlnp8¦¦6 Tersangka Kasus Peredaran Narkoba yang Melibatkan Teddy Minahasa Hari Ini Diadili¦¦64 rb x ditonton¦¦2 hari yang lalu
46 | https://www.youtube.com/watch?v=zKbaHgFh0fo¦¦PERANG PECAH! Pasukan Tengkorak 'Tusuk' KKB Di Intan Jaya¦¦145 rb x ditonton¦¦1 hari yang lalu
47 | https://www.youtube.com/watch?v=MwT4L_NNQZc¦¦Reaksi Bharada E seusai Seluruh Pledoi Ditolak JPU, Kuasa Hukum: Janji Ikuti Proses dengan Baik¦¦104 rb x ditonton¦¦3 hari yang lalu
48 | https://www.youtube.com/watch?v=wfao0X0KvbI¦¦Ferdy Sambo Diprediksi akan 'Buka-bukaan' jika Akhirnya Divonis Mati dalam Kasus Kematian Brigadir J¦¦219 rb x ditonton¦¦Streaming 2 hari yang lalu
49 | https://www.youtube.com/watch?v=yNKvkPJl-tg¦¦Live Streaming tvOne 24 Jam¦¦15 rb x ditonton¦¦1 hari yang lalu
50 | https://www.youtube.com/watch?v=Pzd-cHHh8i4¦¦🔴Jaksa Cium Niat Jahat Pengacara Sambo yang akan Limpahkan Semua Kesalahan pada Richard Eliezer¦¦3,2 rb x ditonton¦¦2 hari yang lalu
51 | https://www.youtube.com/watch?v=Za5-fvwbPJI¦¦LIVE STREAMING 24 JAM - KOMPASTV¦¦182 rb x ditonton¦¦2 hari yang lalu
52 | https://www.youtube.com/watch?v=eUdoTrPdmuk¦¦Arema FC Bubar? Ini 7 Konsekuensi Jika Putuskan Bubar di Tengah Liga 1 Bergulir, Klub Lain Terdampak¦¦161 rb x ditonton¦¦3 hari yang lalu
53 | https://www.youtube.com/watch?v=Jx3fCgNWM4c¦¦Baku Tembak Pasukan Israel & Warga Palestina, Kamp Pengungsian Rusak Berat | Kabar Hari Ini tvOne¦¦156 rb x ditonton¦¦10 hari yang lalu
54 | https://www.youtube.com/watch?v=E7de4ex_yTc¦¦[Breaking News] Banjir Bandang di Parepare | tvOne¦¦189 rb x ditonton¦¦Streaming 4 hari yang lalu
55 | https://www.youtube.com/watch?v=2zpCbFFEvTw¦¦ISRAEL-PALESTINA KEMBALI SALING SERANG RUDAL¦¦54 rb x ditonton¦¦3 hari yang lalu
56 | https://www.youtube.com/watch?v=9B2scW2RNQ0¦¦Bak di Film-film, Jambret di Surabaya Keok Lawan 7 Anak SD | Kabar Utama tvOne¦¦35 rb x ditonton¦¦6 hari yang lalu
57 | https://www.youtube.com/watch?v=leHbRGvYC5k¦¦PKS Deklarasi Dukungan Terhadap Anies Baswedan | Kabar Petang tvOne¦¦6,6 rb x ditonton¦¦1 hari yang lalu
58 | https://www.youtube.com/shorts/GZ9cvkVFGnc¦¦KABAR TERKINI Hotman Paris Lapor Nikita Mirzani dan Farhat Abbas Gegara Bunda Corla?¦¦59 rb x ditonton¦¦3 hari yang lalu
59 | https://www.youtube.com/watch?v=7-ysqhUmx-E¦¦Banjir dan Tanah Longsor Landa Auckland Selandia Baru | Kabar Hari Ini tvOne¦¦157 rb x ditonton¦¦3 hari yang lalu
60 | https://www.youtube.com/watch?v=ehEw3mPcMm4¦¦🔴 KILASAN TERKINI : Korban Tewas Ledakan di Masjid Pakistan Jadi 100 Orang¦¦13 rb x ditonton¦¦2 hari yang lalu
61 | https://www.youtube.com/watch?v=FN0W12umPrY¦¦Sidang Vonis Ferdy Sambo Akan Digelar 13 Februari 2023 | Kabar Petang tvOne¦¦12 rb x ditonton¦¦3 hari yang lalu
62 | https://www.youtube.com/watch?v=dn-1htaioDM¦¦Dengan Tangan Kosong, Warga Kalahkan Pelaku Penembakan Massal | Kabar Hari Ini tvOne¦¦4,6 rb x ditonton¦¦3 hari yang lalu
63 | https://www.youtube.com/watch?v=OPpvl5nWn7I¦¦Hancur Lebur, Insiden Bom Bunuh Diri di Masjid | Kabar Hari Ini tvOne¦¦9,5 rb x ditonton¦¦2 hari yang lalu
64 | https://www.youtube.com/watch?v=lH7J8HuWcQo¦¦Pemilu 2024, Nama Anies, Ganjar, dan Prabowo, Siapa Kandidat Capres Terkuat?¦¦12 rb x ditonton¦¦2 hari yang lalu
65 | https://www.youtube.com/watch?v=wgS8xkWL9Hw¦¦Alasan Pemuda Papua Ngebet Pengen Gabung KKB¦¦363 rb x ditonton¦¦8 hari yang lalu
66 | https://www.youtube.com/watch?v=Q5s2HxfUGfc¦¦Demokrat Dukung Anies Baswedan Maju di Pilpres 2024 | Kabar Utama tvOne¦¦73 rb x ditonton¦¦1 hari yang lalu
67 | https://www.youtube.com/watch?v=cEfjW2yldWM¦¦Robot Tempur Rusia Musnahkan Tank Abrams, Leopard 2 AS dan Jerman, Ukraina Mulai Ketar-ketir¦¦53 rb x ditonton¦¦2 hari yang lalu
68 | https://www.youtube.com/watch?v=tGcyylEdr2s¦¦Sebarkan Ajaran Sesat, Masjid di Pamekasan Disegel | Apa Kabar Indonesia Pagi tvOne¦¦294 rb x ditonton¦¦2 minggu yang lalu
69 | https://www.youtube.com/watch?v=hqg1YsFmhLs¦¦Kabar Sumatera 31 Januari 2023 | tvOne¦¦103 rb x ditonton¦¦6 hari yang lalu
70 | https://www.youtube.com/watch?v=Y1dN0rR-6AA¦¦Pelaku Percobaan Pemerkosaan Habis Dihajar Warga | Kabar Hari Ini tvOne¦¦438 rb x ditonton¦¦6 hari yang lalu
71 | https://www.youtube.com/watch?v=tSjBIoNlUWw¦¦BREAKING NEWS - Tanggapan JPU Atas Pledoi Terdakwa Putri Candrawathi & Richard Eliezer¦¦75 rb x ditonton¦¦8 hari yang lalu
72 | https://www.youtube.com/watch?v=rPY6XWKX-gc¦¦Begini Potret Terkini Pembangunan IKN Nusantara¦¦56 rb x ditonton¦¦3 hari yang lalu
73 | https://www.youtube.com/watch?v=I3Hez7gPF3Q¦¦Fakta Penolakan Ibunda Ferry Irawan Ke Rumah Venna Melinda | Hot Shot¦¦1,5 jt x ditonton¦¦1 bulan yang lalu
74 | https://www.youtube.com/watch?v=XCyfYwJcckI¦¦Catatan Effendi Gazali Respon Soal Kontrak Politik | Kabar Petang tvOne¦¦426 rb x ditonton¦¦Streaming 4 hari yang lalu
75 | https://www.youtube.com/watch?v=KVgzaq27qiw¦¦Rencana Rusia Kerahkan Tank T-14 ke Medan Perang di Ukraina Ditolak Pasukannya, Ini Alasannya¦¦230 rb x ditonton¦¦6 hari yang lalu
76 | https://www.youtube.com/watch?v=4s_CMha6BAU¦¦Nota Pembelaan Eliezer Ditolak Jaksa, Ini Kata Ronny Talapessy¦¦92 rb x ditonton¦¦2 hari yang lalu
77 | https://www.youtube.com/watch?v=iGx0jACEzV0¦¦Highlights - PSIS Semarang VS Persib Bandung | BRI Liga 1 2022/2023¦¦62 rb x ditonton¦¦2 hari yang lalu
78 | https://www.youtube.com/watch?v=d92KP_jhgXk¦¦Wilayah Israel Bagian Selatan Dihujani Roket Militan Palestina | Kabar Hari Ini tvOne¦¦91 rb x ditonton¦¦6 hari yang lalu
79 | https://www.youtube.com/watch?v=FT7u1sX8bZo¦¦JPU Tidak Mendapatkan Bukti Pelecehan Putri dari Pengacara | Breaking News tvOne¦¦858 rb x ditonton¦¦4 hari yang lalu
--------------------------------------------------------------------------------
/results/jpu_scroll-1_20230204_193840.txt:
--------------------------------------------------------------------------------
1 | https://www.youtube.com/watch?v=5DDvydmswz4¦¦Momen JPU Minta Hakim Perlihatkan Isi Amplop Dalam Berkas: Biar Netizen Tidak Salah Tafsirkan¦¦250K views¦¦1 day ago
2 | https://www.youtube.com/watch?v=Amf36_5u1-E¦¦Tanggapi Pledoi Putri Candrawathi, JPU: Tidak Ada Satu pun Bukti yang Menunjukan Putri Dilecehkan¦¦40K views¦¦5 days ago
3 | https://www.youtube.com/watch?v=RZQFjNMAksA¦¦Ekspresi JPU Saat Disebut Frustasi oleh Penasihat Hukum Ferdy Sambo¦¦41K views¦¦3 days ago
4 | https://www.youtube.com/watch?v=MwT4L_NNQZc¦¦Reaksi Bharada E seusai Seluruh Pledoi Ditolak JPU, Kuasa Hukum: Janji Ikuti Proses dengan Baik¦¦167K views¦¦4 days ago
5 | https://www.youtube.com/watch?v=aED2KXBSB2E¦¦Pengacara Minta Maaf Ke JPU Terkait Pledoi Eliezer¦¦17K views¦¦1 day ago
6 | https://www.youtube.com/watch?v=nzFg5fV_69w¦¦Hakim Libas Habis JPU saat Tanya Ajudan Hendra Kurniawan, Kesal Tak Sesuai SOP: Kok PD Banget Anda¦¦251K views¦¦2 weeks ago
--------------------------------------------------------------------------------
/sample_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhohirpradana/Python-Youtube-Search-Scraper/503036505aa64e966b3f2032341dd954fd5ccefa/sample_output.png
--------------------------------------------------------------------------------
/yt_scraper_sroll/__init__.py:
--------------------------------------------------------------------------------
1 | import time
2 | import urllib.parse
3 |
4 | from selenium import webdriver
5 | from selenium.webdriver.chrome.options import Options
6 | from selenium.webdriver.common.by import By
7 | from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
8 | from selenium.webdriver.support import expected_conditions as EC
9 | from selenium.webdriver.support.wait import WebDriverWait
10 |
11 | # binary = FirefoxBinary(r'C:\Program Files\Mozilla Firefox\firefox.exe')
12 |
13 | # options = webdriver.FirefoxOptions()
14 | # options.add_argument("--headless")
15 | # options.add_argument("--window-size=1920,1080")
16 | # options.add_argument("--disable-gpu")
17 | # options.add_argument("--disable-extensions")
18 | # options.add_argument("--no-sandbox")
19 | # options.add_argument("--disable-dev-shm-usage")
20 | # options.add_argument("--disable-features=VizDisplayCompositor")
21 | # options.add_argument("--disable-features=NetworkService")
22 | # # , firefox_binary=binary
23 |
24 | # driver = webdriver.Firefox(options=options)
25 |
26 |
def set_chrome_options() -> Options:
    """Build the Chrome options used for the Selenium session.

    The browser is started with the ``--headless``, ``--no-sandbox`` and
    ``--disable-dev-shm-usage`` switches, and the experimental ``prefs``
    option sets ``profile.default_content_settings`` to ``{"images": 2}``
    (presumably to block image loading -- confirm against Chrome's
    content-settings values).
    """
    options = Options()
    for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
        options.add_argument(flag)

    prefs: dict[str, dict[str, int]] = {
        "profile.default_content_settings": {"images": 2},
    }
    options.add_experimental_option("prefs", prefs)
    return options
39 |
40 |
# Module-level browser session: created once when this module is imported
# and used by handler() through this global name.
driver = webdriver.Chrome(options=set_chrome_options())
# Clear any cookies held by the freshly created session.
driver.delete_all_cookies()
43 | # BASE_DIR = os.path.join(os.path.dirname(__file__), '..')
44 |
45 |
def _is_video_href(href):
    """Return True when *href* links to a single video.

    Missing hrefs, links carrying a ``list`` query parameter (playlists) and
    links whose path starts with ``/@`` (channels) are rejected.
    """
    if not href:
        return False
    parts = urllib.parse.urlsplit(href)
    if "list" in urllib.parse.parse_qs(parts.query, keep_blank_values=True):
        return False
    return not parts.path.startswith("/@")


def _scroll_results(browser, times):
    """Scroll the page up to *times* times, stopping once it stops growing."""
    viewport_height = browser.execute_script("return window.innerHeight")
    for scroll_num in range(1, times + 1):
        print(f"Scrolling {scroll_num} of {times}")
        height_before = browser.execute_script(
            "return document.documentElement.scrollHeight")
        browser.execute_script(
            f"window.scrollTo(0, {height_before + viewport_height});")

        # Give the page time to load more results before measuring again.
        time.sleep(2)
        height_after = browser.execute_script(
            "return document.documentElement.scrollHeight")

        # An unchanged document height means nothing more was loaded.
        if height_after == height_before:
            break


def _collect_results(browser):
    """Read the videos currently on the page into a list of result dicts."""
    links = []
    titles = []
    for anchor in browser.find_elements(By.XPATH, "//a[@id='video-title']"):
        href = anchor.get_attribute("href")
        if not _is_video_href(href):
            continue
        links.append(href)
        titles.append(anchor.get_attribute("title"))

    views = []
    published = []
    metadata_items = browser.find_elements(
        By.XPATH,
        "//span[@class='inline-metadata-item style-scope ytd-video-meta-block']")
    for item in metadata_items:
        text = item.text
        if "views" in text or "ditonton" in text:
            views.append(text)
        elif "ago" in text or "yang lalu" in text:
            published.append(text)

    def pick(values, index):
        # "-" marks metadata that could not be found for this position.
        return values[index] if index < len(values) else "-"

    # NOTE(review): views/published are matched to links purely by position,
    # so they can drift when a skipped playlist/channel entry also has
    # metadata spans -- confirm against the live page structure.
    return [
        {
            "url": link,
            "title": title,
            "views": pick(views, index),
            "published": pick(published, index),
        }
        for index, (link, title) in enumerate(zip(links, titles))
    ]


def handler(request, jsonify):
    """Run a YouTube search for the request's query and return the videos.

    The JSON body must be an object with:
      * ``query``  -- the search text (string);
      * ``scroll`` -- how many times to scroll the results page (values
        below 1 are treated as 1).

    Args:
        request: object exposing ``get_json()`` that returns the parsed body.
        jsonify: callable turning a dict into the response payload.

    Returns:
        A ``(payload, status)`` tuple: 400 with a ``message`` for an invalid
        body, 500 with a ``message`` when the browser fails, otherwise 200
        with ``message`` set to ``'success'`` and the results under ``data``.
    """
    body = request.get_json()

    if body is None:
        return jsonify({'message': 'No body provided'}), 400

    if not isinstance(body, dict):
        return jsonify({'message': 'Body must be a JSON object'}), 400

    # A missing key raises KeyError (not AttributeError), so check explicitly.
    for field in ('query', 'scroll'):
        if field not in body:
            return jsonify({'message': field + " not provided"}), 400

    query = body['query']
    if not isinstance(query, str):
        return jsonify({'message': 'query must be a string'}), 400

    try:
        scroll = int(body['scroll'])
    except (TypeError, ValueError, OverflowError):
        return jsonify({'message': 'scroll must be an integer'}), 400
    scroll = max(scroll, 1)

    query_url = urllib.parse.quote(query)
    print('Query URL: ', query_url)

    # Imported locally so this block needs no change to the file's imports.
    from selenium.common.exceptions import WebDriverException

    try:
        driver.get(f"https://www.youtube.com/results?search_query={query_url}")
        # Scroll first and read the page once afterwards: re-reading after
        # every scroll would add the same videos to the results repeatedly.
        _scroll_results(driver, scroll)
        res_data = _collect_results(driver)
    except (ConnectionError, WebDriverException) as err:
        print("Error: ", err)
        return jsonify({'message': str(err)}), 500

    return jsonify({'message': 'success', 'data': res_data}), 200
--------------------------------------------------------------------------------