├── .env ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ └── docker-image.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── FUNDING.yml ├── LICENSE ├── README copy.md ├── README.md ├── SECURITY.md ├── app.py ├── contents.json ├── data.json ├── geckodriver.log ├── requirements.txt ├── results ├── berita terbaru_scroll-2_20230203_172012.txt └── jpu_scroll-1_20230204_193840.txt ├── sample_output.png └── yt_scraper_sroll └── __init__.py /.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhohirpradana/Python-Youtube-Search-Scraper/503036505aa64e966b3f2032341dd954fd5ccefa/.env -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 
39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/workflows/docker-image.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | jobs: 10 | 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: Build the Docker image 18 | run: docker build . 
--file Dockerfile --tag my-image-name:$(date +%s) 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # .env 2 | __pycache__ -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | . 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 
99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing 2 | When contributing to this repository, please first discuss the change you wish to make via issue, email, or any other method with the owners of this repository before making a change. 3 | 4 | Please note we have a code of conduct, please follow it in all your interactions with the project. 
5 | 6 | Pull Request Process 7 | Ensure any install or build dependencies are removed before the end of the layer when doing a build. 8 | Update the README.md with details of changes to the interface; this includes new environment variables, exposed ports, useful file locations and container parameters. 9 | Increase the version numbers in any example files and the README.md to the new version that this Pull Request would represent. The versioning scheme we use is SemVer. 10 | You may merge the Pull Request in once you have the sign-off of two other developers, or if you do not have permission to do that, you may request the second reviewer to merge it for you. 11 | Code of Conduct 12 | Our Pledge 13 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
14 | 15 | Our Standards 16 | Examples of behavior that contributes to creating a positive environment include: 17 | 18 | Using welcoming and inclusive language 19 | Being respectful of differing viewpoints and experiences 20 | Gracefully accepting constructive criticism 21 | Focusing on what is best for the community 22 | Showing empathy towards other community members 23 | Examples of unacceptable behavior by participants include: 24 | 25 | The use of sexualized language or imagery and unwelcome sexual attention or advances 26 | Trolling, insulting/derogatory comments, and personal or political attacks 27 | Public or private harassment 28 | Publishing others' private information, such as a physical or electronic address, without explicit permission 29 | Other conduct which could reasonably be considered inappropriate in a professional setting 30 | Our Responsibilities 31 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 32 | 33 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 34 | 35 | Scope 36 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
37 | 38 | Enforcement 39 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at [INSERT EMAIL ADDRESS]. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 40 | 41 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 42 | 43 | Attribution 44 | This Code of Conduct is adapted from the Contributor Covenant, version 1.4, available at http://contributor-covenant.org/version/1/4 45 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | 3 | WORKDIR /app 4 | COPY requirements.txt requirements.txt 5 | 6 | RUN echo "http://dl-cdn.alpinelinux.org/alpine/edge/community" > /etc/apk/repositories 7 | RUN echo "http://dl-cdn.alpinelinux.org/alpine/edge/main" >> /etc/apk/repositories 8 | 9 | RUN apk update 10 | RUN apk add chromium chromium-chromedriver 11 | 12 | RUN apk add python3 13 | RUN apk add py3-pip 14 | 15 | RUN pip install -r requirements.txt 16 | 17 | COPY . . 
18 | 19 | ENV FLASK_RUN_HOST=0.0.0.0 20 | 21 | CMD ["python3", "-m", "flask", "run"] 22 | -------------------------------------------------------------------------------- /FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [dhohirpradana] 2 | custom: ["https://mediasiana.id"] 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Dhohir Pradana 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README copy.md: -------------------------------------------------------------------------------- 1 | # khayangan-blockchain 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python-Youtube-Search-Scraper 2 | 3 | Scrape YouTube search results, including video details, for as many page scrolls as you want 4 | 5 | ![alt text](https://github.com/dhohirpradana/Python-Youtube-Search-Scraper/blob/master/sample_output.png?raw=true) 6 | 7 | [![Docker Image CI](https://github.com/dhohirpradana/Python-Youtube-Search-Scraper/actions/workflows/docker-image.yml/badge.svg)](https://github.com/dhohirpradana/Python-Youtube-Search-Scraper/actions/workflows/docker-image.yml) 8 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Use this section to tell people about which versions of your project are 6 | currently being supported with security updates. 7 | 8 | | Version | Supported | 9 | | ------- | ------------------ | 10 | | 5.1.x | :white_check_mark: | 11 | | 5.0.x | :x: | 12 | | 4.0.x | :white_check_mark: | 13 | | < 4.0 | :x: | 14 | 15 | ## Reporting a Vulnerability 16 | 17 | Use this section to tell people how to report a vulnerability. 18 | 19 | Tell them where to go, how often they can expect to get an update on a 20 | reported vulnerability, what to expect if the vulnerability is accepted or 21 | declined, etc. 
22 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import os 2 | from sys import stderr 3 | 4 | from flask import Flask, jsonify, request 5 | # from flask_cors import CORS 6 | 7 | from yt_scraper_sroll import handler as yt_scraper_sroll_handler 8 | 9 | app = Flask(__name__) 10 | # CORS(app, resources={r"/*": {"origins": "*"}}) 11 | 12 | @app.route('/') 13 | def hello_geek(): 14 | return '

Hello from Flask

' 15 | 16 | @app.route('/youtube_scraper_scroll', methods=['POST']) 17 | def youtube_scraper_scroll(): 18 | return yt_scraper_sroll_handler(request, jsonify) 19 | 20 | if __name__ == "__main__": 21 | app.run(debug=True) -------------------------------------------------------------------------------- /geckodriver.log: -------------------------------------------------------------------------------- 1 | 1675419593741 geckodriver INFO Listening on 127.0.0.1:51761 2 | 1675419596820 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "--headless" "--window-size=1920,1080" "--d ... 2" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\dhohi\\AppData\\Local\\Temp\\rust_mozprofileWZukZw" 3 | *** You are running in headless mode. 4 | 1675419597487 Marionette INFO Marionette enabled 5 | Dynamically enable window occlusion 0 6 | 1675419597497 Marionette INFO Listening on port 51770 7 | Read port: 51770 8 | WebDriver BiDi listening on ws://127.0.0.1:51762 9 | 1675419597714 RemoteAgent WARN TLS certificate errors will be ignored for this session 10 | [GFX1-]: RenderCompositorSWGL failed mapping default framebuffer, no dt 11 | console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\dhohi\\AppData\\Local\\Temp\\rust_mozprofileWZukZw\\search.json.mozlz4", (void 0))) 12 | DevTools listening on ws://127.0.0.1:51762/devtools/browser/d1c995a1-f126-44e1-8468-8fe6d2a3528c 13 | 1675419616011 Marionette WARN Ignoring event 'DOMContentLoaded' because document has an invalid readyState of 'complete'. 
14 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0" 15 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0" 16 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0" 17 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0" 18 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0" 19 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0" 20 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0" 21 | [2023-02-03T10:20:18Z ERROR mp4parse] Found 2 nul bytes in "\0\0" 22 | 1675420107561 addons.xpi ERROR System addon update list error Error: Failed downloading XML, status: 0, reason: error 23 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2022.6.15 2 | charset-normalizer==2.1.1 3 | click==8.0.3 4 | Flask==2.0.2 5 | idna==3.3 6 | itsdangerous==2.0.1 7 | Jinja2==3.0.2 8 | MarkupSafe==2.0.1 9 | requests==2.28.1 10 | urllib3==1.26.12 11 | Werkzeug==2.0.2 12 | python-dotenv==0.21.0 13 | # flask_cors==3.0.3 14 | selenium==4.8.0 -------------------------------------------------------------------------------- /results/berita terbaru_scroll-2_20230203_172012.txt: -------------------------------------------------------------------------------- 1 | https://www.youtube.com/watch?v=OPpvl5nWn7I¦¦Hancur Lebur, Insiden Bom Bunuh Diri di Masjid | Kabar Hari Ini tvOne¦¦73 rb x ditonton¦¦1 hari yang lalu 2 | https://www.youtube.com/watch?v=lH7J8HuWcQo¦¦Pemilu 2024, Nama Anies, Ganjar, dan Prabowo, Siapa Kandidat Capres Terkuat?¦¦53 rb x ditonton¦¦2 hari yang lalu 3 | https://www.youtube.com/watch?v=Ufsb8S6EvKk¦¦Tabrakan Audi Ungkap Perselingkuhan Polisi | Kabar Hari Ini tvOne¦¦64 rb x ditonton¦¦2 hari yang lalu 4 | https://www.youtube.com/watch?v=Ufsb8S6EvKk¦¦Tabrakan Audi Ungkap Perselingkuhan Polisi | Kabar Hari Ini tvOne¦¦64 rb x ditonton¦¦2 
hari yang lalu 5 | https://www.youtube.com/watch?v=XefSqRWCiZs¦¦KEJUTAN! Haaland Bakal ke Barcelona 😱 Marselino Gabung Klub Belgia 😱 Eks Pelatih Timnas Meninggal 😢¦¦145 rb x ditonton¦¦1 hari yang lalu 6 | https://www.youtube.com/watch?v=10IF7COGwQ8¦¦BREAKING NEWS - Sidang Anak Buah Sambo: Hendra, Agus dan Arif Sampaikan Nota Pembelaan Kasus OOJ¦¦104 rb x ditonton¦¦3 hari yang lalu 7 | https://www.youtube.com/watch?v=y286wY5F7Uc¦¦Jaksa Usap Mata Berkali-kali Saat Bacakan Replik Richard Eliezer¦¦219 rb x ditonton¦¦Streaming 2 hari yang lalu 8 | https://www.youtube.com/watch?v=gHk4_hEBcMM¦¦BREAKING NEWS - Presiden Jokowi Hadiri Puncak Perayaan HUT ke-8 PSI¦¦15 rb x ditonton¦¦1 hari yang lalu 9 | https://www.youtube.com/watch?v=C7TTKv_wEQM¦¦Hadapi Invasi Rusia, Ukraina Dapat Pinjaman Senjata dari Sejumlah Negara | Kabar Hari Ini tvOne¦¦3,2 rb x ditonton¦¦2 hari yang lalu 10 | https://www.youtube.com/watch?v=inEJJHSlnp8¦¦6 Tersangka Kasus Peredaran Narkoba yang Melibatkan Teddy Minahasa Hari Ini Diadili¦¦182 rb x ditonton¦¦2 hari yang lalu 11 | https://www.youtube.com/watch?v=zKbaHgFh0fo¦¦PERANG PECAH! 
Pasukan Tengkorak 'Tusuk' KKB Di Intan Jaya¦¦161 rb x ditonton¦¦3 hari yang lalu 12 | https://www.youtube.com/watch?v=MwT4L_NNQZc¦¦Reaksi Bharada E seusai Seluruh Pledoi Ditolak JPU, Kuasa Hukum: Janji Ikuti Proses dengan Baik¦¦156 rb x ditonton¦¦10 hari yang lalu 13 | https://www.youtube.com/watch?v=wfao0X0KvbI¦¦Ferdy Sambo Diprediksi akan 'Buka-bukaan' jika Akhirnya Divonis Mati dalam Kasus Kematian Brigadir J¦¦189 rb x ditonton¦¦Streaming 4 hari yang lalu 14 | https://www.youtube.com/watch?v=yNKvkPJl-tg¦¦Live Streaming tvOne 24 Jam¦¦54 rb x ditonton¦¦3 hari yang lalu 15 | https://www.youtube.com/watch?v=Pzd-cHHh8i4¦¦🔴Jaksa Cium Niat Jahat Pengacara Sambo yang akan Limpahkan Semua Kesalahan pada Richard Eliezer¦¦35 rb x ditonton¦¦6 hari yang lalu 16 | https://www.youtube.com/watch?v=Za5-fvwbPJI¦¦LIVE STREAMING 24 JAM - KOMPASTV¦¦6,6 rb x ditonton¦¦1 hari yang lalu 17 | https://www.youtube.com/watch?v=eUdoTrPdmuk¦¦Arema FC Bubar? Ini 7 Konsekuensi Jika Putuskan Bubar di Tengah Liga 1 Bergulir, Klub Lain Terdampak¦¦59 rb x ditonton¦¦3 hari yang lalu 18 | https://www.youtube.com/watch?v=Jx3fCgNWM4c¦¦Baku Tembak Pasukan Israel & Warga Palestina, Kamp Pengungsian Rusak Berat | Kabar Hari Ini tvOne¦¦157 rb x ditonton¦¦3 hari yang laluhttps://www.youtube.com/watch?v=OPpvl5nWn7I¦¦Hancur Lebur, Insiden Bom Bunuh Diri di Masjid | Kabar Hari Ini tvOne¦¦73 rb x ditonton¦¦1 hari yang lalu 19 | https://www.youtube.com/watch?v=lH7J8HuWcQo¦¦Pemilu 2024, Nama Anies, Ganjar, dan Prabowo, Siapa Kandidat Capres Terkuat?¦¦53 rb x ditonton¦¦2 hari yang lalu 20 | https://www.youtube.com/watch?v=Ufsb8S6EvKk¦¦Tabrakan Audi Ungkap Perselingkuhan Polisi | Kabar Hari Ini tvOne¦¦64 rb x ditonton¦¦2 hari yang lalu 21 | https://www.youtube.com/watch?v=Ufsb8S6EvKk¦¦Tabrakan Audi Ungkap Perselingkuhan Polisi | Kabar Hari Ini tvOne¦¦64 rb x ditonton¦¦2 hari yang lalu 22 | https://www.youtube.com/watch?v=XefSqRWCiZs¦¦KEJUTAN! 
Haaland Bakal ke Barcelona 😱 Marselino Gabung Klub Belgia 😱 Eks Pelatih Timnas Meninggal 😢¦¦145 rb x ditonton¦¦1 hari yang lalu 23 | https://www.youtube.com/watch?v=10IF7COGwQ8¦¦BREAKING NEWS - Sidang Anak Buah Sambo: Hendra, Agus dan Arif Sampaikan Nota Pembelaan Kasus OOJ¦¦104 rb x ditonton¦¦3 hari yang lalu 24 | https://www.youtube.com/watch?v=y286wY5F7Uc¦¦Jaksa Usap Mata Berkali-kali Saat Bacakan Replik Richard Eliezer¦¦219 rb x ditonton¦¦Streaming 2 hari yang lalu 25 | https://www.youtube.com/watch?v=gHk4_hEBcMM¦¦BREAKING NEWS - Presiden Jokowi Hadiri Puncak Perayaan HUT ke-8 PSI¦¦15 rb x ditonton¦¦1 hari yang lalu 26 | https://www.youtube.com/watch?v=C7TTKv_wEQM¦¦Hadapi Invasi Rusia, Ukraina Dapat Pinjaman Senjata dari Sejumlah Negara | Kabar Hari Ini tvOne¦¦3,2 rb x ditonton¦¦2 hari yang lalu 27 | https://www.youtube.com/watch?v=inEJJHSlnp8¦¦6 Tersangka Kasus Peredaran Narkoba yang Melibatkan Teddy Minahasa Hari Ini Diadili¦¦182 rb x ditonton¦¦2 hari yang lalu 28 | https://www.youtube.com/watch?v=zKbaHgFh0fo¦¦PERANG PECAH! Pasukan Tengkorak 'Tusuk' KKB Di Intan Jaya¦¦161 rb x ditonton¦¦3 hari yang lalu 29 | https://www.youtube.com/watch?v=MwT4L_NNQZc¦¦Reaksi Bharada E seusai Seluruh Pledoi Ditolak JPU, Kuasa Hukum: Janji Ikuti Proses dengan Baik¦¦156 rb x ditonton¦¦10 hari yang lalu 30 | https://www.youtube.com/watch?v=wfao0X0KvbI¦¦Ferdy Sambo Diprediksi akan 'Buka-bukaan' jika Akhirnya Divonis Mati dalam Kasus Kematian Brigadir J¦¦189 rb x ditonton¦¦Streaming 4 hari yang lalu 31 | https://www.youtube.com/watch?v=yNKvkPJl-tg¦¦Live Streaming tvOne 24 Jam¦¦54 rb x ditonton¦¦3 hari yang lalu 32 | https://www.youtube.com/watch?v=Pzd-cHHh8i4¦¦🔴Jaksa Cium Niat Jahat Pengacara Sambo yang akan Limpahkan Semua Kesalahan pada Richard Eliezer¦¦35 rb x ditonton¦¦6 hari yang lalu 33 | https://www.youtube.com/watch?v=Za5-fvwbPJI¦¦LIVE STREAMING 24 JAM - KOMPASTV¦¦6,6 rb x ditonton¦¦1 hari yang lalu 34 | https://www.youtube.com/watch?v=eUdoTrPdmuk¦¦Arema FC Bubar? 
Ini 7 Konsekuensi Jika Putuskan Bubar di Tengah Liga 1 Bergulir, Klub Lain Terdampak¦¦59 rb x ditonton¦¦3 hari yang lalu 35 | https://www.youtube.com/watch?v=Jx3fCgNWM4c¦¦Baku Tembak Pasukan Israel & Warga Palestina, Kamp Pengungsian Rusak Berat | Kabar Hari Ini tvOne¦¦157 rb x ditonton¦¦3 hari yang lalu 36 | https://www.youtube.com/watch?v=OPpvl5nWn7I¦¦Hancur Lebur, Insiden Bom Bunuh Diri di Masjid | Kabar Hari Ini tvOne¦¦13 rb x ditonton¦¦2 hari yang lalu 37 | https://www.youtube.com/watch?v=lH7J8HuWcQo¦¦Pemilu 2024, Nama Anies, Ganjar, dan Prabowo, Siapa Kandidat Capres Terkuat?¦¦12 rb x ditonton¦¦3 hari yang lalu 38 | https://www.youtube.com/watch?v=Ufsb8S6EvKk¦¦Tabrakan Audi Ungkap Perselingkuhan Polisi | Kabar Hari Ini tvOne¦¦4,6 rb x ditonton¦¦3 hari yang lalu 39 | https://www.youtube.com/watch?v=Ufsb8S6EvKk¦¦Tabrakan Audi Ungkap Perselingkuhan Polisi | Kabar Hari Ini tvOne¦¦9,5 rb x ditonton¦¦2 hari yang lalu 40 | https://www.youtube.com/watch?v=XefSqRWCiZs¦¦KEJUTAN! Haaland Bakal ke Barcelona 😱 Marselino Gabung Klub Belgia 😱 Eks Pelatih Timnas Meninggal 😢¦¦12 rb x ditonton¦¦2 hari yang lalu 41 | https://www.youtube.com/watch?v=10IF7COGwQ8¦¦BREAKING NEWS - Sidang Anak Buah Sambo: Hendra, Agus dan Arif Sampaikan Nota Pembelaan Kasus OOJ¦¦363 rb x ditonton¦¦8 hari yang lalu 42 | https://www.youtube.com/watch?v=y286wY5F7Uc¦¦Jaksa Usap Mata Berkali-kali Saat Bacakan Replik Richard Eliezer¦¦73 rb x ditonton¦¦1 hari yang lalu 43 | https://www.youtube.com/watch?v=gHk4_hEBcMM¦¦BREAKING NEWS - Presiden Jokowi Hadiri Puncak Perayaan HUT ke-8 PSI¦¦53 rb x ditonton¦¦2 hari yang lalu 44 | https://www.youtube.com/watch?v=C7TTKv_wEQM¦¦Hadapi Invasi Rusia, Ukraina Dapat Pinjaman Senjata dari Sejumlah Negara | Kabar Hari Ini tvOne¦¦64 rb x ditonton¦¦2 hari yang lalu 45 | https://www.youtube.com/watch?v=inEJJHSlnp8¦¦6 Tersangka Kasus Peredaran Narkoba yang Melibatkan Teddy Minahasa Hari Ini Diadili¦¦64 rb x ditonton¦¦2 hari yang lalu 46 | 
https://www.youtube.com/watch?v=zKbaHgFh0fo¦¦PERANG PECAH! Pasukan Tengkorak 'Tusuk' KKB Di Intan Jaya¦¦145 rb x ditonton¦¦1 hari yang lalu 47 | https://www.youtube.com/watch?v=MwT4L_NNQZc¦¦Reaksi Bharada E seusai Seluruh Pledoi Ditolak JPU, Kuasa Hukum: Janji Ikuti Proses dengan Baik¦¦104 rb x ditonton¦¦3 hari yang lalu 48 | https://www.youtube.com/watch?v=wfao0X0KvbI¦¦Ferdy Sambo Diprediksi akan 'Buka-bukaan' jika Akhirnya Divonis Mati dalam Kasus Kematian Brigadir J¦¦219 rb x ditonton¦¦Streaming 2 hari yang lalu 49 | https://www.youtube.com/watch?v=yNKvkPJl-tg¦¦Live Streaming tvOne 24 Jam¦¦15 rb x ditonton¦¦1 hari yang lalu 50 | https://www.youtube.com/watch?v=Pzd-cHHh8i4¦¦🔴Jaksa Cium Niat Jahat Pengacara Sambo yang akan Limpahkan Semua Kesalahan pada Richard Eliezer¦¦3,2 rb x ditonton¦¦2 hari yang lalu 51 | https://www.youtube.com/watch?v=Za5-fvwbPJI¦¦LIVE STREAMING 24 JAM - KOMPASTV¦¦182 rb x ditonton¦¦2 hari yang lalu 52 | https://www.youtube.com/watch?v=eUdoTrPdmuk¦¦Arema FC Bubar? 
Ini 7 Konsekuensi Jika Putuskan Bubar di Tengah Liga 1 Bergulir, Klub Lain Terdampak¦¦161 rb x ditonton¦¦3 hari yang lalu 53 | https://www.youtube.com/watch?v=Jx3fCgNWM4c¦¦Baku Tembak Pasukan Israel & Warga Palestina, Kamp Pengungsian Rusak Berat | Kabar Hari Ini tvOne¦¦156 rb x ditonton¦¦10 hari yang lalu 54 | https://www.youtube.com/watch?v=E7de4ex_yTc¦¦[Breaking News] Banjir Bandang di Parepare | tvOne¦¦189 rb x ditonton¦¦Streaming 4 hari yang lalu 55 | https://www.youtube.com/watch?v=2zpCbFFEvTw¦¦ISRAEL-PALESTINA KEMBALI SALING SERANG RUDAL¦¦54 rb x ditonton¦¦3 hari yang lalu 56 | https://www.youtube.com/watch?v=9B2scW2RNQ0¦¦Bak di Film-film, Jambret di Surabaya Keok Lawan 7 Anak SD | Kabar Utama tvOne¦¦35 rb x ditonton¦¦6 hari yang lalu 57 | https://www.youtube.com/watch?v=leHbRGvYC5k¦¦PKS Deklarasi Dukungan Terhadap Anies Baswedan | Kabar Petang tvOne¦¦6,6 rb x ditonton¦¦1 hari yang lalu 58 | https://www.youtube.com/shorts/GZ9cvkVFGnc¦¦KABAR TERKINI Hotman Paris Lapor Nikita Mirzani dan Farhat Abbas Gegara Bunda Corla?¦¦59 rb x ditonton¦¦3 hari yang lalu 59 | https://www.youtube.com/watch?v=7-ysqhUmx-E¦¦Banjir dan Tanah Longsor Landa Auckland Selandia Baru | Kabar Hari Ini tvOne¦¦157 rb x ditonton¦¦3 hari yang lalu 60 | https://www.youtube.com/watch?v=ehEw3mPcMm4¦¦🔴 KILASAN TERKINI : Korban Tewas Ledakan di Masjid Pakistan Jadi 100 Orang¦¦13 rb x ditonton¦¦2 hari yang lalu 61 | https://www.youtube.com/watch?v=FN0W12umPrY¦¦Sidang Vonis Ferdy Sambo Akan Digelar 13 Februari 2023 | Kabar Petang tvOne¦¦12 rb x ditonton¦¦3 hari yang lalu 62 | https://www.youtube.com/watch?v=dn-1htaioDM¦¦Dengan Tangan Kosong, Warga Kalahkan Pelaku Penembakan Massal | Kabar Hari Ini tvOne¦¦4,6 rb x ditonton¦¦3 hari yang lalu 63 | https://www.youtube.com/watch?v=OPpvl5nWn7I¦¦Hancur Lebur, Insiden Bom Bunuh Diri di Masjid | Kabar Hari Ini tvOne¦¦9,5 rb x ditonton¦¦2 hari yang lalu 64 | https://www.youtube.com/watch?v=lH7J8HuWcQo¦¦Pemilu 2024, Nama Anies, Ganjar, dan Prabowo, Siapa 
Kandidat Capres Terkuat?¦¦12 rb x ditonton¦¦2 hari yang lalu 65 | https://www.youtube.com/watch?v=wgS8xkWL9Hw¦¦Alasan Pemuda Papua Ngebet Pengen Gabung KKB¦¦363 rb x ditonton¦¦8 hari yang lalu 66 | https://www.youtube.com/watch?v=Q5s2HxfUGfc¦¦Demokrat Dukung Anies Baswedan Maju di Pilpres 2024 | Kabar Utama tvOne¦¦73 rb x ditonton¦¦1 hari yang lalu 67 | https://www.youtube.com/watch?v=cEfjW2yldWM¦¦Robot Tempur Rusia Musnahkan Tank Abrams, Leopard 2 AS dan Jerman, Ukraina Mulai Ketar-ketir¦¦53 rb x ditonton¦¦2 hari yang lalu 68 | https://www.youtube.com/watch?v=tGcyylEdr2s¦¦Sebarkan Ajaran Sesat, Masjid di Pamekasan Disegel | Apa Kabar Indonesia Pagi tvOne¦¦294 rb x ditonton¦¦2 minggu yang lalu 69 | https://www.youtube.com/watch?v=hqg1YsFmhLs¦¦Kabar Sumatera 31 Januari 2023 | tvOne¦¦103 rb x ditonton¦¦6 hari yang lalu 70 | https://www.youtube.com/watch?v=Y1dN0rR-6AA¦¦Pelaku Percobaan Pemerkosaan Habis Dihajar Warga | Kabar Hari Ini tvOne¦¦438 rb x ditonton¦¦6 hari yang lalu 71 | https://www.youtube.com/watch?v=tSjBIoNlUWw¦¦BREAKING NEWS - Tanggapan JPU Atas Pledoi Terdakwa Putri Candrawathi & Richard Eliezer¦¦75 rb x ditonton¦¦8 hari yang lalu 72 | https://www.youtube.com/watch?v=rPY6XWKX-gc¦¦Begini Potret Terkini Pembangunan IKN Nusantara¦¦56 rb x ditonton¦¦3 hari yang lalu 73 | https://www.youtube.com/watch?v=I3Hez7gPF3Q¦¦Fakta Penolakan Ibunda Ferry Irawan Ke Rumah Venna Melinda | Hot Shot¦¦1,5 jt x ditonton¦¦1 bulan yang lalu 74 | https://www.youtube.com/watch?v=XCyfYwJcckI¦¦Catatan Effendi Gazali Respon Soal Kontrak Politik | Kabar Petang tvOne¦¦426 rb x ditonton¦¦Streaming 4 hari yang lalu 75 | https://www.youtube.com/watch?v=KVgzaq27qiw¦¦Rencana Rusia Kerahkan Tank T-14 ke Medan Perang di Ukraina Ditolak Pasukannya, Ini Alasannya¦¦230 rb x ditonton¦¦6 hari yang lalu 76 | https://www.youtube.com/watch?v=4s_CMha6BAU¦¦Nota Pembelaan Eliezer Ditolak Jaksa, Ini Kata Ronny Talapessy¦¦92 rb x ditonton¦¦2 hari yang lalu 77 | 
https://www.youtube.com/watch?v=iGx0jACEzV0¦¦Highlights - PSIS Semarang VS Persib Bandung | BRI Liga 1 2022/2023¦¦62 rb x ditonton¦¦2 hari yang lalu 78 | https://www.youtube.com/watch?v=d92KP_jhgXk¦¦Wilayah Israel Bagian Selatan Dihujani Roket Militan Palestina | Kabar Hari Ini tvOne¦¦91 rb x ditonton¦¦6 hari yang lalu 79 | https://www.youtube.com/watch?v=FT7u1sX8bZo¦¦JPU Tidak Mendapatkan Bukti Pelecehan Putri dari Pengacara | Breaking News tvOne¦¦858 rb x ditonton¦¦4 hari yang lalu -------------------------------------------------------------------------------- /results/jpu_scroll-1_20230204_193840.txt: -------------------------------------------------------------------------------- 1 | https://www.youtube.com/watch?v=5DDvydmswz4¦¦Momen JPU Minta Hakim Perlihatkan Isi Amplop Dalam Berkas: Biar Netizen Tidak Salah Tafsirkan¦¦250K views¦¦1 day ago 2 | https://www.youtube.com/watch?v=Amf36_5u1-E¦¦Tanggapi Pledoi Putri Candrawathi, JPU: Tidak Ada Satu pun Bukti yang Menunjukan Putri Dilecehkan¦¦40K views¦¦5 days ago 3 | https://www.youtube.com/watch?v=RZQFjNMAksA¦¦Ekspresi JPU Saat Disebut Frustasi oleh Penasihat Hukum Ferdy Sambo¦¦41K views¦¦3 days ago 4 | https://www.youtube.com/watch?v=MwT4L_NNQZc¦¦Reaksi Bharada E seusai Seluruh Pledoi Ditolak JPU, Kuasa Hukum: Janji Ikuti Proses dengan Baik¦¦167K views¦¦4 days ago 5 | https://www.youtube.com/watch?v=aED2KXBSB2E¦¦Pengacara Minta Maaf Ke JPU Terkait Pledoi Eliezer¦¦17K views¦¦1 day ago 6 | https://www.youtube.com/watch?v=nzFg5fV_69w¦¦Hakim Libas Habis JPU saat Tanya Ajudan Hendra Kurniawan, Kesal Tak Sesuai SOP: Kok PD Banget Anda¦¦251K views¦¦2 weeks ago -------------------------------------------------------------------------------- /sample_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhohirpradana/Python-Youtube-Search-Scraper/503036505aa64e966b3f2032341dd954fd5ccefa/sample_output.png 
# /yt_scraper_sroll/__init__.py
"""YouTube search scraper driven by a shared headless Chrome session."""
import time
import urllib.parse

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# binary = FirefoxBinary(r'C:\Program Files\Mozilla Firefox\firefox.exe')

# options = webdriver.FirefoxOptions()
# options.add_argument("--headless")
# options.add_argument("--window-size=1920,1080")
# options.add_argument("--disable-gpu")
# options.add_argument("--disable-extensions")
# options.add_argument("--no-sandbox")
# options.add_argument("--disable-dev-shm-usage")
# options.add_argument("--disable-features=VizDisplayCompositor")
# options.add_argument("--disable-features=NetworkService")
# # , firefox_binary=binary

# driver = webdriver.Firefox(options=options)

# Seconds to wait after each scroll so the page can append more results.
_SCROLL_PAUSE_SECONDS = 2
# Placeholder stored when a field cannot be paired with a link.
_MISSING = "-"
_VIDEO_TITLE_XPATH = "//a[@id='video-title']"
_VIDEO_META_XPATH = (
    "//span[@class='inline-metadata-item style-scope ytd-video-meta-block']"
)


def set_chrome_options() -> Options:
    """Build the Chrome options used by the module-level driver.

    Returns:
        Options configured for a headless, sandbox-less Chrome with the
        ``profile.default_content_settings`` pref ``images`` set to 2
        (presumably "block images" -- confirm against the Chrome prefs docs).
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    # Public API instead of mutating the experimental_options dict in place.
    chrome_options.add_experimental_option(
        "prefs", {"profile.default_content_settings": {"images": 2}})
    return chrome_options


# NOTE(review): a single driver is created at import time and shared by every
# call to handler(); concurrent requests would interleave on it -- confirm the
# app serves requests one at a time.
driver = webdriver.Chrome(options=set_chrome_options())
driver.delete_all_cookies()
# BASE_DIR = os.path.join(os.path.dirname(__file__), '..')


def _is_video_href(href):
    """Return True when *href* is a single-video link (not playlist/channel)."""
    if not href:
        # Anchors without an href (get_attribute returns None) are skipped.
        return False
    parts = urllib.parse.urlsplit(href)
    # Playlists carry a ``list`` query parameter; checking the parameter
    # avoids rejecting videos whose id merely contains the letters "list".
    if "list" in urllib.parse.parse_qs(parts.query):
        return False
    # Selenium returns absolute URLs, so the channel marker must be looked
    # for in the path rather than at the start of the whole string.
    return not parts.path.startswith("/@")


def _collect_videos():
    """Return ``(links, titles)`` for the video anchors currently in the DOM."""
    links, titles = [], []
    for anchor in driver.find_elements(By.XPATH, _VIDEO_TITLE_XPATH):
        href = anchor.get_attribute("href")
        if not _is_video_href(href):
            continue
        links.append(href)
        titles.append(anchor.get_attribute("title") or _MISSING)
    return links, titles


def _collect_metadata():
    """Return ``(views, published)`` texts from the metadata spans in the DOM."""
    views, published = [], []
    for item in driver.find_elements(By.XPATH, _VIDEO_META_XPATH):
        text = item.text  # one WebDriver round trip per element
        if "views" in text or "ditonton" in text:
            views.append(text)
        elif "ago" in text or "yang lalu" in text:
            published.append(text)
    return views, published


def _value_at(values, index):
    """Return ``values[index]`` or the missing-value placeholder."""
    return values[index] if index < len(values) else _MISSING


def handler(request, jsonify):
    """Scrape YouTube search results for the query given in the request body.

    Args:
        request: object exposing ``get_json()``; the decoded body must be a
            JSON object with ``query`` (str) and ``scroll`` (int-like) keys.
            Presumably a Flask request -- verify against the caller.
        jsonify: callable turning a dict into the response payload.

    Returns:
        ``(payload, status)``: 400 for a missing or invalid body, 500 when
        the browser session fails, otherwise 200 with ``message`` and the
        scraped records under ``data`` (keys ``url``, ``title``, ``views``,
        ``published``).
    """
    body = request.get_json()

    if body is None:
        return jsonify({'message': 'No body provided'}), 400
    if not isinstance(body, dict):
        return jsonify({'message': 'Body must be a JSON object'}), 400

    try:
        query = body['query']
        scroll = body['scroll']
    except KeyError as err:
        # A missing dict key raises KeyError (never AttributeError).
        return jsonify({'message': str(err) + " not provided"}), 400

    if not isinstance(query, str):
        return jsonify({'message': 'query must be a string'}), 400
    try:
        scroll = max(int(scroll), 1)
    except (TypeError, ValueError, OverflowError):
        return jsonify({'message': 'scroll must be an integer'}), 400

    query_url = urllib.parse.quote(query)
    print('Query URL: ', query_url)

    res_data = []
    seen_links = set()

    try:
        driver.get(f"https://www.youtube.com/results?search_query={query_url}")
        viewport_height = driver.execute_script("return window.innerHeight")

        # One pass over the initial page plus one pass after each scroll.
        for scroll_num in range(scroll + 1):
            print(f"Scrolling {scroll_num} of {scroll}")

            # Every pass re-reads the whole page, so the lists are rebuilt
            # from scratch and already-recorded links are skipped; this keeps
            # the index pairing stable and the output free of duplicates.
            links, titles = _collect_videos()
            views, published = _collect_metadata()
            # NOTE(review): views/published are paired with links purely by
            # position; entries without a view count can shift the pairing.
            for index, link in enumerate(links):
                if link in seen_links:
                    continue
                seen_links.add(link)
                res_data.append({
                    "url": link,
                    "title": _value_at(titles, index),
                    "views": _value_at(views, index),
                    "published": _value_at(published, index),
                })

            if scroll_num == scroll:
                break  # nothing will be read after a further scroll

            document_height_before = driver.execute_script(
                "return document.documentElement.scrollHeight")
            driver.execute_script(
                f"window.scrollTo(0, {document_height_before + viewport_height});")

            # delay before next scroll
            time.sleep(_SCROLL_PAUSE_SECONDS)
            document_height_after = driver.execute_script(
                "return document.documentElement.scrollHeight")

            # end of scroll: the page did not grow, so there is nothing new
            if document_height_after == document_height_before:
                break

    except (ConnectionError, WebDriverException) as err:
        # Selenium reports browser/session failures as WebDriverException.
        print("Error: ", err)
        return jsonify({'message': str(err)}), 500

    return jsonify({'message': 'success', 'data': res_data}), 200