├── LICENSE
├── README.md
└── webscrape.sh


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Sandesh Yadav
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # WebScrape ![License](https://img.shields.io/badge/License-MIT-yellow.svg) ![Version](https://img.shields.io/badge/Version-1.0-red.svg)
 2 | 
 3 | WebScrape is a web scraper written in Bash, with error handling, that scrapes email addresses and phone numbers from websites.
 4 | 
 5 | **What is web scraping?** <br>
 6 | Web scraping, also termed web data extraction or web harvesting, is a technique for extracting large amounts of data from websites; the extracted data is saved to a local file on your computer for later use.
 7 | 
 8 | ## Screenshot
 9 | 
10 | <img src="https://user-images.githubusercontent.com/46316908/136668777-24ca6baf-e413-459e-87a4-8cd1d8630ff8.png" width="100%"></img>
11 | 
12 | ## Installing and requirements
13 | 
14 | - Linux or Unix-based system with `bash`, `curl`, `wget` and `grep` installed
15 | 
16 | ### Installing
17 | 
18 | ```
19 | ~ ❯❯❯ git clone https://github.com/3xploitGuy/webscrape.git
20 | 
21 | ~ ❯❯❯ cd webscrape
22 | 
23 | ~/webscrape ❯❯❯ chmod +x webscrape.sh
24 | 
25 | ~/webscrape ❯❯❯ ./webscrape.sh
26 | ```
27 | 
28 | ## Contact
29 | 
30 | [Gmail](mailto:sandeshyadavm46@gmail.com) <br>
31 | [Instagram](https://instagram.com/1n_only_sandy) <br>
32 | [Blog](https://virtualprivacy.blogspot.com) <br>
33 | [Website](https://sandeshyadav.000webhostapp.com) <br>
34 | [YouTube](https://www.youtube.com/channel/UCAdDJn4yWzQMJyKyRWne3qg)
35 | 
36 | ## Disclaimer
37 | 
38 | WebScrape was created to help with scraping data from the Internet; its author is not responsible for any misuse or illegal use of it.
39 | 
40 | ## License
41 | 
42 | This work by [3xploitGuy](https://github.com/3xploitGuy) is licensed under the terms of the [MIT License](https://www.tldrlegal.com/l/mit).
43 | 


--------------------------------------------------------------------------------
/webscrape.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | Black="\e[1;90m"
  4 | Red="\e[1;91m"
  5 | Green="\e[1;92m"
  6 | Yellow="\e[1;93m"
  7 | Blue="\e[1;94m"
  8 | Purple="\e[1;95m"
  9 | White="\e[1;97m"
 10 | clear
# Print the start-up banner: the two-colour ASCII-art logo, a one-line
# description of the tool and the author credit.
# Reads the global colour variables Red and Yellow; takes no arguments.
banner () {
# The logo is a single multi-line string passed through `echo -e`; its
# backslashes and trailing spaces are part of the drawing, so it must not be
# reformatted.
echo -e "
${Red} __      __  ${Yellow}    ___.     _________                                  
${Red}/  \    /  \ ${Yellow}____\_ |__  /   _____/ ________________  ______   ____  
${Red}\   \/\/   /${Yellow}/ __ \| __ \ \_____  \_/ ___\_  __ \__  \ \____ \_/ __ \ 
${Red} \        /${Yellow}\  ___/| \_\ \/        \  \___|  | \// __ \|  |_> >  ___/ 
${Red}  \__/\  /${Yellow}  \___  >___  /_______  /\___  >__|  (____  /   __/ \___  >
${Red}       \/${Yellow}       \/    \/        \/     \/           \/|__|        \/                                                                     "
# Tagline; the trailing \e[0m resets the text attributes afterwards.
printf "\n\e[1;77m     A web scraper to get emails and phone numbers from websites      \e[0m\n\n"
# Author credit: label in \e[0;96m, name in Red.
echo -e "\e[0;96m                Developed by: ${Red}Sandesh (3xploitGuy)\n\n\n"                      
# Version line, currently disabled:
#echo -e "\e[0;96m                     Version: ${Red}1.0 Stable\n\n\n"                  
}
 23 | scanner () {
 24 | sleep 0.5
 25 | read -p $'\e[1;97m[\e[1;92m*\e[1;97m]\e[1;92m Enter URL to begin : \e[1;97m' url
 26 | url_validity='(https?|ftp|file)://[-A-Za-z0-9\+&@#/%?=~_|!:,.;]*[-A-Za-z0-9\+&@#/%=~_|]'
 27 | if [[ $url =~ $url_validity ]]
 28 | then 
 29 | read -p $'\e[1;97m[\e[1;92m*\e[1;97m]\e[1;92m Scrape emails from website (y/n) : \e[1;97m' email
 30 | read -p $'\e[1;97m[\e[1;92m*\e[1;97m]\e[1;92m Scrape phone numbers from website (y/n) : \e[1;97m' phone
 31 | if [ "$email" = "Y" ] || [ "$email" = "y" ] || [ "$phone" = "Y" ] || [ "$phone" = "y" ]; then
 32 | echo -e "$White[${Red}!$White] ${Red}Scraping started"
 33 | scraper
 34 | fi
 35 | sleep 0.4
 36 | echo -e "$White[${Red}!$White] ${Red}Exiting....\n"
 37 | exit
 38 | else
 39 | echo -e "$White[$Red!$White] ${Red}Check your url (invalid)"
 40 | scanner
 41 | fi
 42 | }
 43 | scraper () {
 44 | curl -s $url > temp.txt
 45 | if [ "$email" = "Y" ] || [ "$email" = "y" ]; then
 46 | email_scraping
 47 | fi
 48 | if [ "$phone" = "Y" ] || [ "$phone" = "y" ]; then
 49 | phone_scraping
 50 | fi
 51 | rm temp.txt
 52 | if [[ -f "email.txt" ]] || [[ -f "phone.txt" ]] ; then
 53 | sleep 0.4
 54 | read -p $'\e[1;97m[\e[1;92m*\e[1;97m]\e[1;92m Do you want to save the output (y/n) : \e[1;97m' save_output
 55 | if [ "$save_output" = "Y" ] || [ "$save_output" = "y" ]; then
 56 | output
 57 | fi
 58 | fi
 59 | sleep 0.4
 60 | echo -e "$White[${Red}!$White] ${Red}Exiting....\n"
 61 | rm email.txt phone.txt 2> /dev/null 
 62 | exit
 63 | }
 64 | email_scraping () {
 65 | grep -i -o '[A-Z0-9._%+-]\+@[A-Z0-9.-]\+\.[A-Z]\{2,4\}' temp.txt | sort -u > email.txt
 66 | if [[ -s email.txt ]]; then
 67 | echo -e "$White[${Yellow}*$White] ${Yellow}Emails success${White}"
 68 | cat email.txt
 69 | else 
 70 | echo -e "$White[${Red}!$White] ${Red}No Emails found"
 71 | rm email.txt
 72 | fi
 73 | }
 74 | phone_scraping () {
 75 | grep -o '\([0-9]\{3\}\-[0-9]\{3\}\-[0-9]\{4\}\)\|\(([0-9]\{3\})[0-9]\{3\}\-[0-9]\{4\}\)\|\([0-9]\{10\}\)\|\([0-9]\{3\}\s[0-9]\{3\}\s[0-9]\{4\}\)' temp.txt | sort -u > phone.txt
 76 | if [[ -s phone.txt ]]; then
 77 | echo -e "$White[${Yellow}*$White] ${Yellow}Phone numbers success${White}"
 78 | cat phone.txt
 79 | else 
 80 | echo -e "$White[${Red}!$White] ${Red}No phone numbers found"
 81 | rm phone.txt
 82 | fi
 83 | }
 84 | output () {
 85 | sleep 0.4
 86 | read -p $'\e[1;97m[\e[1;92m*\e[1;97m]\e[1;92m Enter folder name : \e[1;97m' folder_name
 87 | if [ -d "$folder_name" ] 
 88 | then
 89 | echo -e "$White[${Red}!$White] ${Red}Folder already exists"
 90 | output
 91 | fi
 92 | mkdir $folder_name
 93 | mv email.txt $folder_name 2> /dev/null
 94 | mv phone.txt $folder_name 2> /dev/null
 95 | sleep 0.3
 96 | echo -e "$White[${Green}*$White] ${Yellow}Output saved"
 97 | sleep 0.4
 98 | echo -e "$White[${Red}!$White] ${Red}Exiting....\n"
 99 | exit
100 | }
#######################################
# Check that the network is reachable before any scraping is attempted.
# Globals:   White, Red, Yellow (read)
# Arguments: none
# Outputs:   status messages
# Returns:   0 when http://google.com could be reached; otherwise the script
#            exits with status 1
#######################################
internet () {
  local -a probe

  sleep 0.5
  echo -e "$White[$Red!$White] ${Red}Checking internet connection"

  # wget is preferred; when it is not installed, fall back to curl (which the
  # scraper needs anyway) so a missing tool is not mistaken for a missing
  # connection.
  if command -v wget > /dev/null 2>&1; then
    probe=(wget -q --spider http://google.com)
  else
    probe=(curl -s --head --output /dev/null http://google.com)
  fi

  if "${probe[@]}"; then
    echo -e "$White[$Yellow*$White] ${Yellow}Connected"
  else
    sleep 0.5
    echo -e "$White[$Red!$White] ${Red}No internet try later"
    # Non-zero so callers and wrappers can tell the run failed.
    exit 1
  fi
}
# Program flow: show the banner, verify connectivity, then start the
# interactive session.
main () {
  banner
  internet
  scanner
}

main "$@"
116 | 


--------------------------------------------------------------------------------