├── Procfile
├── README.md
├── app.py
├── businesslayer
│   ├── BusinessLayerUtil.py
│   └── __pycache__
│       └── BusinessLayerUtil.cpython-36.pyc
├── requirements.txt
├── scrapperImage
│   ├── ScrapperImage.py
│   └── __pycache__
│       └── ScrapperImage.cpython-36.pyc
├── static
│   ├── Dog0.jpg
│   ├── Dog1.jpg
│   ├── Dog2.jpg
│   ├── Dog3.jpg
│   ├── Dog4.jpg
│   ├── Dog5.jpg
│   └── style.css
└── templates
    ├── index.html
    └── showImage.html

/Procfile:
--------------------------------------------------------------------------------
web: gunicorn app:app
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Image-Webscrapper

A small Flask app that takes a search keyword, scrapes Google Images for matching pictures, saves the first few of them into the static folder, and displays them in the browser.
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
@author: Krish Naik
"""
# Import the necessary libraries
from flask_cors import cross_origin
from flask import Flask, render_template, request
from scrapperImage.ScrapperImage import ScrapperImage
from businesslayer.BusinessLayerUtil import BusinessLayer
import os


app = Flask(__name__)  # initialise the Flask app


@app.route('/')  # route for the home page
@cross_origin()
def home():
    return render_template('index.html')


@app.route('/showImages')
@cross_origin()
def displayImages():
    # Only the downloaded .jpg files, so style.css is not treated as an image
    list_images = [f for f in os.listdir('static') if f.endswith('.jpg')]
    print(list_images)

    try:
        if len(list_images) > 0:
            return render_template('showImage.html', user_images=list_images)
        else:
            return "Images are not present"
    except Exception as e:
        print("No images found", e)
        return "Please try with a different search keyword"


@app.route('/searchImages', methods=['GET', 'POST'])
def searchImage():
    if request.method == "POST":
        search_term = request.form['keyword']  # keyword entered in the search form
    else:
        return "Please enter a search keyword"

    # Delete the images left over from the previous search, keeping style.css intact
    imagescrapper = ScrapperImage()
    list_images = [f for f in os.listdir('static') if f.endswith('.jpg')]
    imagescrapper.delete_downloaded_images(list_images)

    # A browser-like User-Agent header is needed, otherwise Google rejects the request
    header = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
    }
    BusinessLayer.downloadImages(search_term, header)  # download the images into static/

    return displayImages()  # hand control over to the show-images view


if __name__ == "__main__":
    app.run(host='127.0.0.1', port=8000)  # run on the local machine
    # app.run(debug=True)  # use this when deploying to the cloud
--------------------------------------------------------------------------------
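The snippet below is not part of the repository; it is a minimal sketch of how the search endpoint can be exercised once the app is running locally (started with python app.py, or gunicorn app:app as in the Procfile). It uses only the standard library, mirroring the urllib usage elsewhere in the project, and the keyword value is just an example.

import urllib.parse
import urllib.request

# POST a search keyword to the locally running app (127.0.0.1:8000, as configured in app.py)
form_data = urllib.parse.urlencode({'keyword': 'golden retriever'}).encode()
with urllib.request.urlopen('http://127.0.0.1:8000/searchImages', data=form_data) as resp:
    print(resp.status)        # 200 on success
    print(resp.read()[:200])  # beginning of the rendered showImage.html page
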
image_name="" 15 | header="" 16 | 17 | def downloadImages( keyWord, header): 18 | imgScrapper = ScrapperImage 19 | url = imgScrapper.createImageUrl(keyWord) 20 | rawHtml = imgScrapper.scrap_html_data(url, header) 21 | 22 | imageURLList = imgScrapper.getimageUrlList(rawHtml) 23 | 24 | masterListOfImages = imgScrapper.downloadImagesFromURL(imageURLList,keyWord, header) 25 | 26 | return masterListOfImages 27 | 28 | 29 | -------------------------------------------------------------------------------- /businesslayer/__pycache__/BusinessLayerUtil.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/Image-Webscrapper/ca3901ea156912a35f28b77da4458e91cc01bd10/businesslayer/__pycache__/BusinessLayerUtil.cpython-36.pyc -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.8.1 2 | bs4==0.0.1 3 | certifi==2019.9.11 4 | Click==7.0 5 | Flask==1.1.1 6 | Flask-Cors==3.0.8 7 | gunicorn==20.0.4 8 | itsdangerous==1.1.0 9 | Jinja2==2.10.3 10 | MarkupSafe==1.1.1 11 | numpy==1.17.4 12 | opencv-python==4.1.2.30 13 | Pillow==6.2.1 14 | pymongo==3.9.0 15 | six==1.13.0 16 | soupsieve==1.9.5 17 | Werkzeug==0.16.0 -------------------------------------------------------------------------------- /scrapperImage/ScrapperImage.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Jan 7 10:51:15 2020 4 | 5 | @author: krish.naik 6 | """ 7 | 8 | from bs4 import BeautifulSoup as bs 9 | import os 10 | import json 11 | import urllib.request 12 | import urllib.parse 13 | import urllib.error 14 | from urllib.request import urlretrieve 15 | 16 | class ScrapperImage: 17 | 18 | ## Create Image URl 19 | def createImageUrl(searchterm): 20 | searchterm=searchterm.split() 21 | searchterm="+".join(searchterm) 22 | web_url="https://www.google.co.in/search?q=" + searchterm + "&source=lnms&tbm=isch" 23 | return web_url 24 | 25 | # get Raw HTML 26 | def scrap_html_data(url,header): 27 | request=urllib.request.Request(url,headers=header) 28 | response = urllib.request.urlopen(request) 29 | responseData = response.read() 30 | html = bs(responseData, 'html.parser') 31 | return html 32 | 33 | # contains the link for Large original images, type of image 34 | def getimageUrlList(rawHtml): 35 | imageUrlList = [] 36 | for a in rawHtml.find_all("div", {"class": "rg_meta"}): 37 | link, imageExtension = json.loads(a.text)["ou"], json.loads(a.text)["ity"] 38 | imageUrlList.append((link, imageExtension)) 39 | 40 | print("there are total", len(imageUrlList), "images") 41 | return imageUrlList 42 | 43 | def downloadImagesFromURL(imageUrlList,image_name, header): 44 | masterListOfImages = [] 45 | count=0 46 | ###print images 47 | imageFiles = [] 48 | imageTypes = [] 49 | image_counter=0 50 | for i, (img, Type) in enumerate(imageUrlList): 51 | try: 52 | if (count > 5): 53 | break 54 | else: 55 | count = count + 1 56 | req = urllib.request.Request(img, headers=header) 57 | try: 58 | urllib.request.urlretrieve(img,"./static/"+image_name+str(image_counter)+".jpg") 59 | image_counter=image_counter+1 60 | except Exception as e: 61 | print("Image write failed: ",e) 62 | image_counter = image_counter + 1 63 | respData = urllib.request.urlopen(req) 64 | raw_img = respData.read() 65 | # soup = bs(respData, 'html.parser') 66 | 67 | 
/requirements.txt:
--------------------------------------------------------------------------------
beautifulsoup4==4.8.1
bs4==0.0.1
certifi==2019.9.11
Click==7.0
Flask==1.1.1
Flask-Cors==3.0.8
gunicorn==20.0.4
itsdangerous==1.1.0
Jinja2==2.10.3
MarkupSafe==1.1.1
numpy==1.17.4
opencv-python==4.1.2.30
Pillow==6.2.1
pymongo==3.9.0
six==1.13.0
soupsieve==1.9.5
Werkzeug==0.16.0
--------------------------------------------------------------------------------
/scrapperImage/ScrapperImage.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 7 10:51:15 2020

@author: krish.naik
"""

from bs4 import BeautifulSoup as bs
import os
import json
import urllib.request


class ScrapperImage:

    # Build the Google Images search URL for the given search term
    @staticmethod
    def createImageUrl(searchterm):
        searchterm = searchterm.split()
        searchterm = "+".join(searchterm)
        web_url = "https://www.google.co.in/search?q=" + searchterm + "&source=lnms&tbm=isch"
        return web_url

    # Fetch the results page and return the parsed HTML
    @staticmethod
    def scrap_html_data(url, header):
        request = urllib.request.Request(url, headers=header)
        response = urllib.request.urlopen(request)
        responseData = response.read()
        html = bs(responseData, 'html.parser')
        return html

    # Extract the links to the large original images together with the image type
    @staticmethod
    def getimageUrlList(rawHtml):
        imageUrlList = []
        for a in rawHtml.find_all("div", {"class": "rg_meta"}):
            link, imageExtension = json.loads(a.text)["ou"], json.loads(a.text)["ity"]
            imageUrlList.append((link, imageExtension))

        print("There are", len(imageUrlList), "images in total")
        return imageUrlList

    # Download up to six images into static/ and return their raw bytes and types
    @staticmethod
    def downloadImagesFromURL(imageUrlList, image_name, header):
        masterListOfImages = []
        count = 0
        imageFiles = []
        imageTypes = []
        image_counter = 0
        for i, (img, Type) in enumerate(imageUrlList):
            try:
                if count > 5:
                    break
                else:
                    count = count + 1
                    req = urllib.request.Request(img, headers=header)
                    try:
                        # Save the image to disk as static/<name><counter>.jpg
                        urllib.request.urlretrieve(img, "./static/" + image_name + str(image_counter) + ".jpg")
                        image_counter = image_counter + 1
                    except Exception as e:
                        print("Image write failed: ", e)
                        image_counter = image_counter + 1
                    # Also keep the raw bytes in memory for the master list
                    respData = urllib.request.urlopen(req)
                    raw_img = respData.read()

                    imageFiles.append(raw_img)
                    imageTypes.append(Type)

            except Exception as e:
                print("Could not load: " + img)
                print(e)
                count = count + 1
        masterListOfImages.append(imageFiles)
        masterListOfImages.append(imageTypes)

        return masterListOfImages

    # Remove previously downloaded images from the static folder
    def delete_downloaded_images(self, list_of_images):
        for image in list_of_images:
            try:
                os.remove("./static/" + image)
            except Exception as e:
                print('Error in deleting: ', e)
        return 0
--------------------------------------------------------------------------------
/scrapperImage/__pycache__/ScrapperImage.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krishnaik06/Image-Webscrapper/ca3901ea156912a35f28b77da4458e91cc01bd10/scrapperImage/__pycache__/ScrapperImage.cpython-36.pyc
--------------------------------------------------------------------------------
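To inspect what the scraper would find without writing anything to disk, the first three ScrapperImage steps can also be chained on their own. Again a sketch rather than repository code, under the same assumptions: Google's result page still contains the rg_meta divs, the header only needs to look like a browser, and the search term is just an example.

from scrapperImage.ScrapperImage import ScrapperImage

header = {'User-Agent': "Mozilla/5.0"}  # any browser-like UA; app.py uses a full Chrome string
url = ScrapperImage.createImageUrl("siberian husky")   # build the search URL
html = ScrapperImage.scrap_html_data(url, header)      # fetch and parse the results page
for link, ext in ScrapperImage.getimageUrlList(html)[:5]:
    print(ext, link)                                   # first five image URLs and their types
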
/static/Dog0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krishnaik06/Image-Webscrapper/ca3901ea156912a35f28b77da4458e91cc01bd10/static/Dog0.jpg
--------------------------------------------------------------------------------
/static/Dog1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krishnaik06/Image-Webscrapper/ca3901ea156912a35f28b77da4458e91cc01bd10/static/Dog1.jpg
--------------------------------------------------------------------------------
/static/Dog2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krishnaik06/Image-Webscrapper/ca3901ea156912a35f28b77da4458e91cc01bd10/static/Dog2.jpg
--------------------------------------------------------------------------------
/static/Dog3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krishnaik06/Image-Webscrapper/ca3901ea156912a35f28b77da4458e91cc01bd10/static/Dog3.jpg
--------------------------------------------------------------------------------
/static/Dog4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krishnaik06/Image-Webscrapper/ca3901ea156912a35f28b77da4458e91cc01bd10/static/Dog4.jpg
--------------------------------------------------------------------------------
/static/Dog5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krishnaik06/Image-Webscrapper/ca3901ea156912a35f28b77da4458e91cc01bd10/static/Dog5.jpg
--------------------------------------------------------------------------------
/static/style.css:
--------------------------------------------------------------------------------
* {
  box-sizing: border-box;
}

*:focus {
  outline: none;
}

body {
  font-family: Arial;
  background-color: #e8e8e8;
  padding: 50px;
}

.login {
  margin: 20px auto;
  width: 300px;
}

.login-screen {
  background-color: #FFF;
  padding: 20px;
  border-radius: 5px;
}

.app-title {
  text-align: center;
  color: #777;
}

.login-form {
  text-align: center;
}

.control-group {
  margin-bottom: 10px;
}

input {
  text-align: center;
  background-color: #ECF0F1;
  border: 2px solid transparent;
  border-radius: 3px;
  font-size: 16px;
  font-weight: 200;
  padding: 10px 0;
  width: 250px;
  transition: border .5s;
}

input:focus {
  border: 2px solid #3498DB;
  box-shadow: none;
}

.btn-group button {
  background-color: #4CAF50; /* Green background */
  border: 1px solid green;   /* Green border */
  color: white;              /* White text */
  padding: 10px 24px;        /* Some padding */
  cursor: pointer;           /* Pointer/hand icon */
  float: left;               /* Float the buttons side by side */
}

/* Clear floats (clearfix hack) */
.btn-group:after {
  content: "";
  clear: both;
  display: table;
}

.btn-group button:not(:last-child) {
  border-right: none; /* Prevent double borders */
}

/* Add a background color on hover */
.btn-group button:hover {
  background-color: #3e8e41;
}

.btn {
  border: 2px solid transparent;
  background: #5dc3c3;
  color: #ffffff;
  font-size: 16px;
  line-height: 25px;
  padding: 10px 0;
  text-decoration: none;
  text-shadow: none;
  border-radius: 3px;
  box-shadow: none;
  transition: 0.25s;
  display: block;
  width: 200px;
  margin: 0 auto;
}

.btn:hover {
  background-color: #2980B9;
}

.login-link {
  font-size: 12px;
  color: #444;
  display: block;
  margin-top: 12px;
}
--------------------------------------------------------------------------------
/templates/index.html:
--------------------------------------------------------------------------------
(Template markup not captured in this dump; only a "{% block body %}" tag survives. Per app.py, this page provides the search form whose "keyword" field is posted to /searchImages.)
--------------------------------------------------------------------------------
/templates/showImage.html:
--------------------------------------------------------------------------------
(Template markup not captured in this dump; a stray "{% endfor %}" indicates a Jinja loop. Per app.py, this page receives the "user_images" list and displays the downloaded images from the static folder.)
--------------------------------------------------------------------------------