├── .github
│   └── ISSUE_TEMPLATE
│       ├── bug_report.md
│       ├── custom.md
│       └── feature_request.md
├── .gitignore
├── README.md
├── app.py
├── config.py
├── db_config.py
├── fbmarket
│   ├── __init__.py
│   ├── chromedriver.exe
│   ├── items.py
│   ├── middlewares.py
│   ├── pipelines.py
│   ├── settings.py
│   └── spiders
│       ├── __init__.py
│       └── fmarket.py
├── fbwebpage1.html
├── helper
│   ├── __init__.py
│   └── functions.py
├── scrapy.cfg
├── static
│   └── css
│       └── main.css
└── templates
    ├── 404.html
    ├── base.html
    ├── data_list.html
    ├── item_detail.html
    ├── item_list.html
    └── main.html

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''

---

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots**
If applicable, add screenshots to help explain your problem.

**Desktop (please complete the following information):**
 - OS: [e.g. iOS]
 - Browser [e.g. chrome, safari]
 - Version [e.g. 22]

**Smartphone (please complete the following information):**
 - Device: [e.g. iPhone6]
 - OS: [e.g. iOS8.1]
 - Browser [e.g. stock browser, safari]
 - Version [e.g. 22]

**Additional context**
Add any other context about the problem here.

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/custom.md:
--------------------------------------------------------------------------------
---
name: Custom issue template
about: Describe this issue template's purpose here.
title: ''
labels: ''
assignees: ''

---


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
venv/*
*.csv
*.xlsx
*.json
.idea/*
*.jpg
*.png
*.yml
*.pyc
scrapy/project.egg-info/*
scrapy/build/*

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Facebook Marketplace Scraper
Scrapes Facebook Marketplace listings for a given search term, location, category and search radius, stores them in a MySQL database, and serves the results through a small Flask UI.
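
## How it works
- `app.py` is the Flask front end. Posting the search form to `/item_list` calls `helper/functions.run_spider()`, which shells out to `scrapy crawl fmarket`; the page then re-reads the `fb_item` table and renders the results.
- `fbmarket/spiders/fmarket.py` builds a mobile Marketplace URL from the location, category, search term and radius.
- `fbmarket/middlewares.py` fetches each page with headless Chrome (Selenium) and scrolls it a couple of times so more listings load.
- `fbmarket/pipelines.py` inserts every scraped item into the `fb_item` table of the `fbmarketdb` database.

Assumed dependencies (the repo ships no requirements file): Flask, flask-mysqldb, Scrapy, scrapy-rotating-proxies, Selenium, PyMySQL, a local MySQL server, and a `chromedriver.exe` matching your Chrome version.

No database schema ships with the repo either. A minimal bootstrap consistent with the pipeline's INSERT might look like this (table and column names come from the code; the column types and the `id` column are assumptions):

```python
import pymysql

# Create the database and the fb_item table the Scrapy pipeline writes to.
conn = pymysql.connect(host='localhost', user='root', password='')
with conn.cursor() as cur:
    cur.execute("CREATE DATABASE IF NOT EXISTS fbmarketdb")
    cur.execute("""
        CREATE TABLE IF NOT EXISTS fbmarketdb.fb_item (
            id INT AUTO_INCREMENT PRIMARY KEY,
            item_name VARCHAR(255),
            item_price VARCHAR(64),
            item_category VARCHAR(128),
            item_location VARCHAR(128),
            item_search_term VARCHAR(255),
            item_img TEXT,
            item_url TEXT
        )
    """)
conn.commit()
conn.close()
```

Start the UI with `python app.py` and open http://127.0.0.1:5000/.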

--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
from flask import Flask, render_template, redirect, request, url_for
from flask_mysqldb import MySQL
from helper import functions

app = Flask(__name__)

app.config['MYSQL_HOST'] = 'localhost'
app.config['MYSQL_USER'] = 'root'
app.config['MYSQL_PASSWORD'] = ''
app.config['MYSQL_DB'] = 'fbmarketdb'

mysql = MySQL(app)


@app.route('/', methods=['GET', 'POST'])
def index():
    return render_template('main.html')


@app.route('/list', methods=['GET', 'POST'])
def archive_list():
    return render_template('data_list.html')


@app.route('/item_list', methods=['GET', 'POST'])
def item_list():
    # form_data = functions.get_form_data()
    fb_items = get_all_fb_items()
    # functions.run_spider(form_data)
    if request.method == 'POST':
        form_data = functions.get_form_data()
        functions.run_spider(form_data)
        return redirect(url_for('item_list'))
    else:
        return render_template('item_list.html', data=fb_items)


@app.errorhandler(404)
def page_not_found(e):
    return render_template('404.html'), 404


def get_all_fb_items():
    cur = mysql.connection.cursor()
    cur.execute("SELECT * FROM fb_item")
    fb_data = cur.fetchall()
    cur.close()
    return fb_data


if __name__ == '__main__':
    app.run(debug=True)

--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
import os


def project_dir():
    filepath = os.path.abspath(__file__)
    main_dir = os.path.dirname(filepath)
    return main_dir

--------------------------------------------------------------------------------
/db_config.py:
--------------------------------------------------------------------------------
from flask import Flask
from flask_mysqldb import MySQL

app = Flask(__name__)

app.config['MYSQL_HOST'] = 'localhost'
app.config['MYSQL_USER'] = 'root'
app.config['MYSQL_PASSWORD'] = ''
app.config['MYSQL_DB'] = 'fbmarketdb'

mysql = MySQL(app)


def get_all_fb_items():
    cur = mysql.connection.cursor()
    cur.execute("SELECT * FROM fb_item")
    fb_data = cur.fetchall()
    cur.close()
    return fb_data
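
# Note (an assumption about flask_mysqldb, not documented in this repo):
# mysql.connection is only available inside a Flask application context, so
# calling get_all_fb_items() from a standalone script would need something like:
#
#   from db_config import app, get_all_fb_items
#   with app.app_context():
#       rows = get_all_fb_items()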

--------------------------------------------------------------------------------
/fbmarket/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mdubey07/FacebookMarketplaceScraper/ab6a7f3a2f2d29f1a8ba449fc5834450f66df1fe/fbmarket/__init__.py

--------------------------------------------------------------------------------
/fbmarket/chromedriver.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mdubey07/FacebookMarketplaceScraper/ab6a7f3a2f2d29f1a8ba449fc5834450f66df1fe/fbmarket/chromedriver.exe

--------------------------------------------------------------------------------
/fbmarket/items.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html

import scrapy


class FbmarketItem(scrapy.Item):
    # define the fields for your item here like:
    name = scrapy.Field()
    price = scrapy.Field()
    category = scrapy.Field()
    location = scrapy.Field()
    search_term = scrapy.Field()
    img_url = scrapy.Field()
    item_url = scrapy.Field()
    # current_date = scrapy.Field()
    # slot_number = scrapy.Field()
    # pass

--------------------------------------------------------------------------------
/fbmarket/middlewares.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html

from scrapy import signals
from scrapy.http import HtmlResponse
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

options = webdriver.ChromeOptions()
options.add_argument('headless')
options.add_argument('window-size=720x600')
options.add_argument("--log-level=3")
driver = webdriver.Chrome("G:/Projects/MyPython/flaskapp/fbmarket/chromedriver.exe", chrome_options=options)


# for mobile emulation
# mobile_emulation = {"deviceName": "Nexus 5"}
# chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('headless')
# chrome_options.add_experimental_option("mobileEmulation", mobile_emulation)

# driver = webdriver.Chrome("G:/Projects/MyPython/mfbmarketplace/mfbmarketplace/fbmarket/chromedriver.exe", desired_capabilities=chrome_options.to_capabilities())
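
# A more portable alternative (a sketch, not the original setup): since
# chromedriver.exe ships inside the fbmarket/ package, the path could be
# derived from the project root via the top-level config.py instead of being
# hard-coded to one machine:
#
# import os
# import config
# driver = webdriver.Chrome(
#     os.path.join(config.project_dir(), 'fbmarket', 'chromedriver.exe'),
#     chrome_options=options)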


class FbmarketSpiderMiddleware(object):
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the spider middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_spider_input(self, response, spider):
        # Called for each response that goes through the spider
        # middleware and into the spider.

        # Should return None or raise an exception.
        return None

    def process_spider_output(self, response, result, spider):
        # Called with the results returned from the Spider, after
        # it has processed the response.

        # Must return an iterable of Request, dict or Item objects.
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        # Called when a spider or process_spider_input() method
        # (from other spider middleware) raises an exception.

        # Should return either None or an iterable of Request, dict
        # or Item objects.
        pass

    def process_start_requests(self, start_requests, spider):
        # Called with the start requests of the spider, and works
        # similarly to the process_spider_output() method, except
        # that it doesn’t have a response associated.

        # Must return only requests (not items).
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)


class FbmarketDownloaderMiddleware(object):
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the downloader middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_request(self, request, spider):
        driver.get(request.url)
        SCROLL_PAUSE_TIME = 25
        scroll_times = 0

        # Get scroll height
        last_height = driver.execute_script("return document.body.scrollHeight")

        while True:
            # Scroll down to bottom
            scroll_times = scroll_times + 1
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

            # Wait to load page
            time.sleep(SCROLL_PAUSE_TIME)

            # Calculate new scroll height and compare with last scroll height
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height or scroll_times >= 2:
                break
            last_height = new_height

        body = driver.page_source
        return HtmlResponse(driver.current_url, body=body, encoding='utf-8', request=request)

    def process_response(self, request, response, spider):
        # Called with the response returned from the downloader.

        # Must either;
        # - return a Response object
        # - return a Request object
        # - or raise IgnoreRequest
        return response

    def process_exception(self, request, exception, spider):
        # Called when a download handler or a process_request()
        # (from other downloader middleware) raises an exception.

        # Must either:
        # - return None: continue processing this exception
        # - return a Response object: stops process_exception() chain
        # - return a Request object: stops process_exception() chain
        pass

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)

--------------------------------------------------------------------------------
/fbmarket/pipelines.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html

import pymysql


class FbmarketPipeline(object):
    def __init__(self):
        self.conn = pymysql.connect(host='localhost', user='root', password='', database='fbmarketdb')
        self.cursor = self.conn.cursor()
        # self.conn.autocommit(True)

    def process_item(self, item, spider):
        self.insert_item(item)
        return item

    def insert_item(self, item):
        sql = "INSERT INTO fb_item (item_name, item_price, " \
              "item_category, item_location, item_search_term, item_img, item_url) " \
              "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        self.cursor.execute(sql, (
            item['name'],
            item['price'],
            item['category'],
            item['location'],
            item['search_term'],
            item['img_url'],
            item['item_url'],
        ))
        self.conn.commit()
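
    # The connection opened in __init__ is never released. A close_spider()
    # hook would tidy that up when the crawl ends — a sketch, not part of the
    # original pipeline:
    #
    # def close_spider(self, spider):
    #     self.cursor.close()
    #     self.conn.close()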

--------------------------------------------------------------------------------
/fbmarket/settings.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Scrapy settings for fbmarket project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html

BOT_NAME = 'fbmarket'

SPIDER_MODULES = ['fbmarket.spiders']
NEWSPIDER_MODULE = 'fbmarket.spiders'

# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'

# Obey robots.txt rules
ROBOTSTXT_OBEY = False

# Configure maximum concurrent requests performed by Scrapy (default: 16)
CONCURRENT_REQUESTS = 8

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
CONCURRENT_REQUESTS_PER_DOMAIN = 8
CONCURRENT_REQUESTS_PER_IP = 8

# Disable cookies (enabled by default)
# COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False

# Override the default request headers:
# DEFAULT_REQUEST_HEADERS = {
#     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#     'Accept-Language': 'en',
# }

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
#     'fbmarket.middlewares.FbmarketSpiderMiddleware': 543,
# }

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    'fbmarket.middlewares.FbmarketDownloaderMiddleware': 543,
    'rotating_proxies.middlewares.RotatingProxyMiddleware': 610,
    'rotating_proxies.middlewares.BanDetectionMiddleware': 620,
}

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
#     'scrapy.extensions.telnet.TelnetConsole': None,
# }

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    'fbmarket.pipelines.FbmarketPipeline': 300,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
# AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
# AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
# AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = 'httpcache'
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
ROTATING_PROXY_LIST = [
    # 'http://user797:scrp1ng@107.175.247.7:80',
    # 'http://user797:scrp1ng@107.175.90.224:80',
    'http://user797:scrp1ng@23.95.239.51:80',
    # 'http://user797:scrp1ng@23.95.224.231:80',
    # 'http://user797:scrp1ng@23.95.219.228:80',

    # 'http://user797:scrp1ng@45.13.230.67:80',
    # 'http://user797:scrp1ng@45.13.230.17:80',
    # 'http://user797:scrp1ng@176.119.24.133:80',
    # 'http://user797:scrp1ng@176.119.24.50:80',
    # 'http://user797:scrp1ng@176.119.24.191:80',
    # 'http://username:pass@IP:port'
]

--------------------------------------------------------------------------------
/fbmarket/spiders/__init__.py:
--------------------------------------------------------------------------------
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

--------------------------------------------------------------------------------
/fbmarket/spiders/fmarket.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import scrapy
from ..items import FbmarketItem
from urllib.parse import urlencode
from urllib.parse import quote
from helper import functions


class FmarketSpider(scrapy.Spider):
    name = 'fmarket'
    allowed_domains = ['facebook.com']
    start_urls = ['https://m.facebook.com/marketplace']

    location = 'delhi'
    search_query = 'new girls jacket'
    radius_km = 1
    category = ''

    form_data = ''

    # query_parameter = {'query': search_query, 'radius_in_km': radius_km}

    def start_requests(self):
        # f_data is passed by helper.functions.run_spider() as a single
        # comma-separated string: location,category,search term,radius_in_km.
        # Fall back to the class-level defaults when it is not supplied.
        if getattr(self, 'f_data', None):
            self.form_data = self.f_data.split(',')
            self.location = self.form_data[0]
            self.category = self.form_data[1]
            self.search_query = self.form_data[2]
            self.radius_km = self.form_data[3]

        query_parameter = {'query': self.search_query, 'radius_in_km': self.radius_km}
        # ab_url = self.absolute_url(self.location, self.category, self.queryParameter)
        ab_url = self.absolute_url(self.location, self.category, query_parameter)
        print(self.form_data)

        yield scrapy.Request(url=ab_url, callback=self.parse)

    def parse(self, response):
        # print(response.body)
        # filename = response.url.split("/")[-1] + '.html'
        filename = 'fbwebpage1' + '.html'
        with open(filename, 'wb') as f:
            f.write(response.body)

        for product in response.css('div._a5o'):
            # build a fresh item for every listing
            items = FbmarketItem()

            product_url = product.css("a::attr(href)").get()
            product_name = product.css('div:last-child::text').extract()
            product_price = product.css('a div:last-child span::text').extract()

            if product_price:
                product_price = self.rm_whitespace(product_price)

            if product_name:
                product_name = self.rm_whitespace(product_name)

            if product_url:
                product_url = response.urljoin(product_url)
                product_url = product_url.split('?')[0]
                product_url = product_url.replace('m.', '')

            if self.category:
                search_category = self.category.strip()
            else:
                search_category = 'NA'

            if self.location:
                search_loc = self.location.strip()
            else:
                search_loc = 'NA'

            if self.search_query:
                search_term = self.search_query.strip()
            else:
                search_term = 'NA'
            # skip incomplete listings and anything that is not an /item/ link
            if not product_price or not product_url or not product_name:
                continue

            if '/item/' not in product_url:
                continue

            # placeholder thumbnail; real listing images are not extracted yet
            item_tem_img = 'https://5.imimg.com/data5/PJ/DI/MY-3877854/round-neck-plain-tshirt-with-multi-color-design-500x500.png'

            items['name'] = product_name
            items['price'] = product_price
            items['category'] = search_category
            items['location'] = search_loc
            items['search_term'] = search_term
            items['img_url'] = item_tem_img
            items['item_url'] = product_url
            # items['current_date'] = product_name
            # items['slot_number'] = product_name

            yield items

    @staticmethod
    def rm_whitespace(query_term):
        if query_term:
            cleaned = [part.replace('\n', '') for part in query_term]
            cleaned = [part.strip() for part in cleaned]
            cleaned = filter(None, cleaned)
            return ' '.join(cleaned)
            # query_term = query_term.encode('ascii', 'xmlcharrefreplace').decode('utf8')
        return query_term

    def absolute_url(self, location, category, query_parameter):
        url = self.start_urls[0]
        fb_query = urlencode(query_parameter)
        if location:
            url = url + '/' + quote(location)
        if category:
            url = url + '/' + quote(category)
        url = url + '/?' + fb_query
        return url
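
# Example invocation (values are illustrative): helper.functions.run_spider()
# launches this spider with a command equivalent to
#
#   scrapy crawl fmarket -a f_data=delhi,clothing,new girls jacket,5
#
# which makes absolute_url() build a request like
#   https://m.facebook.com/marketplace/delhi/clothing/?query=new+girls+jacket&radius_in_km=5
#
# Note that a comma inside the search term would break the split() above.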

--------------------------------------------------------------------------------
/helper/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mdubey07/FacebookMarketplaceScraper/ab6a7f3a2f2d29f1a8ba449fc5834450f66df1fe/helper/__init__.py

--------------------------------------------------------------------------------
/helper/functions.py:
--------------------------------------------------------------------------------
from flask import Flask, request
import csv
import json
import os
import subprocess
import config

app = Flask(__name__)


def get_table_data_by_csv():
    file_path = 'static/output/data2.csv'
    table_data = []
    with open(file_path, 'rt', encoding="utf8") as f:
        data = csv.reader(f)
        headers = next(data, None)
        garbage = next(data, None)
        for row in data:
            table_data.append(row)
    return table_data


def test():
    filepath = os.path.join(config.project_dir(), 'static\\output\\' 'data2.csv')


def run_spider(form_data):
    spider_name = "fmarket"
    cat = form_data['cat']
    location = form_data['location']
    print(location + cat)
    # filepath = os.path.join(config.project_dir(), 'static\\output\\' 'data2.json')
    subprocess.call(
        ['scrapy', 'crawl', spider_name, '-a', 'f_data='+form_data['location'] + ',' + form_data['cat'] + ',' +
         form_data['search_term']+','+form_data['radius']])

    # subprocess.check_output(['scrapy', 'crawl', spider_name, "-o", 'xyz1.csv'])


def get_table_data_by_json():
    # file_path = 'static/output/abc.json'
    filepath = os.path.join(config.project_dir(), 'static\\output\\' 'data2.json')
    table_data = []
    with open(filepath, 'rt', encoding="utf8") as f:
        data = json.load(f)  # the Scrapy JSON export is a list of items
        for row in data:
            table_data.append(row)
    return table_data


def get_form_data():
    cat = request.form['category']
    location = request.form['loc']
    search_term = request.form['skey']
    radius_in_km = request.form['radius']

    form_data = {
        'cat': cat,
        'location': location,
        'search_term': search_term,
        'radius': radius_in_km,
        'result': 3
    }
    return form_data

# test()
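
# The CSV/JSON helpers above read exports from static/output/, a location that
# .gitignore excludes (*.csv, *.json), so those files have to be produced
# locally first, e.g. with Scrapy's feed export:
#
#   scrapy crawl fmarket -o static/output/data2.csv
#
# get_form_data() expects the search form to post the fields
# 'category', 'loc', 'skey' and 'radius'.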

--------------------------------------------------------------------------------
/scrapy.cfg:
--------------------------------------------------------------------------------
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html

[settings]
default = fbmarket.settings

[deploy]
#url = http://localhost:6800/
project = fbmarket

--------------------------------------------------------------------------------
/static/css/main.css:
--------------------------------------------------------------------------------
.small-img{
    width: 100px;
    height: auto;
}

--------------------------------------------------------------------------------
/templates/404.html:
--------------------------------------------------------------------------------
{% extends 'base.html' %}
{% block title %}
404 Page Not Found
{% endblock %}
{% block content %}

[The remaining template markup — the rest of 404.html, plus base.html, data_list.html, item_detail.html and main.html — was not captured in this dump. From item_list.html only the rendered text of the results table survives:]

{# Showing result for: {{ form_data }} #}

| S.No | Product Name | Price | Category | Location | Search Term | Image | Url | Date | View Detail |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| {{ row[0] }} | {{ row[1] }} | {{ row[2] }} | {{ row[3] }} | {{ row[4] }} | {{ row[5] }} | (item image) | Item Url | 07/03/2020 | View more |