├── .gitignore
├── LICENSE
├── README.md
├── joble
│   ├── __init__.py
│   ├── items.py
│   ├── middlewares.py
│   ├── pipelines.py
│   ├── scraper
│   │   └── google.py
│   ├── settings.py
│   └── spiders
│       ├── __init__.py
│       ├── glassdoor.py
│       ├── monsterindia.py
│       └── naukri.py
├── requirements.txt
└── scrapy.cfg
/.gitignore:
--------------------------------------------------------------------------------
__pycache__/
.vscode/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Ravishankar Chavare

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Joble
Joble searches thousands of job boards for positions in different technologies from all over the world.


## Installation

Step 1: Clone Joble.
```
git clone https://github.com/chavarera/Joble.git
```
Step 2: Change the working directory to Joble.
```
cd Joble
```
Step 3: Create a virtual environment.
```
virtualenv -p python3 venv
```

Step 4: Activate the virtual environment.
```
source venv/bin/activate
```

Step 5: Install the required packages.
```
pip install -r requirements.txt
```

Step 6: Run the spiders described below.


## List of Spiders Available
1. Naukri
2. MonsterIndia


### 1. Naukri
Gets 20 jobs per category by default.
```
scrapy crawl Naukri
```

Available options:
1. city
2. count
3. keyword

For example:
```
scrapy crawl Naukri -a keyword=python -a count=20 -a city=pune
```

Export the output as CSV or JSON:
```
scrapy crawl Naukri -a keyword=python -o python.csv
```

### 2. MonsterIndia
Available options:
1. location
2. count
3. keyword

For example:
```
scrapy crawl MonsterIndia -a keyword=python -a count=20
```
--------------------------------------------------------------------------------
/joble/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Python-World/Joble/b44ea73ff798b52df036810526ecfe2775b5d8e1/joble/__init__.py
--------------------------------------------------------------------------------
/joble/items.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html

import scrapy


class JobleItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    pass
--------------------------------------------------------------------------------
/joble/middlewares.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html

from scrapy import signals


class JobleSpiderMiddleware:
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the spider middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_spider_input(self, response, spider):
        # Called for each response that goes through the spider
        # middleware and into the spider.

        # Should return None or raise an exception.
        return None

    def process_spider_output(self, response, result, spider):
        # Called with the results returned from the Spider, after
        # it has processed the response.

        # Must return an iterable of Request, dict or Item objects.
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        # Called when a spider or process_spider_input() method
        # (from other spider middleware) raises an exception.

        # Should return either None or an iterable of Request, dict
        # or Item objects.
        pass

    def process_start_requests(self, start_requests, spider):
        # Called with the start requests of the spider, and works
        # similarly to the process_spider_output() method, except
        # that it doesn’t have a response associated.

        # Must return only requests (not items).
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        spider.logger.info("Spider opened: %s" % spider.name)


class JobleDownloaderMiddleware:
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the downloader middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_request(self, request, spider):
        # Called for each request that goes through the downloader
        # middleware.

        # Must either:
        # - return None: continue processing this request
        # - or return a Response object
        # - or return a Request object
        # - or raise IgnoreRequest: process_exception() methods of
        #   installed downloader middleware will be called
        return None

    def process_response(self, request, response, spider):
        # Called with the response returned from the downloader.

        # Must either:
        # - return a Response object
        # - return a Request object
        # - or raise IgnoreRequest
        return response

    def process_exception(self, request, exception, spider):
        # Called when a download handler or a process_request()
        # (from other downloader middleware) raises an exception.

        # Must either:
        # - return None: continue processing this exception
        # - return a Response object: stops process_exception() chain
        # - return a Request object: stops process_exception() chain
        pass

    def spider_opened(self, spider):
        spider.logger.info("Spider opened: %s" % spider.name)
--------------------------------------------------------------------------------
/joble/pipelines.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html


class JoblePipeline:
    def process_item(self, item, spider):
        return item
--------------------------------------------------------------------------------
/joble/scraper/google.py:
--------------------------------------------------------------------------------
import argparse

import requests
from bs4 import BeautifulSoup


# remove stray query parameters from the end of the url if they exist
def fix_url(url_list):
    if len(url_list[-1]) > 8:
        url_list[-1] = url_list[-1].split("&")[0]
    return url_list


# return the link to the careers page from a google search
def get_career_page(name):
    query = name.replace(" ", "+")
    URL = f"https://google.com/search?q={query}+careers"

    resp = requests.get(URL, timeout=10)
    if "." in name:
        name = name.split(".")[0]

    if resp.status_code == 200:
        soup = BeautifulSoup(resp.content, "html.parser")

        for a in soup.find_all("a", href=True):
            if "url" in a["href"]:
                # removes '/url?q=' from the start of the url
                url_list = a["href"][7:].split("/")
                if len(url_list) > 2 and name in url_list[2]:
                    return "/".join(fix_url(url_list))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("name", help="name of the company", type=str)
    args = parser.parse_args()

    url = get_career_page(args.name)
    print(url)
--------------------------------------------------------------------------------
/joble/settings.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Scrapy settings for joble project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html

BOT_NAME = "joble"

SPIDER_MODULES = ["joble.spiders"]
NEWSPIDER_MODULE = "joble.spiders"


# Crawl responsibly by identifying yourself (and your website) on the user-agent
# USER_AGENT = 'joble (+http://www.yourdomain.com)'

# Obey robots.txt rules
ROBOTSTXT_OBEY = True

# Configure maximum concurrent requests performed by Scrapy (default: 16)
# CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
# DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
# COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False

# Override the default request headers:
# DEFAULT_REQUEST_HEADERS = {
#     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#     'Accept-Language': 'en',
# }

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
#     'joble.middlewares.JobleSpiderMiddleware': 543,
# }

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# DOWNLOADER_MIDDLEWARES = {
#     'joble.middlewares.JobleDownloaderMiddleware': 543,
# }

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
#     'scrapy.extensions.telnet.TelnetConsole': None,
# }

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# ITEM_PIPELINES = {
#     'joble.pipelines.JoblePipeline': 300,
# }

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
# AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
# AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
# AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = 'httpcache'
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
--------------------------------------------------------------------------------
/joble/spiders/__init__.py:
--------------------------------------------------------------------------------
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
--------------------------------------------------------------------------------
/joble/spiders/glassdoor.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import scrapy


# scrapy crawl Glassdoor
class GlassdoorSpider(scrapy.Spider):
    name = "Glassdoor"
    allowed_domains = ["glassdoor.com"]
    url = "https://www.glassdoor.com"

    def __init__(self, keyword=None, count=20):
        self.keyword = keyword
        self.count = int(count)

    def start_requests(self):
        url = "{}/Job/jobs.htm?sc.keyword={}".format(self.url, self.keyword)
        yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        elements = response.css("ul.jlGrid li.react-job-listing")

        for element in elements[: self.count]:
            job = {
                "title": element.attrib["data-normalize-job-title"],
                "location": element.attrib["data-job-loc"],
                "employer": element.css(
                    "div div.jobHeader a span::text"
                ).get(),
                # hrefs on the listing page are relative, so prefix the site URL
                "job-link": self.url
                + element.css("div div.jobHeader a::attr(href)").get(),
            }
            yield job
--------------------------------------------------------------------------------
/joble/spiders/monsterindia.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import json
from urllib.parse import urlencode, urljoin

import scrapy
from scrapy import Request


# Execute : scrapy crawl MonsterIndia -a keyword=python
class MonsterindiaSpider(scrapy.Spider):
    name = "MonsterIndia"
    allowed_domains = ["monsterindia.com"]
    start_urls = ["http://monsterindia.com/"]

    def __init__(self, keyword, count=20, location=None):
        self.URL = "http://monsterindia.com/"
        self.count = count
        self.keyword = keyword
        self.location = location

    def get_url(self):
        base_url = "https://www.monsterindia.com/middleware/jobsearch?"
        params = {
            "sort": "2",
            "limit": self.count,
            "query": self.keyword,
            "locations": self.location,
        }
        if self.location is None:
            params.pop("locations")

        return {"url": base_url, "body": urlencode(params)}

    def parse(self, response):
        url = self.get_url()
        yield Request(
            url["url"] + url["body"], meta={"url": url}, callback=self.JobData
        )

    def JobData(self, response):
        # The search endpoint returns JSON, so parse it directly instead of
        # string-patching "true"/"false" and calling eval() on the payload.
        jobdata = json.loads(response.text)
        if jobdata.get("jobSearchResponse"):
            for record in jobdata["jobSearchResponse"]["data"]:
                headers = [
                    "jobId",
                    "title",
                    "locations",
                    "updatedAt",
                    "summary",
                    "skills",
                    "companyName",
                    "seoJdUrl",
                ]
                job_details = {}
                for head in headers:
                    try:
                        job_details[head] = record.get(head)
                    except Exception as ex:
                        print("error in head:", head, ex)
                job_details["seoJdUrl"] = urljoin(
                    self.URL, job_details["seoJdUrl"]
                )
                if job_details["jobId"]:
                    yield job_details
--------------------------------------------------------------------------------
/joble/spiders/naukri.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import json
import urllib.parse

import scrapy


# scrapy crawl Naukri
class NaukriSpider(scrapy.Spider):
    name = "Naukri"
    allowed_domains = ["naukri.com"]
    start_urls = ["https://www.naukri.com"]

    def __init__(self, keyword=None, count=20, city=None):
        self.count = count
        self.keyword = keyword
        self.city = city

    def get_url(self):
        base_url = "https://www.naukri.com/jobapi/v3/search?"
        params = {
            "noOfResults": self.count,
            "urlType": "search_by_key_loc",
            "searchType": "adv",
            "keyword": self.keyword,
            "location": self.city,
            "sort": "r",
            "k": self.keyword,
            "l": self.city,
            "seoKey": "{}-jobs-in-{}".format(self.keyword, self.city)
            if self.city
            else "{}-jobs".format(self.keyword),
            "src": "jobsearchDesk",
            "latLong": "",
        }
        default = ["keyword", "sort", "l", "k", "location"]
        if self.city is None:
            for key in default:
                params.pop(key)
        return {"url": base_url, "body": params}

    def parse(self, response):
        if self.keyword:
            record = self.get_url()
            yield scrapy.Request(
                url=record["url"] + urllib.parse.urlencode(record["body"]),
                headers={
                    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0",
                    "appid": "109",
                    "systemid": "109",
                },
                meta={"keyword": self.keyword},
                callback=self.jobData,
            )
        else:
            yield scrapy.Request(
                "https://www.naukri.com/jobs-by-category",
                callback=self.get_by_category,
            )

    def get_by_category(self, response):
        for j in response.xpath('//div[@class="lmrWrap wrap"]/div/div/div/a'):
            title = j.xpath("text()").get().strip()
            url = j.xpath("@href").get().strip()
            yield scrapy.Request(
                url,
                callback=self.job_list,
                meta={"keyword": title, "count": 0, "plink": url},
            )

    def job_list(self, response):
        plink = response.meta["plink"].split("/")[-1]
        keyword = plink.split("-jobs")[0]
        seokeys = keyword + "-jobs"
        ids = plink.split("=")[-1]
        joburl = "https://www.naukri.com/jobapi/v3/search?noOfResults=20&urlType=search_by_keyword&searchType=adv&keyword={}&xt=catsrch&functionAreaId={}&seoKey={}&src=jobsearchDesk&latLong=".format(
            keyword, ids, seokeys
        )
        yield scrapy.Request(
            joburl,
            headers={
                "Referer": response.meta["plink"],
                "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0",
                "appid": "109",
                "systemid": "109",
            },
            meta={
                "url": response.meta["plink"],
                "keyword": keyword,
                "ids": ids,
                "seokeys": seokeys,
            },
            callback=self.jobData,
        )

    def jobData(self, response):
        # The job-search API returns JSON, so parse it directly instead of
        # string-patching "true"/"false" and calling eval() on the payload.
        jobdata = json.loads(response.text)
        if jobdata.get("jobDetails"):
            for job in jobdata["jobDetails"]:
                place = job["placeholders"]
                detail = {}
                for p in place:
                    key, value = p.values()
                    detail[key] = value
                details = {
                    "category": response.meta["keyword"],
                    "title": job["title"],
                    "jobId": job["jobId"],
                    "companyName": job["companyName"],
                    "skills": job.get("tagsAndSkills"),
                    "joburl": job["jdURL"],
                    "postedon": job["footerPlaceholderLabel"],
                    "description": job.get("jobDescription"),
                }
                final_result = {**detail, **details}
                yield final_result
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
attrs==19.3.0
Automat==20.2.0
beautifulsoup4==4.9.2
cffi==1.14.0
constantly==15.1.0
cryptography==41.0.4
cssselect==1.1.0
hyperlink==19.0.0
idna==2.10
incremental==17.5.0
itemadapter==0.1.0
lxml==4.9.1
parsel==1.6.0
Protego==0.1.16
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.20
PyDispatcher==2.0.5
PyHamcrest==2.0.2
pyOpenSSL==19.1.0
queuelib==1.5.0
requests==2.31.0
Scrapy==2.6.2
service-identity==18.1.0
six==1.15.0
Twisted==22.10.0
w3lib==1.22.0
zope.interface==5.1.0
--------------------------------------------------------------------------------
/scrapy.cfg:
--------------------------------------------------------------------------------
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html

[settings]
default = joble.settings

[deploy]
#url = http://localhost:6800/
project = joble
--------------------------------------------------------------------------------
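The README drives the spiders through the `scrapy crawl` CLI. As a rough illustration only — this script is not part of the repository, and the output file name and the `run_naukri.py` name are assumptions — the same crawl can also be launched from Python with Scrapy's `CrawlerProcess`, which can be useful for scheduling or embedding:

```
# run_naukri.py - hypothetical helper; run it from the project root so that
# scrapy.cfg and joble/settings.py can be discovered.
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

settings = get_project_settings()
# Write the scraped items to a JSON file; the file name is an arbitrary choice.
settings.set("FEEDS", {"python_jobs.json": {"format": "json"}})

process = CrawlerProcess(settings)
# Spider name and arguments mirror the CLI call:
# scrapy crawl Naukri -a keyword=python -a count=20 -a city=pune
process.crawl("Naukri", keyword="python", count=20, city="pune")
process.start()  # blocks until the crawl finishes
```

For one-off runs, exporting with `-o python.csv` or `-o python.json` on the command line remains the simpler option.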