├── .gitignore
├── LICENSE
├── README.md
├── deployment.md
├── local_output
│   └── readme.md
├── requirements.txt
├── scrapinghub.yml
├── scrapy.cfg
├── setup.py
└── tutorial
    ├── __init__.py
    ├── items.py
    ├── middlewares.py
    ├── models.py
    ├── pipelines.py
    ├── settings.py
    └── spiders
        ├── __init__.py
        ├── quotes_spider.py
        ├── quotes_spider_v1.py
        └── quotes_spider_v2.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 | .DS_Store
107 |
108 | /local_output/*.html
109 | /local_output/*.json
110 |
111 | # sqlite
112 | *.db
113 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Harry Wang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Scrapy Tutorial
2 |
3 | This repo contains the code for my tutorial: A Minimalist End-to-End Scrapy Tutorial (https://medium.com/p/11e350bcdec0).
4 |
5 | The website to crawl is [http://quotes.toscrape.com](http://quotes.toscrape.com).
6 |
7 | ## Setup
8 | Tested with Python 3.6 via virtual environment:
9 | ```shell
10 | $ python3.6 -m venv venv
11 | $ source venv/bin/activate
12 | $ pip install -r requirements.txt
13 | ```
14 |
15 | ## Run
16 |
17 | Run `scrapy crawl quotes` at the project top level.
18 |
19 | Note that the spider name is defined in the spider class, e.g., in `quotes_spider.py`:
20 | ```python
21 | class QuotesSpider(scrapy.Spider):
22 |     name = "quotes"
23 | ```
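
You can list all spiders available in the project with `scrapy list` (this repo defines `quotes`, `quotes_v1`, and `quotes_v2`).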
24 |
25 | ## Deployment
26 |
27 | See deployment.md
28 |
29 | ## Versions
30 |
31 | I keep different versions for learning purposes using git tags:
32 |
33 | ### Version 1 (tag v1.0)
34 |
35 | Key concepts: basic spider setup, project folder structure, saving output as JSON and HTML files, using the Scrapy shell, following links, etc.
36 |
37 | Local outputs (JSON and HTML pages) are stored in the `local_output` folder, which is ignored via `.gitignore`.
38 |
39 | For example:
40 |
41 | - `scrapy crawl quotes` saves a set of HTML pages to `/local_output`
42 | - `scrapy crawl quotes -o ./local_output/quotes.json` saves the output to a JSON file
43 |
44 |
45 |
46 | To create the initial project folder, run `scrapy startproject tutorial` (you only need to do this once). I removed the top-level `tutorial` folder and added additional files and folders as shown below:
47 |
48 | ```
49 | tutorial/
50 |     scrapy.cfg            # deploy configuration file
51 |
52 |
53 |     tutorial/             # project's Python module, you'll import your code from here
54 |         __init__.py
55 |
56 |         items.py          # project items definition file
57 |
58 |         middlewares.py    # project middlewares file
59 |
60 |         pipelines.py      # project pipelines file
61 |
62 |         settings.py       # project settings file
63 |
64 |         spiders/          # a directory where you'll later put your spiders
65 |             __init__.py
66 | ```
67 | `self.log('Saved file %s' % filename)` writes to the log console. Each `yield`ed item also shows up as DEBUG output in the console, e.g., a line like `DEBUG: Scraped from <200 http://quotes.toscrape.com/page/1/>` followed by the item dictionary.
68 |
69 |
70 |
71 |
72 | ### Version 2 (tag v2.0)
73 |
74 | The major change is to use Items.
75 |
76 | Why use Items?
77 |
78 | - Clearly specify the structured data to be collected - a central place to look
79 | - Leverage pre- and post-processors for Items via ItemLoaders (you can also define additional custom processors) - see the sketch below
80 | - Use item pipelines to save data to databases (Version 3)
81 | - Better code organization - you know where to look for certain processing code
82 |
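A minimal sketch of how these pieces fit together, based on `tutorial/items.py` and `tutorial/spiders/quotes_spider.py` in this repo (the `load_quote` helper is just for illustration; the real spider does the same thing inside `parse()`):

```python
from scrapy.loader import ItemLoader
from tutorial.items import QuoteItem


def load_quote(quote_selector):
    # the ItemLoader runs the input processors (e.g., remove_quotes) on each
    # extracted value and the output processors (e.g., TakeFirst) in load_item()
    loader = ItemLoader(item=QuoteItem(), selector=quote_selector)
    loader.add_css('quote_content', '.text::text')
    loader.add_css('tags', '.tag::text')
    return loader.load_item()
```
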
83 | ### Version 3 (tag v3.0)
84 |
85 | - Add database support via SQLAlchemy and use an Item pipeline to save items into a database (SQLite and MySQL)
86 | - Add instructions on deploying to Scrapinghub.com
87 |
88 | Three tables: Authors, Quotes, Tags.
89 |
90 | - One-to-Many between Authors and Quotes
91 | - Many-to-Many between Tags and Quotes
92 | - Many-to-Many between Tags and Authors
93 |
94 | The database schema is defined in `/tutorial/models.py` and the connection string is specified in `/tutorial/settings.py`.
95 | Add a pipeline in `/tutorial/pipelines.py` and enable it in `/tutorial/settings.py` (the number 0-1000 specifies the execution order when multiple pipelines are enabled):
96 |
97 | ```
98 | ITEM_PIPELINES = {
99 | 'tutorial.pipelines.SaveQuotesPipeline': 300,
100 | }
101 | ```
102 |
103 | Use the following commands to check the local SQLite database; https://sqlitebrowser.org can be used as a GUI tool.
104 |
105 | ```
106 | $ man sqlite3
107 | $ sqlite3 scrapy_quotes.db
108 | sqlite> .tables
109 | sqlite> .schema quote
110 | sqlite> .quit
111 | ```
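
The same check can also be done from Python; a small sketch, assuming the default `scrapy_quotes.db` SQLite file produced by the pipeline and the table names from `tutorial/models.py`:

```python
import sqlite3

conn = sqlite3.connect("scrapy_quotes.db")
for table in ("quote", "author", "tag"):
    # count the rows scraped into each table
    count = conn.execute("SELECT COUNT(*) FROM {}".format(table)).fetchone()[0]
    print(table, count)
conn.close()
```
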
112 | ### Test SQLAlchemy in Shell
113 |
114 | Once you set up the models and pipelines, you can run `scrapy shell` to test the database part. Just paste the code block below and open the SQLite database to check the results.
115 |
116 | ```python
117 | from sqlalchemy.orm import sessionmaker
118 | from tutorial.models import Quote, Author, Tag, db_connect, create_table
119 | engine = db_connect()
120 | create_table(engine)
121 | Session = sessionmaker(bind=engine)
122 | session = Session()
123 |
124 | quote1 = Quote()
125 | author1 = Author()
126 | author1.name = "Linus Torvalds"
127 | author1.bio = "Linus Torvalds is the creator of the Linux kernel and Git."
128 | quote1.quote_content = "Talk is cheap. Show me the code."
129 | quote1.author = author1
130 | tag1 = Tag(name="linux")
131 | tag2 = Tag(name="git")
132 | tag3 = Tag(name="simple")
133 | quote1.tags.append(tag1)
134 | quote1.tags.append(tag2)
135 | quote1.tags.append(tag3)
136 |
137 | try:
138 |     session.add(author1)
139 |     session.add(quote1)
140 |     session.commit()
141 | except:
142 |     session.rollback()
143 |     raise
144 |
145 | quote2 = Quote()
146 | author2 = Author()
147 | author2.name = "Steve Jobs"
148 | author2.bio = "Steve Jobs was the chairman, chief executive officer, and co-founder of Apple Inc."
149 | quote2.quote_content = "Stay Hungry Stay Foolish."
150 | quote2.author = author2
151 | tag4 = Tag(name="inspiring")
152 | tag5 = Tag(name="simple") # this already exists in the database
153 |
154 | # See difference between filter and filter_by at https://bit.ly/2TLvqeV
155 |
156 | # exist_tag = session.query(Tag).filter(Tag.name == tag5.name).first()
157 | exist_tag = session.query(Tag).filter_by(name = tag5.name).first()
158 | if exist_tag is not None: # the current tag exists
159 |     tag5 = exist_tag
160 |
161 | quote2.tags.append(tag4)
162 | quote2.tags.append(tag5)
163 |
164 | try:
165 |
166 |     session.add(author2)
167 |     session.add(quote2)
168 |     session.commit()
169 | except:
170 |     session.rollback()
171 |     raise
172 | finally:
173 |     session.close()
174 | ```
175 | ### MySQL
176 |
177 | - Install MySQL locally: `brew install mysql`, which installs MySQL without a root password. Start MySQL with `mysql.server start` and then connect with `mysql -u root`.
178 |
179 | - Create the local database and a related user (the connection string below assumes user `harrywang` with password `tutorial`): `CREATE SCHEMA scrapy_quotes DEFAULT CHARACTER SET utf8mb4;`
180 |
181 | - The `mysqlclient` package is required (it is already listed in `requirements.txt`).
182 |
183 | - To store items in MySQL instead, comment out the SQLite connection string in `settings.py` and uncomment the MySQL one:
184 |
185 | ```python
186 | # SQLite
187 | # CONNECTION_STRING = 'sqlite:///scrapy_quotes.db'
188 |
189 | # MySQL
190 | CONNECTION_STRING = "{drivername}://{user}:{passwd}@{host}:{port}/{db_name}?charset=utf8".format(
191 | drivername="mysql",
192 | user="harrywang",
193 | passwd="tutorial",
194 | host="localhost",
195 | port="3306",
196 | db_name="scrapy_quotes",
197 | )
198 | ```
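
As an optional sanity check (a sketch, assuming the `scrapy_quotes` schema and the `harrywang`/`tutorial` user above already exist), you can verify the MySQL connection string outside Scrapy:

```python
from sqlalchemy import create_engine, text

engine = create_engine(
    "mysql://harrywang:tutorial@localhost:3306/scrapy_quotes?charset=utf8"
)
with engine.connect() as conn:
    # prints 1 if the connection string and credentials work
    print(conn.execute(text("SELECT 1")).scalar())
```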
199 |
200 | ### Version 4 (tag v4.0)
201 | Deployment to Scrapinghub and ScrapydWeb. See [deployment.md](deployment.md) for details.
202 |
203 |
204 | ## Other Notes
205 |
206 | ### Scrapy Shell
207 |
208 |
209 | Enter shell: `scrapy shell 'http://quotes.toscrape.com/page/1/'`
210 |
211 | Extract data examples (css and xpath):
212 |
213 | CSS:
214 | ```bash
215 | >>> response.css('title').getall()
216 | ['<title>Quotes to Scrape</title>']
217 | >>> response.css('title::text').get()
218 | 'Quotes to Scrape'
219 | >>> response.css('title::text')[0].get()
220 | 'Quotes to Scrape'
221 | >>> response.css('title::text').re(r'Quotes.*')
222 | ['Quotes to Scrape']
223 | >>> response.css('title::text').re(r'Q\w+')
224 | ['Quotes']
225 | >>> response.css('title::text').re(r'(\w+) to (\w+)')
226 | ['Quotes', 'Scrape']
227 | ```
228 | XPath:
229 |
230 | ```bash
231 | >>> response.xpath('//title')
232 | [<Selector xpath='//title' data='<title>Quotes to Scrape</title>'>]
233 | >>> response.xpath('//title/text()').get()
234 | 'Quotes to Scrape'
235 | ```
236 |
237 | View page in browser from shell: `>>> view(response)`
238 |
239 | ### Extracting quotes and authors
240 |
241 | HTML to parse:
242 |
243 | ```html
244 | <div class="quote">
245 |     <span class="text">“The world as we have created it is a process of our
246 |     thinking. It cannot be changed without changing our thinking.”</span>
247 |     <span>
248 |         by <small class="author">Albert Einstein</small>
249 |         <a href="/author/Albert-Einstein">(about)</a>
250 |     </span>
251 |     <div class="tags">
252 |         Tags:
253 |         <a class="tag" href="/tag/change/page/1/">change</a>
254 |         <a class="tag" href="/tag/deep-thoughts/page/1/">deep-thoughts</a>
255 |         <a class="tag" href="/tag/thinking/page/1/">thinking</a>
256 |         <a class="tag" href="/tag/world/page/1/">world</a>
257 |     </div>
258 | </div>
259 | ```
260 |
261 | Parse and output to log:
262 |
263 | ```python
264 | import scrapy
265 |
266 |
267 | class QuotesSpider(scrapy.Spider):
268 |     name = "quotes"
269 |     start_urls = [
270 |         'http://quotes.toscrape.com/page/1/',
271 |         'http://quotes.toscrape.com/page/2/',
272 |     ]
273 |
274 |     def parse(self, response):
275 |         for quote in response.css('div.quote'):
276 |             yield {
277 |                 'text': quote.css('span.text::text').get(),
278 |                 'author': quote.css('small.author::text').get(),
279 |                 'tags': quote.css('div.tags a.tag::text').getall(),
280 |             }
281 | ```
282 | Save the output above to JSON: `scrapy crawl quotes -o ./local_output/quotes.json` - note that **this command appends to the existing JSON file instead of overwriting it**.
283 |
284 | ### Following links
285 |
286 | The HTML of the Next link on the page:
287 |
288 | ```html
289 | <ul class="pager">
290 |     <li class="next">
291 |         <a href="/page/2/">Next <span aria-hidden="true">&rarr;</span></a>
292 |     </li>
293 | </ul>
294 | ```
295 | Extract it via shell:
296 |
297 | ```bash
298 | >>> response.css('li.next a::attr(href)').get()
299 | '/page/2/'
300 | >>> response.css('li.next a').attrib['href']
301 | '/page/2/'
302 | ```
303 | Follow links:
304 |
305 | ```python
306 | for a in response.css('li.next a'):
307 |     yield response.follow(a, callback=self.parse)
308 | ```
309 |
310 | ### Using spider arguments
311 | See https://docs.scrapy.org/en/latest/topics/spiders.html#spiderargs
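
A short sketch of the idea: arguments passed on the command line with `-a` become constructor keyword arguments (the `QuotesByTagSpider` below is a hypothetical example, not part of this repo):

```python
import scrapy


class QuotesByTagSpider(scrapy.Spider):
    name = "quotes_by_tag"  # hypothetical example spider

    def __init__(self, tag=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # run with: scrapy crawl quotes_by_tag -a tag=humor
        self.start_urls = ['http://quotes.toscrape.com/tag/%s/' % tag]

    def parse(self, response):
        for quote in response.css('div.quote'):
            yield {'text': quote.css('span.text::text').get()}
```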
312 |
--------------------------------------------------------------------------------
/deployment.md:
--------------------------------------------------------------------------------
1 | # Deployment Instructions
2 | Check the following sections for deployment instructions for Scrapinghub and Scrapydweb.
3 |
4 | ## Scrapinghub Deployment
5 |
6 | Create a free account and create a project:
7 | 
8 |
9 | We will use the `shub` command line tool to deploy. You can find your API key and the project ID (deploy number) on your project's Code & Deploys page:
10 | 
11 |
12 | Go back to the root of scrapy-tutorial (the root of the Scrapy project) and use the following commands to deploy your project to Scrapinghub.
13 |
14 | ```bash
15 |
16 | (venv) dami:scrapy-tutorial harrywang$ shub login
17 | Enter your API key from https://app.scrapinghub.com/account/apikey
18 | API key: xxxxx
19 | Validating API key...
20 | API key is OK, you are logged in now.
21 | (venv) dami:scrapy-tutorial harrywang$ shub deploy 404937
22 | Messagepack is not available, please ensure that msgpack-python library is properly installed.
23 | Saving project 404937 as default target. You can deploy to it via 'shub deploy' from now on
24 | Saved to /Users/harrywang/xxx/scrapy-tutorial/scrapinghub.yml.
25 | Packing version b6ac860-master
26 | Created setup.py at /Users/harrywang/xxx/scrapy-tutorial
27 | Deploying to Scrapy Cloud project "404937"
28 | {"status": "ok", "project": 4xxx, "version": "b6ac860-master", "spiders": 3}
29 | Run your spiders at: https://app.scrapinghub.com/p/404937/
30 | ```
31 | A Scrapinghub configuration file `scrapinghub.yml` is created, and you need to edit it to specify:
32 |
33 | - the stack: Scrapy 1.7 running on Python 3
34 | - the requirements file for additional packages
35 |
36 | ```yml
37 | project: 404937
38 |
39 | stacks:
40 | default: scrapy:1.7-py3
41 |
42 | requirements:
43 | file: requirements.txt
44 | ```
45 |
46 | Run `shub deploy` to deploy again.
47 |
48 | We have three spiders in the project:
49 | - quotes_spider.py is the main spider
50 | - quotes_spider_v1.py is version 1 of the spider, which saves pages and items to local files
51 | - quotes_spider_v2.py is version 2 of the spider, which also follows the author pages
52 |
53 | You can see your current deployment on scrapinghub.com:
54 | 
55 |
56 | Then, you can run your spider:
57 |
58 | 
59 |
60 | 
61 |
62 | Once the job is complete, you can check the results and download the items:
63 | 
64 |
65 | 
66 |
67 | You can schedule periodic jobs if you upgrade your free plan.
68 |
69 | ## Scrapydweb Deployment
70 |
71 | I found the repo https://github.com/my8100/scrapydweb and followed https://github.com/my8100/scrapyd-cluster-on-heroku to set up the server.
72 |
73 | We need a custom deployment because our Scrapy project has specific package requirements, e.g., SQLAlchemy, mysqlclient, etc. If no special package is needed, you can follow the easy setup below.
74 |
75 | ### Custom Setup
76 |
77 | #### Setup repo and Heroku account
78 | Fork a copy of https://github.com/my8100/scrapyd-cluster-on-heroku to your account, e.g., https://github.com/harrywang/scrapyd-cluster-on-heroku.
79 |
80 | Create a free account at heroku.com and install the Heroku CLI: `brew tap heroku/brew && brew install heroku`.
81 |
82 | Clone the forked repo:
83 |
84 | ```bash
85 | git clone https://github.com/harrywang/scrapyd-cluster-on-heroku
86 | cd scrapyd-cluster-on-heroku/
87 | ```
88 | Log in to Heroku:
89 |
90 | ```
91 | scrapyd-cluster-on-heroku harrywang$ heroku login
92 | heroku: Press any key to open up the browser to login or q to exit:
93 | Opening browser to https://cli-auth.heroku.com/auth/browser/3ba7221b-9c2a-4355-ab3b-d2csda
94 | Logging in... done
95 | Logged in as xxx@gmail.com
96 | ```
97 |
98 | #### Set up Scrapyd server/app
99 |
100 | In this step, you should update the `runtime.txt` to specify the Python version and `requirements.txt` to include all packages your spider needs.
101 |
102 | After changes, `runtime.txt` is:
103 | ```
104 | python-3.6
105 | ```
106 | `requirements.txt` is:
107 | ```
108 | pip>=19.1
109 | #Twisted==18.9.0
110 | scrapy
111 | scrapyd>=1.2.1
112 | scrapy-redis
113 | logparser>=0.8.2
114 |
115 | mysqlclient>=1.4.4
116 | SQLAlchemy>=1.3.6
117 | ```
118 |
119 | Set up the git repo and commit the changes we just made:
120 |
121 | ```bash
122 | cd scrapyd
123 | git init
124 | git status
125 | git add .
126 | git commit -a -m "first commit"
127 | git status
128 | ```
129 |
130 | Deploy the Scrapyd app:
131 |
132 | ```bash
133 | heroku apps:create scrapy-server1
134 | heroku git:remote -a scrapy-server1
135 | git remote -v
136 | git push heroku master
137 | heroku logs --tail
138 | # Press ctrl+c to stop logs outputting
139 | # Visit https://svr-1.herokuapp.com
140 | ```
141 | Add environment variables
142 |
143 | Timezone
144 |
145 | ```
146 | # python -c "import tzlocal; print(tzlocal.get_localzone())"
147 | heroku config:set TZ=US/Eastern
148 | # heroku config:get TZ
149 | ```
150 | Redis account (optional - not used in this tutorial; see settings.py in the scrapy_redis_demo_project.zip)
151 |
152 | ```
153 | heroku config:set REDIS_HOST=your-redis-host
154 | heroku config:set REDIS_PORT=your-redis-port
155 | heroku config:set REDIS_PASSWORD=your-redis-password
156 | ```
157 | Repeat this step if multiple Scrapyd servers are needed.
158 |
159 | #### Set up ScrapydWeb server/app
160 |
161 | Go to the scrapydweb subfolder and update `runtime.txt`, `requirements.txt`, and `scrapydweb_settings_v10.py` as needed.
162 |
163 | Let's enable authentication by editing the following section of `scrapydweb_settings_v10.py`:
164 |
165 | ```
166 | # The default is False, set it to True to enable basic auth for the web UI.
167 | ENABLE_AUTH = True
168 | if os.environ.get('ENABLE_AUTH', 'False') == 'True':
169 | ENABLE_AUTH = True
170 | # In order to enable basic auth, both USERNAME and PASSWORD should be non-empty strings.
171 | USERNAME = 'admin'
172 | PASSWORD = 'scrapydweb'
173 | USERNAME = os.environ.get('USERNAME', 'admin')
174 | PASSWORD = os.environ.get('PASSWORD', 'scrapydweb')
175 | ```
176 |
177 | Then set up the git repo and commit the changes:
178 |
179 | ```
180 | cd ..
181 | cd scrapydweb
182 | git init
183 | git status
184 | git add .
185 | git commit -a -m "first commit"
186 | git status
187 | ```
188 |
189 | Deploy the ScrapydWeb app:
190 | ```bash
191 | heroku apps:create scrapyd-web
192 | heroku git:remote -a scrapyd-web
193 | git remote -v
194 | git push heroku master
195 | ```
196 |
197 | Add environment variables
198 |
199 | Timezone
200 | ```
201 | heroku config:set TZ=US/Eastern
202 | ```
203 |
204 | Scrapyd servers - you have to use the Scrapyd server address you just set up above (see scrapydweb_settings_vN.py in the scrapydweb directory):
205 |
206 | ```
207 | heroku config:set SCRAPYD_SERVER_1=scrapy-server1.herokuapp.com:80
208 | # heroku config:set SCRAPYD_SERVER_2=svr-2.herokuapp.com:80#group1
209 | # heroku config:set SCRAPYD_SERVER_3=svr-3.herokuapp.com:80#group1
210 | # heroku config:set SCRAPYD_SERVER_4=svr-4.herokuapp.com:80#group2
211 | ```
212 |
213 | #### Deploy the Scrapy project
214 |
215 | We need to package the project and upload it to the server.
216 |
217 | First, install scrapyd-client using `pip install git+https://github.com/scrapy/scrapyd-client` (note: installing the PyPI release via plain pip did not work at the time of writing; see https://stackoverflow.com/questions/45750739/scrapyd-client-command-not-found).
218 |
219 | Change the deploy settings in scrapy.cfg:
220 | ```
221 | [deploy]
222 | url = http://scrapyd-server1.herokuapp.com
223 | username = admin
224 | password = scrapydweb
225 | project = scrapy-tutorial
226 | ```
227 | Then, use `scrapyd-deploy` to package the project and deploy it to the Scrapyd server:
228 |
229 | ```
230 | (venv) dami:scrapy-tutorial harrywang$ scrapyd-deploy
231 | /Users/harrywang/sandbox/scrapy-tutorial/venv/lib/python3.6/site-packages/scrapyd_client/deploy.py:23: ScrapyDeprecationWarning: Module `scrapy.utils.http` is deprecated, Please import from `w3lib.http` instead.
232 | from scrapy.utils.http import basic_auth_header
233 | Packing version 1566253506
234 | Deploying to project "scrapy-tutorial" in http://scrapyd-server1.herokuapp.com/addversion.json
235 | Server response (200):
236 | {"node_name": "9177f699-b645-4656-82d1-beef2898fdc1", "status": "ok", "project": "scrapy-tutorial", "version": "1566253506", "spiders": 3}
237 | ```
238 | Go to https://scrapyd-web.herokuapp.com and you should see your project deployed:
239 | 
240 |
241 | Go to the following page to run the spider:
242 |
243 | 
244 |
245 | Once the spider finishes, you can check the scraped items in the Files menu.
246 |
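If you prefer to trigger runs programmatically instead of clicking through the web UI, Scrapyd also exposes a `schedule.json` endpoint. A rough sketch using the `requests` library, assuming the Scrapyd app above is reachable at `scrapyd-server1.herokuapp.com` and protected by the basic-auth credentials from `scrapy.cfg`:

```python
import requests

resp = requests.post(
    "http://scrapyd-server1.herokuapp.com/schedule.json",
    auth=("admin", "scrapydweb"),  # credentials from scrapy.cfg above
    data={"project": "scrapy-tutorial", "spider": "quotes"},
)
print(resp.json())  # e.g. {"status": "ok", "jobid": "..."}
```
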
247 | You can also specify Timer Tasks. The following shows a task that runs every 10 minutes. This part is based on APScheduler; see the [documentation](https://apscheduler.readthedocs.io/en/latest/modules/triggers/cron.html#expression-types) to figure out how to set the values (this can be confusing).
248 | 
249 |
250 |
251 | ### Easy Setup
252 | - Create a free account at heroku.com and log in
253 | - Go to https://github.com/my8100/scrapyd-cluster-on-heroku-scrapyd-app and click the "Deploy to Heroku" button to set up a Scrapyd server app (scrapyd-server1.herokuapp.com):
254 | 
255 |
256 | Use the following settings (no Redis settings) and the app will be at scrapyd-server1.herokuapp.com:
257 | 
258 |
259 | - Go to https://github.com/my8100/scrapyd-cluster-on-heroku-scrapydweb-app-git and click the "Deploy to Heroku" button to set up a ScrapydWeb server app:
260 | 
261 |
262 | Use the following settings (no Redis settings) and the app will be at scrapyd-web.herokuapp.com:
263 | 
264 |
265 | - Go to https://scrapyd-web.herokuapp.com and log in; you can see that one Scrapyd server is ready:
266 |
267 | 
268 |
269 | We need to package the project and upload it to the server.
270 |
271 | First, install scrapyd-client using `pip install git+https://github.com/scrapy/scrapyd-client` (note: installing the PyPI release via plain pip did not work at the time of writing; see https://stackoverflow.com/questions/45750739/scrapyd-client-command-not-found).
272 |
273 | Change the deploy settings in scrapy.cfg:
274 | ```
275 | [deploy]
276 | url = http://scrapyd-server1.herokuapp.com
277 | username = admin
278 | password = scrapydweb
279 | project = scrapy-tutorial
280 | ```
281 | Then, use `scrapyd-deploy` to package the project and deploy it to the Scrapyd server:
282 |
283 | ```
284 | (venv) dami:scrapy-tutorial harrywang$ scrapyd-deploy
285 | /Users/harrywang/sandbox/scrapy-tutorial/venv/lib/python3.6/site-packages/scrapyd_client/deploy.py:23: ScrapyDeprecationWarning: Module `scrapy.utils.http` is deprecated, Please import from `w3lib.http` instead.
286 | from scrapy.utils.http import basic_auth_header
287 | Packing version 1566253506
288 | Deploying to project "scrapy-tutorial" in http://scrapyd-server1.herokuapp.com/addversion.json
289 | Server response (200):
290 | {"node_name": "9177f699-b645-4656-82d1-beef2898fdc1", "status": "ok", "project": "scrapy-tutorial", "version": "1566253506", "spiders": 3}
291 | ```
292 | Go to https://scrapyd-web.herokuapp.com, and you should see your project deployed:
293 | 
294 |
--------------------------------------------------------------------------------
/local_output/readme.md:
--------------------------------------------------------------------------------
1 | This folder is ignored and stores local outputs (data saved to local such as json and html)
2 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | mysqlclient>=1.4.4
2 | Scrapy>=1.7.3
3 | shub>=2.9.0
4 | SQLAlchemy>=1.3.6
5 |
--------------------------------------------------------------------------------
/scrapinghub.yml:
--------------------------------------------------------------------------------
1 | project: 404937
2 |
3 | stacks:
4 | default: scrapy:1.7-py3
5 | requirements:
6 | file: requirements.txt
7 |
--------------------------------------------------------------------------------
/scrapy.cfg:
--------------------------------------------------------------------------------
1 | # Automatically created by: scrapy startproject
2 | #
3 | # For more information about the [deploy] section see:
4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html
5 |
6 | [settings]
7 | default = tutorial.settings
8 |
9 | [deploy]
10 | url = http://scrapy-server1.herokuapp.com
11 | username = admin
12 | password = scrapydweb
13 | project = scrapy-tutorial
14 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # Automatically created by: shub deploy
2 |
3 | from setuptools import setup, find_packages
4 |
5 | setup(
6 | name = 'project',
7 | version = '1.0',
8 | packages = find_packages(),
9 | entry_points = {'scrapy': ['settings = tutorial.settings']},
10 | )
11 |
--------------------------------------------------------------------------------
/tutorial/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harrywang/scrapy-tutorial/1dde391b856491202eded192cd0384be38f01b43/tutorial/__init__.py
--------------------------------------------------------------------------------
/tutorial/items.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Define here the models for your scraped items
4 | #
5 | # See documentation in:
6 | # https://doc.scrapy.org/en/latest/topics/items.html
7 |
8 | from scrapy.item import Item, Field
9 | from scrapy.loader.processors import MapCompose, TakeFirst
10 | from datetime import datetime
11 |
12 |
13 | def remove_quotes(text):
14 | # strip the unicode quotes
15 | text = text.strip(u'\u201c'u'\u201d')
16 | return text
17 |
18 |
19 | def convert_date(text):
20 | # convert string March 14, 1879 to Python date
21 | return datetime.strptime(text, '%B %d, %Y')
22 |
23 |
24 | def parse_location(text):
25 | # parse location "in Ulm, Germany"
26 |     # this simply removes "in "; you can further parse city, state, country, etc.
27 | return text[3:]
28 |
29 |
30 | class QuoteItem(Item):
31 | quote_content = Field(
32 | input_processor=MapCompose(remove_quotes),
33 |         # TakeFirst returns the first value, not the whole list
34 | output_processor=TakeFirst()
35 | )
36 | author_name = Field(
37 | input_processor=MapCompose(str.strip),
38 | output_processor=TakeFirst()
39 | )
40 | author_birthday = Field(
41 | input_processor=MapCompose(convert_date),
42 | output_processor=TakeFirst()
43 | )
44 | author_bornlocation = Field(
45 | input_processor=MapCompose(parse_location),
46 | output_processor=TakeFirst()
47 | )
48 | author_bio = Field(
49 | input_processor=MapCompose(str.strip),
50 | output_processor=TakeFirst()
51 | )
52 | tags = Field()
53 |
--------------------------------------------------------------------------------
/tutorial/middlewares.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Define here the models for your spider middleware
4 | #
5 | # See documentation in:
6 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html
7 |
8 | from scrapy import signals
9 |
10 |
11 | class TutorialSpiderMiddleware(object):
12 | # Not all methods need to be defined. If a method is not defined,
13 | # scrapy acts as if the spider middleware does not modify the
14 | # passed objects.
15 |
16 | @classmethod
17 | def from_crawler(cls, crawler):
18 | # This method is used by Scrapy to create your spiders.
19 | s = cls()
20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
21 | return s
22 |
23 | def process_spider_input(self, response, spider):
24 | # Called for each response that goes through the spider
25 | # middleware and into the spider.
26 |
27 | # Should return None or raise an exception.
28 | return None
29 |
30 | def process_spider_output(self, response, result, spider):
31 | # Called with the results returned from the Spider, after
32 | # it has processed the response.
33 |
34 | # Must return an iterable of Request, dict or Item objects.
35 | for i in result:
36 | yield i
37 |
38 | def process_spider_exception(self, response, exception, spider):
39 | # Called when a spider or process_spider_input() method
40 | # (from other spider middleware) raises an exception.
41 |
42 | # Should return either None or an iterable of Response, dict
43 | # or Item objects.
44 | pass
45 |
46 | def process_start_requests(self, start_requests, spider):
47 | # Called with the start requests of the spider, and works
48 | # similarly to the process_spider_output() method, except
49 | # that it doesn’t have a response associated.
50 |
51 | # Must return only requests (not items).
52 | for r in start_requests:
53 | yield r
54 |
55 | def spider_opened(self, spider):
56 | spider.logger.info('Spider opened: %s' % spider.name)
57 |
58 |
59 | class TutorialDownloaderMiddleware(object):
60 | # Not all methods need to be defined. If a method is not defined,
61 | # scrapy acts as if the downloader middleware does not modify the
62 | # passed objects.
63 |
64 | @classmethod
65 | def from_crawler(cls, crawler):
66 | # This method is used by Scrapy to create your spiders.
67 | s = cls()
68 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
69 | return s
70 |
71 | def process_request(self, request, spider):
72 | # Called for each request that goes through the downloader
73 | # middleware.
74 |
75 | # Must either:
76 | # - return None: continue processing this request
77 | # - or return a Response object
78 | # - or return a Request object
79 | # - or raise IgnoreRequest: process_exception() methods of
80 | # installed downloader middleware will be called
81 | return None
82 |
83 | def process_response(self, request, response, spider):
84 | # Called with the response returned from the downloader.
85 |
86 | # Must either;
87 | # - return a Response object
88 | # - return a Request object
89 | # - or raise IgnoreRequest
90 | return response
91 |
92 | def process_exception(self, request, exception, spider):
93 | # Called when a download handler or a process_request()
94 | # (from other downloader middleware) raises an exception.
95 |
96 | # Must either:
97 | # - return None: continue processing this exception
98 | # - return a Response object: stops process_exception() chain
99 | # - return a Request object: stops process_exception() chain
100 | pass
101 |
102 | def spider_opened(self, spider):
103 | spider.logger.info('Spider opened: %s' % spider.name)
104 |
--------------------------------------------------------------------------------
/tutorial/models.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy import create_engine, Column, Table, ForeignKey, MetaData
2 | from sqlalchemy.orm import relationship
3 | from sqlalchemy.ext.declarative import declarative_base
4 | from sqlalchemy import (
5 | Integer, String, Date, DateTime, Float, Boolean, Text)
6 | from scrapy.utils.project import get_project_settings
7 |
8 | Base = declarative_base()
9 |
10 |
11 | def db_connect():
12 | """
13 | Performs database connection using database settings from settings.py.
14 | Returns sqlalchemy engine instance
15 | """
16 | return create_engine(get_project_settings().get("CONNECTION_STRING"))
17 |
18 |
19 | def create_table(engine):
20 | Base.metadata.create_all(engine)
21 |
22 |
23 | # Association Table for Many-to-Many relationship between Quote and Tag
24 | # https://docs.sqlalchemy.org/en/13/orm/basic_relationships.html#many-to-many
25 | quote_tag = Table('quote_tag', Base.metadata,
26 | Column('quote_id', Integer, ForeignKey('quote.id')),
27 | Column('tag_id', Integer, ForeignKey('tag.id'))
28 | )
29 |
30 |
31 | class Quote(Base):
32 | __tablename__ = "quote"
33 |
34 | id = Column(Integer, primary_key=True)
35 | quote_content = Column('quote_content', Text())
36 | author_id = Column(Integer, ForeignKey('author.id')) # Many quotes to one author
37 | tags = relationship('Tag', secondary='quote_tag',
38 | lazy='dynamic', backref="quote") # M-to-M for quote and tag
39 |
40 |
41 | class Author(Base):
42 | __tablename__ = "author"
43 |
44 | id = Column(Integer, primary_key=True)
45 | name = Column('name', String(50), unique=True)
46 | birthday = Column('birthday', DateTime)
47 | bornlocation = Column('bornlocation', String(150))
48 | bio = Column('bio', Text())
49 | quotes = relationship('Quote', backref='author') # One author to many Quotes
50 |
51 |
52 | class Tag(Base):
53 | __tablename__ = "tag"
54 |
55 | id = Column(Integer, primary_key=True)
56 | name = Column('name', String(30), unique=True)
57 | quotes = relationship('Quote', secondary='quote_tag',
58 | lazy='dynamic', backref="tag") # M-to-M for quote and tag
59 |
--------------------------------------------------------------------------------
/tutorial/pipelines.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Define your item pipelines here
4 | #
5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
6 | # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
7 |
8 |
9 | from sqlalchemy.orm import sessionmaker
10 | from scrapy.exceptions import DropItem
11 | from tutorial.models import Quote, Author, Tag, db_connect, create_table
12 | import logging
13 |
14 | class DuplicatesPipeline(object):
15 |
16 | def __init__(self):
17 | """
18 | Initializes database connection and sessionmaker.
19 | Creates tables.
20 | """
21 | engine = db_connect()
22 | create_table(engine)
23 | self.Session = sessionmaker(bind=engine)
24 | logging.info("****DuplicatesPipeline: database connected****")
25 |
26 |     def process_item(self, item, spider):
27 |         session = self.Session()
28 |         exist_quote = session.query(Quote).filter_by(
29 |             quote_content=item["quote_content"]).first()
30 |         session.close()  # close the session before raising or returning
31 |         if exist_quote is not None:  # the current quote already exists in the database
32 |             raise DropItem("Duplicate item found: %s" % item["quote_content"])
33 |         else:
34 |             return item
35 |
36 |
37 | class SaveQuotesPipeline(object):
38 | def __init__(self):
39 | """
40 | Initializes database connection and sessionmaker
41 | Creates tables
42 | """
43 | engine = db_connect()
44 | create_table(engine)
45 | self.Session = sessionmaker(bind=engine)
46 | logging.info("****SaveQuotePipeline: database connected****")
47 |
48 |
49 | def process_item(self, item, spider):
50 | """Save quotes in the database
51 | This method is called for every item pipeline component
52 | """
53 | session = self.Session()
54 | quote = Quote()
55 | author = Author()
56 | tag = Tag()
57 | author.name = item["author_name"]
58 | author.birthday = item["author_birthday"]
59 | author.bornlocation = item["author_bornlocation"]
60 | author.bio = item["author_bio"]
61 | quote.quote_content = item["quote_content"]
62 |
63 | # check whether the author exists
64 | exist_author = session.query(Author).filter_by(name = author.name).first()
65 | if exist_author is not None: # the current author exists
66 | quote.author = exist_author
67 | else:
68 | quote.author = author
69 |
70 | # check whether the current quote has tags or not
71 | if "tags" in item:
72 | for tag_name in item["tags"]:
73 | tag = Tag(name=tag_name)
74 | # check whether the current tag already exists in the database
75 | exist_tag = session.query(Tag).filter_by(name = tag.name).first()
76 | if exist_tag is not None: # the current tag exists
77 | tag = exist_tag
78 | quote.tags.append(tag)
79 |
80 | try:
81 | session.add(quote)
82 | session.commit()
83 |
84 | except:
85 | session.rollback()
86 | raise
87 |
88 | finally:
89 | session.close()
90 |
91 | return item
92 |
--------------------------------------------------------------------------------
/tutorial/settings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Scrapy settings for tutorial project
4 | #
5 | # For simplicity, this file contains only settings considered important or
6 | # commonly used. You can find more settings consulting the documentation:
7 | #
8 | # https://doc.scrapy.org/en/latest/topics/settings.html
9 | # https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
10 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html
11 |
12 | BOT_NAME = 'tutorial'
13 |
14 | SPIDER_MODULES = ['tutorial.spiders']
15 | NEWSPIDER_MODULE = 'tutorial.spiders'
16 |
17 | # Database Connection String
18 |
19 | # SQLite
20 | CONNECTION_STRING = 'sqlite:///scrapy_quotes.db'
21 |
22 | # MySQL
23 | # CONNECTION_STRING = "{drivername}://{user}:{passwd}@{host}:{port}/{db_name}?charset=utf8".format(
24 | # drivername="mysql",
25 | # user="harrywang",
26 | # passwd="tutorial",
27 | # host="localhost",
28 | # port="3306",
29 | # db_name="scrapy_quotes",
30 | # )
31 |
32 | # Crawl responsibly by identifying yourself (and your website) on the user-agent
33 | #USER_AGENT = 'tutorial (+http://www.yourdomain.com)'
34 |
35 | # Obey robots.txt rules
36 | ROBOTSTXT_OBEY = True
37 |
38 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
39 | #CONCURRENT_REQUESTS = 32
40 |
41 | # Configure a delay for requests for the same website (default: 0)
42 | # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
43 | # See also autothrottle settings and docs
44 | #DOWNLOAD_DELAY = 3
45 | # The download delay setting will honor only one of:
46 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16
47 | #CONCURRENT_REQUESTS_PER_IP = 16
48 |
49 | # Disable cookies (enabled by default)
50 | #COOKIES_ENABLED = False
51 |
52 | # Disable Telnet Console (enabled by default)
53 | #TELNETCONSOLE_ENABLED = False
54 |
55 | # Override the default request headers:
56 | #DEFAULT_REQUEST_HEADERS = {
57 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
58 | # 'Accept-Language': 'en',
59 | #}
60 |
61 | # Enable or disable spider middlewares
62 | # See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
63 | #SPIDER_MIDDLEWARES = {
64 | # 'tutorial.middlewares.TutorialSpiderMiddleware': 543,
65 | #}
66 |
67 | # Enable or disable downloader middlewares
68 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
69 | #DOWNLOADER_MIDDLEWARES = {
70 | # 'tutorial.middlewares.TutorialDownloaderMiddleware': 543,
71 | #}
72 |
73 | # Enable or disable extensions
74 | # See https://doc.scrapy.org/en/latest/topics/extensions.html
75 | #EXTENSIONS = {
76 | # 'scrapy.extensions.telnet.TelnetConsole': None,
77 | #}
78 |
79 | # Configure item pipelines
80 | # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
81 | ITEM_PIPELINES = {
82 |
83 | 'tutorial.pipelines.DuplicatesPipeline': 100,
84 | 'tutorial.pipelines.SaveQuotesPipeline': 200,
85 | }
86 |
87 | # Enable and configure the AutoThrottle extension (disabled by default)
88 | # See https://doc.scrapy.org/en/latest/topics/autothrottle.html
89 | #AUTOTHROTTLE_ENABLED = True
90 | # The initial download delay
91 | #AUTOTHROTTLE_START_DELAY = 5
92 | # The maximum download delay to be set in case of high latencies
93 | #AUTOTHROTTLE_MAX_DELAY = 60
94 | # The average number of requests Scrapy should be sending in parallel to
95 | # each remote server
96 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
97 | # Enable showing throttling stats for every response received:
98 | #AUTOTHROTTLE_DEBUG = False
99 |
100 | # Enable and configure HTTP caching (disabled by default)
101 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
102 | #HTTPCACHE_ENABLED = True
103 | #HTTPCACHE_EXPIRATION_SECS = 0
104 | #HTTPCACHE_DIR = 'httpcache'
105 | #HTTPCACHE_IGNORE_HTTP_CODES = []
106 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
107 |
--------------------------------------------------------------------------------
/tutorial/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 |
--------------------------------------------------------------------------------
/tutorial/spiders/quotes_spider.py:
--------------------------------------------------------------------------------
1 | import scrapy
2 | from scrapy.loader import ItemLoader
3 | from tutorial.items import QuoteItem
4 |
5 | class QuotesSpider(scrapy.Spider):
6 | name = "quotes"
7 | allowed_domains = ["toscrape.com"]
8 | start_urls = ['http://quotes.toscrape.com/']
9 |
10 |
11 | def parse(self, response):
12 | self.logger.info('Parse function called on {}'.format(response.url))
13 | # quotes = response.xpath("//div[@class='quote']")
14 | quotes = response.css('div.quote')
15 |
16 | for quote in quotes:
17 | loader = ItemLoader(item=QuoteItem(), selector=quote)
18 | # pay attention to the dot .// to use relative xpath
19 | # loader.add_xpath('quote_content', ".//span[@class='text']/text()")
20 | loader.add_css('quote_content', '.text::text')
21 | # loader.add_xpath('author', './/small//text()')
22 | loader.add_css('tags', '.tag::text')
23 | quote_item = loader.load_item()
24 | author_url = quote.css('.author + a::attr(href)').get()
25 | # go to the author page and pass the current collected quote info
26 | yield response.follow(author_url, self.parse_author, meta={'quote_item': quote_item})
27 |
28 | # go to Next page
29 | for a in response.css('li.next a'):
30 | yield response.follow(a, self.parse)
31 |
32 | def parse_author(self, response):
33 | quote_item = response.meta['quote_item']
34 | loader = ItemLoader(item=quote_item, response=response)
35 | loader.add_css('author_name', '.author-title::text')
36 | loader.add_css('author_birthday', '.author-born-date::text')
37 | loader.add_css('author_bornlocation', '.author-born-location::text')
38 | loader.add_css('author_bio', '.author-description::text')
39 | yield loader.load_item()
40 |
--------------------------------------------------------------------------------
/tutorial/spiders/quotes_spider_v1.py:
--------------------------------------------------------------------------------
1 | import scrapy
2 |
3 | class QuotesSpider(scrapy.Spider):
4 | name = "quotes_v1"
5 |
6 | start_urls = [
7 | 'http://quotes.toscrape.com/page/1/',
8 | #'http://quotes.toscrape.com/page/2/',
9 | ]
10 | # long version to implement start_urls array:
11 | # def start_requests(self):
12 | # urls = [
13 | # 'http://quotes.toscrape.com/page/1/',
14 | # 'http://quotes.toscrape.com/page/2/',
15 | # ]
16 | # for url in urls:
17 | # yield scrapy.Request(url=url, callback=self.parse)
18 |
19 | def parse(self, response):
20 | page = response.url.split("/")[-2] # getting the page number from the URL
21 | filename = 'local_output/' + 'quotes-%s.html' % page
22 | with open(filename, 'wb') as f:
23 | f.write(response.body)
24 | self.log('Saved file %s' % filename)
25 |
26 | for quote in response.css('div.quote'):
27 | yield {
28 | 'text': quote.css('span.text::text').get().strip(u'\u201c'u'\u201d'), # strip the unicode quotes
29 | 'author': quote.css('small.author::text').get(),
30 | 'tags': quote.css('div.tags a.tag::text').getall(),
31 | }
32 |
33 | # next_page = response.css('li.next a::attr(href)').get()
34 |
35 | # if next_page is not None:
36 | # next_page = response.urljoin(next_page)
37 | # yield scrapy.Request(next_page, callback=self.parse)
38 |
39 | # shortcut 1
40 | # if next_page is not None:
41 | # yield response.follow(next_page, callback=self.parse)
42 |
43 | # shortcut 2
44 | # for href in response.css('li.next a::attr(href)'):
45 | # yield response.follow(href, callback=self.parse)
46 |
47 | # shortcut 3
48 | for a in response.css('li.next a'):
49 | yield response.follow(a, callback=self.parse)
50 |
--------------------------------------------------------------------------------
/tutorial/spiders/quotes_spider_v2.py:
--------------------------------------------------------------------------------
1 | import scrapy
2 |
3 | class QuotesSpider(scrapy.Spider):
4 | name = "quotes_v2"
5 |
6 | start_urls = ['http://quotes.toscrape.com']
7 |
8 | def parse(self, response):
9 | # self.logger.info('hello this is my first spider')
10 | quotes = response.css('div.quote')
11 | for quote in quotes:
12 |
13 | yield {
14 | 'text': quote.css('.text::text').get(),
15 | 'author': quote.css('.author::text').get(),
16 | 'tags': quote.css('.tag::text').getall(),
17 | }
18 |
19 | author_url = quote.css('.author + a::attr(href)').get()
20 | self.logger.info('get author page url')
21 | # go to the author page
22 | yield response.follow(author_url, callback=self.parse_author)
23 |
24 | for a in response.css('li.next a'):
25 | yield response.follow(a, callback=self.parse)
26 |
27 |
28 | def parse_author(self, response):
29 | yield {
30 | 'author_name': response.css('.author-title::text').get(),
31 | 'author_birthday': response.css('.author-born-date::text').get(),
32 | 'author_bornlocation': response.css('.author-born-location::text').get(),
33 | 'author_bio': response.css('.author-description::text').get(),
34 | }
35 |
--------------------------------------------------------------------------------