"
355 | ]
356 | },
357 | {
358 | "cell_type": "code",
359 | "execution_count": null,
360 | "metadata": {
361 | "collapsed": true
362 | },
363 | "outputs": [],
364 | "source": [
365 | "soup.select('div#session-objects')"
366 | ]
367 | },
368 | {
369 | "cell_type": "code",
370 | "execution_count": null,
371 | "metadata": {
372 | "collapsed": true
373 | },
374 | "outputs": [],
375 | "source": []
376 | },
377 | {
378 | "cell_type": "code",
379 | "execution_count": null,
380 | "metadata": {
381 | "collapsed": true
382 | },
383 | "outputs": [],
384 | "source": [
385 | "
"
386 | ]
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": null,
391 | "metadata": {
392 | "collapsed": true
393 | },
394 | "outputs": [],
395 | "source": [
396 | "soup.select('div#request-and-response-objects')"
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": null,
402 | "metadata": {
403 | "collapsed": true
404 | },
405 | "outputs": [],
406 | "source": []
407 | },
408 | {
409 | "cell_type": "code",
410 | "execution_count": null,
411 | "metadata": {
412 | "collapsed": true
413 | },
414 | "outputs": [],
415 | "source": []
416 | },
417 | {
418 | "cell_type": "code",
419 | "execution_count": null,
420 | "metadata": {
421 | "collapsed": true
422 | },
423 | "outputs": [],
424 | "source": []
425 | }
426 | ],
427 | "metadata": {
428 | "kernelspec": {
429 | "display_name": "Python 3",
430 | "language": "python",
431 | "name": "python3"
432 | },
433 | "language_info": {
434 | "codemirror_mode": {
435 | "name": "ipython",
436 | "version": 3
437 | },
438 | "file_extension": ".py",
439 | "mimetype": "text/x-python",
440 | "name": "python",
441 | "nbconvert_exporter": "python",
442 | "pygments_lexer": "ipython3",
443 | "version": "3.6.0"
444 | }
445 | },
446 | "nbformat": 4,
447 | "nbformat_minor": 1
448 | }
449 |
--------------------------------------------------------------------------------
/Extracting_html_table_1_and_2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import requests\n",
12 | "from bs4 import BeautifulSoup\n",
13 | "import time"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": null,
19 | "metadata": {
20 | "collapsed": true
21 | },
22 | "outputs": [],
23 | "source": [
24 | "url ='http://www.espn.com/nba/statistics/player/_/stat/assists/sort/avgAssists/'\n",
25 | "\n",
26 | "headers= {'User-Agent': 'Mozilla/5.0'}"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": null,
32 | "metadata": {
33 | "collapsed": true
34 | },
35 | "outputs": [],
36 | "source": [
37 | "response = requests.get(url, headers=headers)"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "metadata": {
44 | "collapsed": false
45 | },
46 | "outputs": [],
47 | "source": [
48 | "response.status_code"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": null,
54 | "metadata": {
55 | "collapsed": true
56 | },
57 | "outputs": [],
58 | "source": [
59 | "response.content"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "metadata": {
66 | "collapsed": false
67 | },
68 | "outputs": [],
69 | "source": [
70 | "soup = BeautifulSoup(response.content, 'html.parser')"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": null,
76 | "metadata": {
77 | "collapsed": false
78 | },
79 | "outputs": [],
80 | "source": [
81 | "stat_table =soup.find_all('table', class_ = 'tablehead' )"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {
88 | "collapsed": false
89 | },
90 | "outputs": [],
91 | "source": [
92 | "len(stat_table)"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": null,
98 | "metadata": {
99 | "collapsed": false
100 | },
101 | "outputs": [],
102 | "source": [
103 | "stat_table = stat_table[0]"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": null,
109 | "metadata": {
110 | "collapsed": true
111 | },
112 | "outputs": [],
113 | "source": [
114 | "for row in stat_table.find_all('tr'):\n",
115 | " for cell in row.find_all('td'):\n",
116 | " print(cell.text)\n",
117 | " "
118 | ]
119 | },
120 | {
121 | "cell_type": "code",
122 | "execution_count": null,
123 | "metadata": {
124 | "collapsed": true
125 | },
126 | "outputs": [],
127 | "source": [
128 | "with open ('basketball_stats.txt', 'w') as r:\n",
129 | " for row in stat_table.find_all('tr'):\n",
130 | " for cell in row.find_all('td'):\n",
131 | " r.write(cell.text.ljust(22))\n",
132 | " r.write('\\n')\n",
133 | " "
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "metadata": {
140 | "collapsed": true
141 | },
142 | "outputs": [],
143 | "source": [
144 | "#http://www.whoishostingthis.com/tools/user-agent/"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": null,
150 | "metadata": {
151 | "collapsed": true
152 | },
153 | "outputs": [],
154 | "source": [
155 | "#'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'"
156 | ]
157 | },
158 | {
159 | "cell_type": "markdown",
160 | "metadata": {},
161 | "source": [
162 | "# video 2"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 2,
168 | "metadata": {
169 | "collapsed": true
170 | },
171 | "outputs": [],
172 | "source": [
173 | "import requests\n",
174 | "from bs4 import BeautifulSoup\n",
175 | "import time"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": 3,
181 | "metadata": {
182 | "collapsed": true
183 | },
184 | "outputs": [],
185 | "source": []
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": null,
190 | "metadata": {
191 | "collapsed": true
192 | },
193 | "outputs": [],
194 | "source": [
195 | "#http://www.espn.com/nba/statistics/player/_/stat/assists/sort/avgAssists/count/41"
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": 11,
201 | "metadata": {
202 | "collapsed": false
203 | },
204 | "outputs": [],
205 | "source": [
206 | "num = 1\n",
207 | "\n",
208 | "url ='http://www.espn.com/nba/statistics/player/_/stat/assists/sort/avgAssists/count/{}'.format(num)\n",
209 | "\n",
210 | "headers= {'User-Agent': 'Mozilla/5.0'}\n",
211 | "with open ('basketball_stats.txt', 'w') as r:\n",
212 | " r.write('BASKETBALL ASSISTS TABLE\\n')\n",
213 | "\n",
214 | "\n",
215 | "while num < 272:\n",
216 | " url ='http://www.espn.com/nba/statistics/player/_/stat/assists/sort/avgAssists/count/{}'.format(num)\n",
217 | " \n",
218 | " time.sleep(1)\n",
219 | " response = requests.get(url, headers=headers)\n",
220 | " \n",
221 | " if response.status_code == 200:\n",
222 | " soup = BeautifulSoup(response.content, 'html.parser')\n",
223 | " stat_table = soup.find_all('table', class_ = 'tablehead')\n",
224 | " if len(stat_table) < 2:\n",
225 | " stat_table = stat_table[0]\n",
226 | " with open ('basketball_stats.txt', 'a') as r:\n",
227 | " for row in stat_table.find_all('tr'):\n",
228 | " for cell in row.find_all('td'):\n",
229 | " r.write(cell.text.ljust(22))\n",
230 | " r.write('\\n')\n",
231 | " else: print('Too many tables')\n",
232 | " \n",
233 | " else:\n",
234 | " print('No response')\n",
235 | " print(num)\n",
236 | " \n",
237 | " \n",
238 | " num += 40\n",
239 | " \n",
240 | " \n",
241 | " \n",
242 | " "
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": null,
248 | "metadata": {
249 | "collapsed": true
250 | },
251 | "outputs": [],
252 | "source": []
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": null,
257 | "metadata": {
258 | "collapsed": true
259 | },
260 | "outputs": [],
261 | "source": []
262 | }
263 | ],
264 | "metadata": {
265 | "kernelspec": {
266 | "display_name": "Python 3",
267 | "language": "python",
268 | "name": "python3"
269 | },
270 | "language_info": {
271 | "codemirror_mode": {
272 | "name": "ipython",
273 | "version": 3
274 | },
275 | "file_extension": ".py",
276 | "mimetype": "text/x-python",
277 | "name": "python",
278 | "nbconvert_exporter": "python",
279 | "pygments_lexer": "ipython3",
280 | "version": "3.6.0"
281 | }
282 | },
283 | "nbformat": 4,
284 | "nbformat_minor": 1
285 | }
286 |
--------------------------------------------------------------------------------
/Navigating_Websites_Part_1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Navigating Gamefaqs"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {
14 | "collapsed": false
15 | },
16 | "outputs": [],
17 | "source": [
18 | "import requests\n",
19 | "from bs4 import BeautifulSoup\n",
20 | "\n",
21 | "response = requests.get('https://www.gamefaqs.com', headers={'User-Agent': 'Mozilla/5.0'})"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {
28 | "collapsed": false
29 | },
30 | "outputs": [],
31 | "source": [
32 | "response.status_code"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": null,
38 | "metadata": {
39 | "collapsed": true
40 | },
41 | "outputs": [],
42 | "source": [
43 | "#Here is another header to try if you get a 401 status code\n",
44 | "\n",
45 | "response = requests.get('https://www.gamefaqs.com', headers ={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'})"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": null,
51 | "metadata": {
52 | "collapsed": false
53 | },
54 | "outputs": [],
55 | "source": [
56 | "response.status_code"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": null,
62 | "metadata": {
63 | "collapsed": true
64 | },
65 | "outputs": [],
66 | "source": []
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {
72 | "collapsed": false
73 | },
74 | "outputs": [],
75 | "source": [
76 | "#if you get another 401 error, try this method\n",
77 | "\n",
78 | "import requests\n",
79 | "\n",
80 | "session = requests.Session()\n",
81 | "response = session.get('https://www.gamefaqs.com', headers={'User-Agent': 'Mozilla/5.0'})\n",
82 | "\n",
83 | "print(response.status_code)\n",
84 | "\n",
85 | "\n",
86 | "\n"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "metadata": {
93 | "collapsed": false
94 | },
95 | "outputs": [],
96 | "source": [
97 | "soup = BeautifulSoup(response.content, 'html.parser')"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "metadata": {
104 | "collapsed": false
105 | },
106 | "outputs": [],
107 | "source": [
108 | "soup.title"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": null,
114 | "metadata": {
115 | "collapsed": true
116 | },
117 | "outputs": [],
118 | "source": [
119 | "soup.get_text(strip = True) #gets rid of new lines"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {
126 | "collapsed": true
127 | },
128 | "outputs": [],
129 | "source": [
130 | "#Let's get info about Vita"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": null,
136 | "metadata": {
137 | "collapsed": false
138 | },
139 | "outputs": [],
140 | "source": [
141 | "soup.find_all(string = 'Vita') #capitalized"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": null,
147 | "metadata": {
148 | "collapsed": false
149 | },
150 | "outputs": [],
151 | "source": [
152 | "soup.find_all(string = 'vita') #lower-case"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": null,
158 | "metadata": {
159 | "collapsed": false
160 | },
161 | "outputs": [],
162 | "source": [
163 | "type(soup.find_all(string = 'Vita')) #find_all returns a bs4.element.ResultSet, not a Tag"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": null,
169 | "metadata": {
170 | "collapsed": false
171 | },
172 | "outputs": [],
173 | "source": [
174 | "len(soup.find_all(string = 'Vita')) #how many results"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "metadata": {
181 | "collapsed": true
182 | },
183 | "outputs": [],
184 | "source": [
185 | "soup.find_all(string = 'Vita')[0].find_parents()"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "metadata": {
192 | "collapsed": false
193 | },
194 | "outputs": [],
195 | "source": [
196 | "len(soup.find_all(string = 'Vita')[0].find_parents())"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": null,
202 | "metadata": {
203 | "collapsed": false
204 | },
205 | "outputs": [],
206 | "source": [
207 | "soup.find_all('p',string = 'Vita') #no results"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": null,
213 | "metadata": {
214 | "collapsed": false
215 | },
216 | "outputs": [],
217 | "source": [
218 | "soup.find_all('a',string = 'Vita')"
219 | ]
220 | },
221 | {
222 | "cell_type": "code",
223 | "execution_count": null,
224 | "metadata": {
225 | "collapsed": false
226 | },
227 | "outputs": [],
228 | "source": [
229 | "soup.find_all('a',string = 'Vita')[0].get('href')"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": null,
235 | "metadata": {
236 | "collapsed": true
237 | },
238 | "outputs": [],
239 | "source": [
240 | "response = requests.get('https://www.gamefaqs.com/vita', headers ={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'})"
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
245 | "execution_count": null,
246 | "metadata": {
247 | "collapsed": false
248 | },
249 | "outputs": [],
250 | "source": [
251 | "response.status_code"
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": null,
257 | "metadata": {
258 | "collapsed": true
259 | },
260 | "outputs": [],
261 | "source": [
262 | "soup = BeautifulSoup(response.content, 'html.parser')"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": null,
268 | "metadata": {
269 | "collapsed": true
270 | },
271 | "outputs": [],
272 | "source": [
273 | "soup.get_text(strip = True)"
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": null,
279 | "metadata": {
280 | "collapsed": true
281 | },
282 | "outputs": [],
283 | "source": [
284 | "\"Top 10 Games\""
285 | ]
286 | },
287 | {
288 | "cell_type": "code",
289 | "execution_count": null,
290 | "metadata": {
291 | "collapsed": false
292 | },
293 | "outputs": [],
294 | "source": [
295 | "soup.find_all(string = 'Top 10 Games' )"
296 | ]
297 | },
298 | {
299 | "cell_type": "code",
300 | "execution_count": null,
301 | "metadata": {
302 | "collapsed": true
303 | },
304 | "outputs": [],
305 | "source": []
306 | },
307 | {
308 | "cell_type": "code",
309 | "execution_count": null,
310 | "metadata": {
311 | "collapsed": true
312 | },
313 | "outputs": [],
314 | "source": [
315 | "soup.find_all(string = 'Top 10 Games')[0].find_parents()\n",
316 | "\n",
317 | "#Lots of elements"
318 | ]
319 | },
320 | {
321 | "cell_type": "code",
322 | "execution_count": null,
323 | "metadata": {
324 | "collapsed": false
325 | },
326 | "outputs": [],
327 | "source": [
328 | "len(soup.find_all(string = 'Top 10 Games')[0].find_parents())"
329 | ]
330 | },
331 | {
332 | "cell_type": "code",
333 | "execution_count": null,
334 | "metadata": {
335 | "collapsed": false
336 | },
337 | "outputs": [],
338 | "source": [
339 | "soup.find_all(string = 'Top 10 Games')[0].find_parents()[0]"
340 | ]
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": null,
345 | "metadata": {
346 | "collapsed": false
347 | },
348 | "outputs": [],
349 | "source": [
350 | "soup.find_all(string = 'Top 10 Games')[0].find_parents()[1]"
351 | ]
352 | },
353 | {
354 | "cell_type": "code",
355 | "execution_count": null,
356 | "metadata": {
357 | "collapsed": true
358 | },
359 | "outputs": [],
360 | "source": [
361 | "soup.find_all(string = 'Top 10 Games')[0].find_parents()[2]"
362 | ]
363 | },
364 | {
365 | "cell_type": "code",
366 | "execution_count": null,
367 | "metadata": {
368 | "collapsed": false
369 | },
370 | "outputs": [],
371 | "source": [
372 | "top_ten_vita_games =soup.find_all(string = 'Top 10 Games')[0].find_parents()[2]"
373 | ]
374 | },
375 | {
376 | "cell_type": "code",
377 | "execution_count": null,
378 | "metadata": {
379 | "collapsed": false
380 | },
381 | "outputs": [],
382 | "source": [
383 | "type(top_ten_vita_games)"
384 | ]
385 | },
386 | {
387 | "cell_type": "code",
388 | "execution_count": null,
389 | "metadata": {
390 | "collapsed": true
391 | },
392 | "outputs": [],
393 | "source": [
394 | "for i in top_ten_vita_games.find('ol').find_all('li'):\n",
395 | " print(i.get_text().strip())"
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": null,
401 | "metadata": {
402 | "collapsed": true
403 | },
404 | "outputs": [],
405 | "source": [
406 | "for i in top_ten_vita_games.find('ol').find_all('li'):\n",
407 | " print(i.find('a').get_text().strip())"
408 | ]
409 | },
410 | {
411 | "cell_type": "code",
412 | "execution_count": null,
413 | "metadata": {
414 | "collapsed": false
415 | },
416 | "outputs": [],
417 | "source": [
418 | "top_ten_vita_games.find('ol').find_all('li')[1].get_text().strip()\n",
419 | " "
420 | ]
421 | },
422 | {
423 | "cell_type": "code",
424 | "execution_count": null,
425 | "metadata": {
426 | "collapsed": false
427 | },
428 | "outputs": [],
429 | "source": [
430 | "for i in top_ten_vita_games.find('ol').find_all('li'):\n",
431 | " print(i.get_text().strip().split())"
432 | ]
433 | },
434 | {
435 | "cell_type": "code",
436 | "execution_count": null,
437 | "metadata": {
438 | "collapsed": false
439 | },
440 | "outputs": [],
441 | "source": [
442 | "for i in top_ten_vita_games.find('ol').find_all('li'):\n",
443 | " print(' '.join((i.get_text()).strip().split()))\n",
444 | " \n",
445 | " \n",
446 | " \n",
447 | " \n",
448 | " \n",
449 | " "
450 | ]
451 | },
452 | {
453 | "cell_type": "code",
454 | "execution_count": null,
455 | "metadata": {
456 | "collapsed": true
457 | },
458 | "outputs": [],
459 | "source": []
460 | }
461 | ],
462 | "metadata": {
463 | "kernelspec": {
464 | "display_name": "Python 3",
465 | "language": "python",
466 | "name": "python3"
467 | },
468 | "language_info": {
469 | "codemirror_mode": {
470 | "name": "ipython",
471 | "version": 3
472 | },
473 | "file_extension": ".py",
474 | "mimetype": "text/x-python",
475 | "name": "python",
476 | "nbconvert_exporter": "python",
477 | "pygments_lexer": "ipython3",
478 | "version": "3.6.0"
479 | }
480 | },
481 | "nbformat": 4,
482 | "nbformat_minor": 1
483 | }
484 |
--------------------------------------------------------------------------------
/Navigating_websites_1-4_(all parts of the series).ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Navigating Gamefaqs"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {
14 | "collapsed": false
15 | },
16 | "outputs": [],
17 | "source": [
18 | "import requests\n",
19 | "from bs4 import BeautifulSoup\n",
20 | "\n",
21 | "response = requests.get('https://www.gamefaqs.com', headers={'User-Agent': 'Mozilla/5.0'})"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {
28 | "collapsed": false
29 | },
30 | "outputs": [],
31 | "source": [
32 | "response.status_code"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": null,
38 | "metadata": {
39 | "collapsed": true
40 | },
41 | "outputs": [],
42 | "source": [
43 | "response = requests.get('https://www.gamefaqs.com', headers ={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'})"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": null,
49 | "metadata": {
50 | "collapsed": false
51 | },
52 | "outputs": [],
53 | "source": [
54 | "response.status_code"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {
61 | "collapsed": true
62 | },
63 | "outputs": [],
64 | "source": []
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {
70 | "collapsed": false
71 | },
72 | "outputs": [],
73 | "source": [
74 | "#if you get a 401 error, try this method\n",
75 | "\n",
76 | "import requests\n",
77 | "\n",
78 | "session = requests.Session()\n",
79 | "response = session.get('https://www.gamefaqs.com', headers={'User-Agent': 'Mozilla/5.0'})\n",
80 | "\n",
81 | "print(response.status_code)\n",
82 | "\n",
83 | "\n",
84 | "\n"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": null,
90 | "metadata": {
91 | "collapsed": false
92 | },
93 | "outputs": [],
94 | "source": [
95 | "soup = BeautifulSoup(response.content, 'html.parser')"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {
102 | "collapsed": false
103 | },
104 | "outputs": [],
105 | "source": [
106 | "soup.title"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": null,
112 | "metadata": {
113 | "collapsed": true
114 | },
115 | "outputs": [],
116 | "source": [
117 | "soup.get_text(strip = True) #gets rid of new lines"
118 | ]
119 | },
120 | {
121 | "cell_type": "code",
122 | "execution_count": null,
123 | "metadata": {
124 | "collapsed": true
125 | },
126 | "outputs": [],
127 | "source": [
128 | "#Let's get info about Vita"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": null,
134 | "metadata": {
135 | "collapsed": false
136 | },
137 | "outputs": [],
138 | "source": [
139 | "soup.find_all(string = 'Vita') #capitalized"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": null,
145 | "metadata": {
146 | "collapsed": false
147 | },
148 | "outputs": [],
149 | "source": [
150 | "soup.find_all(string = 'vita') #lower-case"
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": null,
156 | "metadata": {
157 | "collapsed": false
158 | },
159 | "outputs": [],
160 | "source": [
161 | "type(soup.find_all(string = 'Vita')) #find_all returns a bs4.element.ResultSet, not a Tag"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": null,
167 | "metadata": {
168 | "collapsed": false
169 | },
170 | "outputs": [],
171 | "source": [
172 | "len(soup.find_all(string = 'Vita')) #how many results"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": null,
178 | "metadata": {
179 | "collapsed": true
180 | },
181 | "outputs": [],
182 | "source": [
183 | "soup.find_all(string = 'Vita')[0].find_parents()"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": null,
189 | "metadata": {
190 | "collapsed": false
191 | },
192 | "outputs": [],
193 | "source": [
194 | "len(soup.find_all(string = 'Vita')[0].find_parents())"
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "execution_count": null,
200 | "metadata": {
201 | "collapsed": false
202 | },
203 | "outputs": [],
204 | "source": [
205 | "soup.find_all('p',string = 'Vita') #no results"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": null,
211 | "metadata": {
212 | "collapsed": false
213 | },
214 | "outputs": [],
215 | "source": [
216 | "soup.find_all('a',string = 'Vita')"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": null,
222 | "metadata": {
223 | "collapsed": false
224 | },
225 | "outputs": [],
226 | "source": [
227 | "soup.find_all('a',string = 'Vita')[0].get('href')"
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": null,
233 | "metadata": {
234 | "collapsed": true
235 | },
236 | "outputs": [],
237 | "source": [
238 | "response = requests.get('https://www.gamefaqs.com/vita', headers ={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'})"
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": null,
244 | "metadata": {
245 | "collapsed": false
246 | },
247 | "outputs": [],
248 | "source": [
249 | "response.status_code"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": null,
255 | "metadata": {
256 | "collapsed": true
257 | },
258 | "outputs": [],
259 | "source": [
260 | "soup = BeautifulSoup(response.content, 'html.parser')"
261 | ]
262 | },
263 | {
264 | "cell_type": "code",
265 | "execution_count": null,
266 | "metadata": {
267 | "collapsed": true
268 | },
269 | "outputs": [],
270 | "source": [
271 | "soup.get_text(strip = True)"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": null,
277 | "metadata": {
278 | "collapsed": true
279 | },
280 | "outputs": [],
281 | "source": [
282 | "\"Top 10 Games\""
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": null,
288 | "metadata": {
289 | "collapsed": false
290 | },
291 | "outputs": [],
292 | "source": [
293 | "soup.find_all(string = 'Top 10 Games' )"
294 | ]
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": null,
299 | "metadata": {
300 | "collapsed": true
301 | },
302 | "outputs": [],
303 | "source": []
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": null,
308 | "metadata": {
309 | "collapsed": true
310 | },
311 | "outputs": [],
312 | "source": [
313 | "soup.find_all(string = 'Top 10 Games')[0].find_parents()\n",
314 | "\n",
315 | "#Lots of elements"
316 | ]
317 | },
318 | {
319 | "cell_type": "code",
320 | "execution_count": null,
321 | "metadata": {
322 | "collapsed": false
323 | },
324 | "outputs": [],
325 | "source": [
326 | "len(soup.find_all(string = 'Top 10 Games')[0].find_parents())"
327 | ]
328 | },
329 | {
330 | "cell_type": "code",
331 | "execution_count": null,
332 | "metadata": {
333 | "collapsed": false
334 | },
335 | "outputs": [],
336 | "source": [
337 | "soup.find_all(string = 'Top 10 Games')[0].find_parents()[0]"
338 | ]
339 | },
340 | {
341 | "cell_type": "code",
342 | "execution_count": null,
343 | "metadata": {
344 | "collapsed": false
345 | },
346 | "outputs": [],
347 | "source": [
348 | "soup.find_all(string = 'Top 10 Games')[0].find_parents()[1]"
349 | ]
350 | },
351 | {
352 | "cell_type": "code",
353 | "execution_count": null,
354 | "metadata": {
355 | "collapsed": true
356 | },
357 | "outputs": [],
358 | "source": [
359 | "soup.find_all(string = 'Top 10 Games')[0].find_parents()[2]"
360 | ]
361 | },
362 | {
363 | "cell_type": "code",
364 | "execution_count": null,
365 | "metadata": {
366 | "collapsed": false
367 | },
368 | "outputs": [],
369 | "source": [
370 | "top_ten_vita_games =soup.find_all(string = 'Top 10 Games')[0].find_parents()[2]"
371 | ]
372 | },
373 | {
374 | "cell_type": "code",
375 | "execution_count": null,
376 | "metadata": {
377 | "collapsed": false
378 | },
379 | "outputs": [],
380 | "source": [
381 | "type(top_ten_vita_games)"
382 | ]
383 | },
384 | {
385 | "cell_type": "code",
386 | "execution_count": null,
387 | "metadata": {
388 | "collapsed": true
389 | },
390 | "outputs": [],
391 | "source": [
392 | "for i in top_ten_vita_games.find('ol').find_all('li'):\n",
393 | " print(i.get_text().strip())"
394 | ]
395 | },
396 | {
397 | "cell_type": "code",
398 | "execution_count": null,
399 | "metadata": {
400 | "collapsed": true
401 | },
402 | "outputs": [],
403 | "source": [
404 | "for i in top_ten_vita_games.find('ol').find_all('li'):\n",
405 | " print(i.find('a').get_text().strip())"
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "execution_count": null,
411 | "metadata": {
412 | "collapsed": false
413 | },
414 | "outputs": [],
415 | "source": [
416 | "top_ten_vita_games.find('ol').find_all('li')[1].get_text().strip()\n",
417 | " "
418 | ]
419 | },
420 | {
421 | "cell_type": "code",
422 | "execution_count": null,
423 | "metadata": {
424 | "collapsed": false
425 | },
426 | "outputs": [],
427 | "source": [
428 | "for i in top_ten_vita_games.find('ol').find_all('li'):\n",
429 | " print(i.get_text().strip().split())"
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "execution_count": null,
435 | "metadata": {
436 | "collapsed": false
437 | },
438 | "outputs": [],
439 | "source": [
440 | "for i in top_ten_vita_games.find('ol').find_all('li'):\n",
441 | " print(' '.join((i.get_text()).strip().split()))\n",
442 | " \n",
443 | " \n",
444 | " \n",
445 | " \n",
446 | " \n",
447 | " "
448 | ]
449 | },
450 | {
451 | "cell_type": "markdown",
452 | "metadata": {},
453 | "source": [
454 | "# 'Upcoming PlayStation Vita Game Releases'"
455 | ]
456 | },
457 | {
458 | "cell_type": "code",
459 | "execution_count": null,
460 | "metadata": {
461 | "collapsed": true
462 | },
463 | "outputs": [],
464 | "source": [
465 | "#Brief Overview of some of these methods:\n",
466 | " \n",
467 | "#find() and find_all() \n",
468 | "#find_parents() and find_parent()\n",
469 | "#find_next_siblings() and find_next_sibling()\n",
470 | "#find_previous_siblings() and find_previous_sibling()\n",
471 | "#find_all_next() and find_next()\n",
472 | "#find_all_previous() and find_previous()"
473 | ]
474 | },
475 | {
476 | "cell_type": "code",
477 | "execution_count": null,
478 | "metadata": {
479 | "collapsed": true
480 | },
481 | "outputs": [],
482 | "source": [
483 | "#Sample html\n",
484 | "\n",
485 | "\n",
486 | "html1 = '''\n",
487 | "\n",
488 | "\n",
489 | "<title>HTML Sample Cell</title>\n",
490 | " \n",
491 | "<div>\n",
492 | "<h1>New York</h1>\n",
493 | "<p>New York is the home of various cultures and ethnicities.</p>\n",
494 | "</div>\n",
495 | "\n",
496 | "<div>\n",
497 | "<h1>London</h1>\n",
498 | "<p>London is the home of various cultures and ethnicities.</p>\n",
499 | "</div>\n",
500 | "'''"
501 | ]
502 | },
503 | {
504 | "cell_type": "code",
505 | "execution_count": null,
506 | "metadata": {
507 | "collapsed": true
508 | },
509 | "outputs": [],
510 | "source": [
511 | "soup = BeautifulSoup(html1, 'html.parser')"
512 | ]
513 | },
514 | {
515 | "cell_type": "code",
516 | "execution_count": null,
517 | "metadata": {
518 | "collapsed": true
519 | },
520 | "outputs": [],
521 | "source": [
522 | "soup.find_all('div')"
523 | ]
524 | },
525 | {
526 | "cell_type": "code",
527 | "execution_count": null,
528 | "metadata": {
529 | "collapsed": true
530 | },
531 | "outputs": [],
532 | "source": [
533 | "#find parents"
534 | ]
535 | },
536 | {
537 | "cell_type": "code",
538 | "execution_count": null,
539 | "metadata": {
540 | "collapsed": false
541 | },
542 | "outputs": [],
543 | "source": [
544 | "soup.find(string = 'New York').find_parent()"
545 | ]
546 | },
547 | {
548 | "cell_type": "code",
549 | "execution_count": null,
550 | "metadata": {
551 | "collapsed": true
552 | },
553 | "outputs": [],
554 | "source": [
555 | "soup.find(string = 'New York').find_parents()"
556 | ]
557 | },
558 | {
559 | "cell_type": "code",
560 | "execution_count": null,
561 | "metadata": {
562 | "collapsed": false
563 | },
564 | "outputs": [],
565 | "source": [
566 | "#find_siblings\n",
567 | "\n",
568 | "soup.find(string = 'New York').find_next_siblings()\n",
569 | "\n"
570 | ]
571 | },
572 | {
573 | "cell_type": "code",
574 | "execution_count": null,
575 | "metadata": {
576 | "collapsed": false
577 | },
578 | "outputs": [],
579 | "source": [
580 | "soup.find(string = 'New York').find_previous_siblings()"
581 | ]
582 | },
583 | {
584 | "cell_type": "code",
585 | "execution_count": null,
586 | "metadata": {
587 | "collapsed": false
588 | },
589 | "outputs": [],
590 | "source": [
591 | "soup.find(string = 'New York').find_parent().find_next_sibling()"
592 | ]
593 | },
594 | {
595 | "cell_type": "code",
596 | "execution_count": null,
597 | "metadata": {
598 | "collapsed": false
599 | },
600 | "outputs": [],
601 | "source": [
602 | "soup.find(string = 'New York').find_parent().find_previous_siblings()"
603 | ]
604 | },
605 | {
606 | "cell_type": "code",
607 | "execution_count": null,
608 | "metadata": {
609 | "collapsed": true
610 | },
611 | "outputs": [],
612 | "source": [
613 | "soup.find(string = 'New York').find_parent().find_previous_sibling()"
614 | ]
615 | },
616 | {
617 | "cell_type": "code",
618 | "execution_count": null,
619 | "metadata": {
620 | "collapsed": false
621 | },
622 | "outputs": [],
623 | "source": [
624 | "soup.find(string = 'New York').find_parent().find_next_sibling().find_previous_siblings()"
625 | ]
626 | },
627 | {
628 | "cell_type": "code",
629 | "execution_count": null,
630 | "metadata": {
631 | "collapsed": true
632 | },
633 | "outputs": [],
634 | "source": [
635 | "#find next and previous\n",
636 | "#find_all_next() and find_next()\n",
637 | "#find_all_previous() and find_previous()"
638 | ]
639 | },
640 | {
641 | "cell_type": "code",
642 | "execution_count": null,
643 | "metadata": {
644 | "collapsed": false
645 | },
646 | "outputs": [],
647 | "source": [
648 | "soup.find(string = 'New York').find_next()"
649 | ]
650 | },
651 | {
652 | "cell_type": "code",
653 | "execution_count": null,
654 | "metadata": {
655 | "collapsed": false
656 | },
657 | "outputs": [],
658 | "source": [
659 | "soup.find(string = 'New York').find_next().find_next()"
660 | ]
661 | },
662 | {
663 | "cell_type": "code",
664 | "execution_count": null,
665 | "metadata": {
666 | "collapsed": false
667 | },
668 | "outputs": [],
669 | "source": [
670 | "soup.find(string = 'New York').find_all_next()"
671 | ]
672 | },
673 | {
674 | "cell_type": "code",
675 | "execution_count": null,
676 | "metadata": {
677 | "collapsed": false
678 | },
679 | "outputs": [],
680 | "source": [
681 | "soup.find(string = 'London').find_all_next()"
682 | ]
683 | },
684 | {
685 | "cell_type": "code",
686 | "execution_count": null,
687 | "metadata": {
688 | "collapsed": false
689 | },
690 | "outputs": [],
691 | "source": [
692 | "soup.find(string = 'New York').find_previous() #closest tag that opens before this string; equals the parent when the string is that tag's first child"
693 | ]
694 | },
695 | {
696 | "cell_type": "code",
697 | "execution_count": null,
698 | "metadata": {
699 | "collapsed": false
700 | },
701 | "outputs": [],
702 | "source": [
703 | "soup.find(string = 'New York').find_all_previous() #every tag that opens earlier in the document: the ancestors plus any other preceding tags"
704 | ]
705 | },
706 | {
707 | "cell_type": "code",
708 | "execution_count": null,
709 | "metadata": {
710 | "collapsed": true
711 | },
712 | "outputs": [],
713 | "source": []
714 | },
715 | {
716 | "cell_type": "markdown",
717 | "metadata": {},
718 | "source": [
719 | "# Let's find UPCOMING VITA RELEASES"
720 | ]
721 | },
722 | {
723 | "cell_type": "code",
724 | "execution_count": 39,
725 | "metadata": {
726 | "collapsed": true
727 | },
728 | "outputs": [],
729 | "source": [
730 | "import requests\n",
731 | "from bs4 import BeautifulSoup"
732 | ]
733 | },
734 | {
735 | "cell_type": "code",
736 | "execution_count": 40,
737 | "metadata": {
738 | "collapsed": true
739 | },
740 | "outputs": [],
741 | "source": [
742 | "response = requests.get('https://www.gamefaqs.com/vita', headers ={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'}, timeout=10)  # browser-like User-Agent is sent with the request; timeout stops the cell hanging forever if the server never answers"
743 | ]
744 | },
745 | {
746 | "cell_type": "code",
747 | "execution_count": 41,
748 | "metadata": {
749 | "collapsed": false
750 | },
751 | "outputs": [
752 | {
753 | "data": {
754 | "text/plain": [
755 | "200"
756 | ]
757 | },
758 | "execution_count": 41,
759 | "metadata": {},
760 | "output_type": "execute_result"
761 | }
762 | ],
763 | "source": [
764 | "response.status_code"
765 | ]
766 | },
767 | {
768 | "cell_type": "code",
769 | "execution_count": 42,
770 | "metadata": {
771 | "collapsed": true
772 | },
773 | "outputs": [],
774 | "source": [
775 | "soup = BeautifulSoup(response.content, 'html.parser')"
776 | ]
777 | },
778 | {
779 | "cell_type": "code",
780 | "execution_count": 43,
781 | "metadata": {
782 | "collapsed": false
783 | },
784 | "outputs": [
785 | {
786 | "data": {
787 | "text/plain": [
788 | "'Upcoming PlayStation Vita Game Releases'"
789 | ]
790 | },
791 | "execution_count": 43,
792 | "metadata": {},
793 | "output_type": "execute_result"
794 | }
795 | ],
796 | "source": [
797 | "soup.find(string = 'Upcoming PlayStation Vita Game Releases')"
798 | ]
799 | },
800 | {
801 | "cell_type": "code",
802 | "execution_count": null,
803 | "metadata": {
804 | "collapsed": true
805 | },
806 | "outputs": [],
807 | "source": [
808 | "soup.find(string = 'Upcoming PlayStation Vita Game Releases').find_parents()"
809 | ]
810 | },
811 | {
812 | "cell_type": "code",
813 | "execution_count": 7,
814 | "metadata": {
815 | "collapsed": false
816 | },
817 | "outputs": [
818 | {
819 | "data": {
820 | "text/plain": [
821 | "11"
822 | ]
823 | },
824 | "execution_count": 7,
825 | "metadata": {},
826 | "output_type": "execute_result"
827 | }
828 | ],
829 | "source": [
830 | "len(soup.find(string = 'Upcoming PlayStation Vita Game Releases').find_parents())"
831 | ]
832 | },
833 | {
834 | "cell_type": "code",
835 | "execution_count": null,
836 | "metadata": {
837 | "collapsed": true
838 | },
839 | "outputs": [],
840 | "source": [
841 | "# Iterate the ancestors directly: no hard-coded count, and the search runs once instead of on every pass.\n",
842 | "for parent in soup.find(string = 'Upcoming PlayStation Vita Game Releases').find_parents():\n",
843 | "    print(parent)\n",
844 | "    print('\\n***************************************************')"
845 | ]
846 | },
847 | {
848 | "cell_type": "code",
849 | "execution_count": null,
850 | "metadata": {
851 | "collapsed": true
852 | },
853 | "outputs": [],
854 | "source": []
855 | },
856 | {
857 | "cell_type": "code",
858 | "execution_count": null,
859 | "metadata": {
860 | "collapsed": true
861 | },
862 | "outputs": [],
863 | "source": [
864 | "soup.find(string = 'Upcoming PlayStation Vita Game Releases').find_parents()"
865 | ]
866 | },
867 | {
868 | "cell_type": "code",
869 | "execution_count": null,
870 | "metadata": {
871 | "collapsed": true
872 | },
873 | "outputs": [],
874 | "source": [
875 | "soup.find(string = 'Upcoming PlayStation Vita Game Releases').find_parents()[2]"
876 | ]
877 | },
878 | {
879 | "cell_type": "code",
880 | "execution_count": 45,
881 | "metadata": {
882 | "collapsed": true
883 | },
884 | "outputs": [],
885 | "source": [
886 | "North_America_games = soup.find(string = 'Upcoming PlayStation Vita Game Releases').find_parents()[2]"
887 | ]
888 | },
889 | {
890 | "cell_type": "code",
891 | "execution_count": null,
892 | "metadata": {
893 | "collapsed": true
894 | },
895 | "outputs": [],
896 | "source": [
897 | "North_America_games"
898 | ]
899 | },
900 | {
901 | "cell_type": "code",
902 | "execution_count": 47,
903 | "metadata": {
904 | "collapsed": false
905 | },
906 | "outputs": [
907 | {
908 | "name": "stdout",
909 | "output_type": "stream",
910 | "text": [
911 | "07/18\n",
912 | "99Vidas\n",
913 | "Fallen Legion: Flames of Rebellion\n",
914 | "07/28\n",
915 | "Collar x Malice\n",
916 | "08/11\n",
917 | "Drive Girls\n",
918 | "08/15\n",
919 | "Summon Night 6: Lost Borders\n",
920 | "08/29\n",
921 | "WindJammers\n",
922 | "08/31\n",
923 | "Mary Skelter: Nightmares\n",
924 | "09/12\n",
925 | "Ys VIII: Lacrimosa of DANA\n",
926 | "09/26\n",
927 | "Danganronpa V3: Killing Harmony\n",
928 | "09/29\n",
929 | "Bad Apple Wars\n",
930 | "10/10\n",
931 | "Touhou Kobuto V: Burst Battle\n",
932 | "10/24\n",
933 | "Yomawari: Midnight Shadows\n",
934 | "11/30\n",
935 | "Tokyo Tattoo Girls\n",
936 | "Utawarerumono: Mask of Truth\n",
937 | "03/20\n",
938 | "Penny-Punching Princess\n"
939 | ]
940 | }
941 | ],
942 | "source": [
943 | "for release_day in North_America_games.find_all('dl')[:-1]:  # the final dl is left out\n",
944 | "    print(release_day.find('dt').get_text())\n",
945 | "    for game in release_day.find_all('dd'):\n",
946 | "        print(game.get_text())"
947 | ]
948 | },
949 | {
950 | "cell_type": "code",
951 | "execution_count": null,
952 | "metadata": {
953 | "collapsed": true
954 | },
955 | "outputs": [],
956 | "source": []
957 | },
958 | {
959 | "cell_type": "code",
960 | "execution_count": null,
961 | "metadata": {
962 | "collapsed": true
963 | },
964 | "outputs": [],
965 | "source": [
966 | "soup.find(string = 'Japan').find_all_next()"
967 | ]
968 | },
969 | {
970 | "cell_type": "code",
971 | "execution_count": null,
972 | "metadata": {
973 | "collapsed": true
974 | },
975 | "outputs": [],
976 | "source": [
977 | "len(soup.find(string = 'Japan').find_all_next())"
978 | ]
979 | },
980 | {
981 | "cell_type": "code",
982 | "execution_count": 22,
983 | "metadata": {
984 | "collapsed": false
985 | },
986 | "outputs": [
987 | {
988 | "data": {
989 | "text/plain": [
990 | "[
See Also...
,
North America
,
Japan
,
Europe
]"
991 | ]
992 | },
993 | "execution_count": 22,
994 | "metadata": {},
995 | "output_type": "execute_result"
996 | }
997 | ],
998 | "source": [
999 | "soup.select('h3')"
1000 | ]
1001 | },
1002 | {
1003 | "cell_type": "code",
1004 | "execution_count": 28,
1005 | "metadata": {
1006 | "collapsed": false
1007 | },
1008 | "outputs": [
1009 | {
1010 | "data": {
1011 | "text/plain": [
1012 | "
Dragon Ball Z: Battle of Z (Welcome Price!!)"
1013 | ]
1014 | },
1015 | "execution_count": 28,
1016 | "metadata": {},
1017 | "output_type": "execute_result"
1018 | }
1019 | ],
1020 | "source": [
1021 | "soup.select('h3')[2].find_next().find_next().find_next().find_next()"
1022 | ]
1023 | },
1024 | {
1025 | "cell_type": "code",
1026 | "execution_count": 34,
1027 | "metadata": {
1028 | "collapsed": true
1029 | },
1030 | "outputs": [],
1031 | "source": [
1032 | "append_list =[]\n",
1033 | "x =soup.select('h3')[2]\n",
1034 | "\n",
1035 | "while str(x) != '
Europe
':\n",
1036 | " x = x.find_next()\n",
1037 | " append_list.append(x.get_text())\n",
1038 | " \n",
1039 | " "
1040 | ]
1041 | },
1042 | {
1043 | "cell_type": "code",
1044 | "execution_count": null,
1045 | "metadata": {
1046 | "collapsed": true
1047 | },
1048 | "outputs": [],
1049 | "source": []
1050 | },
1051 | {
1052 | "cell_type": "code",
1053 | "execution_count": 36,
1054 | "metadata": {
1055 | "collapsed": false
1056 | },
1057 | "outputs": [
1058 | {
1059 | "name": "stdout",
1060 | "output_type": "stream",
1061 | "text": [
1062 | "07/20\n",
1063 | "Dragon Ball Z: Battle of Z (Welcome Price!!)\n",
1064 | "Farming Simulator 18: Pocket Nouen 4\n",
1065 | "HimeHibi: Princess Days\n",
1066 | "Tokyo Ghoul: Jail (Welcome Price!!)\n",
1067 | "Under Night In-Birth Exe:Late[st]\n",
1068 | "World Election\n",
1069 | "07/21\n",
1070 | "Onigiri\n",
1071 | "07/27\n",
1072 | "Grisaia no Kajitsu: Side Episode\n",
1073 | "Hiiro no Kakera: Omoi Iro no Kioku\n",
1074 | "Kenka Bancho Otome: Kanzenmuketsu no My Honey\n",
1075 | "Utsusemi no Mawari\n",
1076 | "Wagamama High Spec\n",
1077 | "08/24\n",
1078 | "Futagoza no Paradox\n",
1079 | "Starry * Sky: Autumn Stories\n",
1080 | "Taishou Mebiusline: Teito Bibouroku Hare\n",
1081 | "The Lost Child\n",
1082 | "Yomawari: Midnight Shadows\n",
1083 | "08/31\n",
1084 | "Crank In\n",
1085 | "Dungeon Travelers 2: Ouritsu Toshokan to Mamono no Fuuin (Aqua...\n",
1086 | "To Heart 2: Dungeon Travelers (Aquaprice 2800)\n"
1087 | ]
1088 | }
1089 | ],
1090 | "source": [
1091 | "from collections import OrderedDict\n",
1092 | "\n",
1093 | "# De-duplicate once while keeping first-seen order, then print everything except the last two entries.\n",
1094 | "unique_entries = list(OrderedDict.fromkeys(append_list))\n",
1095 | "for entry in unique_entries[:-2]:\n",
1096 | "    print(entry)\n"
1097 | ]
1098 | },
1099 | {
1100 | "cell_type": "code",
1101 | "execution_count": null,
1102 | "metadata": {
1103 | "collapsed": true
1104 | },
1105 | "outputs": [],
1106 | "source": [
1107 | "list(OrderedDict.fromkeys(append_list))"
1108 | ]
1109 | },
1110 | {
1111 | "cell_type": "code",
1112 | "execution_count": null,
1113 | "metadata": {
1114 | "collapsed": true
1115 | },
1116 | "outputs": [],
1117 | "source": [
1118 | "\n",
1119 | "\n",
1120 | "\n",
1121 | "###THE END####"
1122 | ]
1123 | }
1124 | ],
1125 | "metadata": {
1126 | "kernelspec": {
1127 | "display_name": "Python 3",
1128 | "language": "python",
1129 | "name": "python3"
1130 | },
1131 | "language_info": {
1132 | "codemirror_mode": {
1133 | "name": "ipython",
1134 | "version": 3
1135 | },
1136 | "file_extension": ".py",
1137 | "mimetype": "text/x-python",
1138 | "name": "python",
1139 | "nbconvert_exporter": "python",
1140 | "pygments_lexer": "ipython3",
1141 | "version": "3.6.0"
1142 | }
1143 | },
1144 | "nbformat": 4,
1145 | "nbformat_minor": 1
1146 | }
1147 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Gentle-Intro-to-Web-Scraping
2 | This is the code for my Gentle Intro to Web Scraping series. Enjoy!
3 |
--------------------------------------------------------------------------------