├── runtime.txt ├── Procfile ├── requirements.txt ├── sample_docs ├── beautifulSoup_samples │ ├── helloWorld.py │ ├── findAllLinks.py │ ├── basicTagAtt.py │ ├── testeLocalUnico.py │ ├── findElementByClass.py │ ├── testeLocalVarios.py │ └── testeAdoroCinema.py ├── flask_samples │ └── getApi.py ├── emcartazUnico.html └── emcartazVarios.html ├── settings └── config.json ├── .vscode └── tasks.json ├── test.py ├── README.md ├── web └── flask │ ├── estreiasCinema.py │ └── filmesCartazCinema.py └── web.py /runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.6.5 -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: python web.py 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.5.3 2 | Flask==0.12 -------------------------------------------------------------------------------- /sample_docs/beautifulSoup_samples/helloWorld.py: -------------------------------------------------------------------------------- 1 | print("hello world 2") -------------------------------------------------------------------------------- /settings/config.json: -------------------------------------------------------------------------------- 1 | { 2 | // for the documentation about the tasks.json format 3 | "template": "adoroCinema.json", 4 | "urlOrigem": "http://www.adorocinema.com/filmes/em-cartaz/estreias/" 5 | } -------------------------------------------------------------------------------- /sample_docs/beautifulSoup_samples/findAllLinks.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import urllib.request 3 | soup = BeautifulSoup(urllib.request.urlopen("http://google.com").read()) 4 | 5 | for link in soup.find_all('a'): 6 | print(link.get('href')) -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | // See https://go.microsoft.com/fwlink/?LinkId=733558 3 | // for the documentation about the tasks.json format 4 | "version": "0.1.0", 5 | "command": "python", 6 | "isShellCommand": true, 7 | "args": ["${file}"], 8 | "showOutput": "always" 9 | } -------------------------------------------------------------------------------- /sample_docs/beautifulSoup_samples/basicTagAtt.py: -------------------------------------------------------------------------------- 1 | #1 2 | from bs4 import BeautifulSoup 3 | #2 4 | import urllib.request 5 | #3 6 | soup = BeautifulSoup('Extremely bold',"html.parser") 7 | #4 8 | tag = soup.b 9 | #5 10 | print(tag['class']) 11 | 12 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, jsonify, request 2 | from bs4 import BeautifulSoup 3 | from urllib.request import urlopen, Request 4 | import os 5 | 6 | URL = "http://www.adorocinema.com/filmes/todos-filmes/notas-espectadores/" 7 | 8 | html_doc = urlopen(URL).read() 9 | soup = BeautifulSoup(html_doc, "html.parser") 10 | data = [] 11 | for dataBox in soup.find_all("div", class_="data_box"): 12 | content = dataBox.find("div", class_="content") 13 | print(content.text.strip().split('Gênero')[1][100:]) 14 | -------------------------------------------------------------------------------- /sample_docs/flask_samples/getApi.py: -------------------------------------------------------------------------------- 1 | #PARTE1 2 | from flask import Flask, jsonify, request 3 | from bs4 import BeautifulSoup 4 | from urllib.request import urlopen, Request 5 | import os 6 | 7 | #PARTE2 8 | app = Flask(__name__) 9 | 10 | #PARTE3 11 | @app.route('/api/v1/filmes', methods=['GET']) 12 | def filmes2(): 13 | #MEU CÒDIGO AQUI 14 | return "Tudo pronto!" 15 | 16 | #PARTE4 17 | if __name__ == '__main__': 18 | # Bind to PORT if defined, otherwise default to 5000. 19 | port = int(os.environ.get('PORT', 5000)) 20 | # Tem que ser 0.0.0.0 para rodar no Heroku 21 | app.run(host='127.0.0.1', port=port) -------------------------------------------------------------------------------- /sample_docs/beautifulSoup_samples/testeLocalUnico.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import urllib.request 3 | html = urllib.request.urlopen("file:///C:/Users/Rodrigo/Desktop/helloWordPy/emcartazUnico.html").read() 4 | soup = BeautifulSoup(html, "html.parser") 5 | 6 | #RETORNA NOME DO FILME 7 | resp = soup.find("h2", class_="tt_18 d_inline").find("a", class_="no_underline") 8 | print("Nome: " + resp.text.strip()) 9 | 10 | #RETORNA IMAGEM 11 | resp = soup.find(class_="img_side_content") 12 | print("Imagem: " + resp.img['src'].strip()) 13 | 14 | #RETORNA SINOPSE 15 | resp = soup.find("div", class_="content").find("p") 16 | print("Sinopse: " + resp.text.strip()) 17 | 18 | #RETORNA DATA LANÇAMENTO 19 | resp = soup.find("ul", class_="list_item_p2v tab_col_first").find("div", class_="oflow_a") 20 | print("Data lancamento: " + resp.text.strip()) -------------------------------------------------------------------------------- /sample_docs/beautifulSoup_samples/findElementByClass.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import urllib.request 3 | #html = urllib.request.urlopen("http://google.com").read() 4 | html = """ 5 | The Dormouse's story 6 | 7 |

The Dormouse's story

8 | 9 |

Once upon a time there were three little sisters; and their names were 10 | Elsie, 11 | Lacie and 12 | Tillie; 13 | and they lived at the bottom of a well.

14 | 15 |

...

16 | """ 17 | soup = BeautifulSoup(html, "html.parser") 18 | #soup = BeautifulSoup('
Extremely bold
',"html.parser") 19 | resp = soup.find(class_="story").find(id="link2") 20 | 21 | print(resp) 22 | 23 | -------------------------------------------------------------------------------- /sample_docs/beautifulSoup_samples/testeLocalVarios.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import urllib.request 3 | html = urllib.request.urlopen("file:///C:/Users/Rodrigo/Desktop/helloWordPy/emcartazVarios.html").read() 4 | soup = BeautifulSoup(html, "html.parser") 5 | 6 | #PERCORRE TODOS DATABOX 7 | for dataBox in soup.find_all("div",class_="data_box"): 8 | #RETORNA NOME DO FILME 9 | resp = dataBox.find("h2", class_="tt_18 d_inline").find("a", class_="no_underline") 10 | print("Nome: " + resp.text.strip()) 11 | 12 | #RETORNA IMAGEM 13 | resp = dataBox.find(class_="img_side_content") 14 | print("Imagem: " + resp.img['src'].strip()) 15 | 16 | #RETORNA SINOPSE 17 | resp = dataBox.find("div", class_="content").find("p") 18 | print("Sinopse: " + resp.text.strip()) 19 | 20 | #RETORNA DATA LANÇAMENTO 21 | resp = dataBox.find("ul", class_="list_item_p2v tab_col_first").find("div", class_="oflow_a") 22 | print("Data lancamento: " + resp.text.strip()) 23 | print(" ") 24 | -------------------------------------------------------------------------------- /sample_docs/beautifulSoup_samples/testeAdoroCinema.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import urllib.request 3 | 4 | html = urllib.request.urlopen("http://www.adorocinema.com/filmes/numero-cinemas/").read() 5 | soup = BeautifulSoup(html, "html.parser") 6 | print(html) 7 | 8 | #PERCORRE TODOS DATABOX 9 | for dataBox in soup.find_all("div", class_="card card-entity card-entity-list cf hred"): 10 | #RETORNA NOME DO FILME 11 | resp = dataBox.find("h2", class_="meta-title") 12 | print("Nome: " + resp.text.strip()) 13 | 14 | #RETORNA IMAGEM !!!!!!!!!!! 15 | resp = dataBox.find(class_="thumbnail ") 16 | print("Imagem: " + resp.img['data-src'].strip()) 17 | 18 | # #RETORNA SINOPSE 19 | resp = dataBox.find("div", class_="synopsis") 20 | print("Sinopse: " + resp.text.strip()) 21 | 22 | # #RETORNA DATA LANÇAMENTO 23 | resp = dataBox.find(class_="meta-body").find(class_="meta-body-item meta-body-info") 24 | print("Data lancamento: " + resp.text[1:23].strip().replace('/',' ')) 25 | print(" ") 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Como funciona? 2 | ----------------- 3 | 4 | 1. Acesse o arquivo /web/flask/filmesCartazCinema.py 5 | 2. Execute com python e acesse o endereço local no seu PC: http://127.0.0.1:5000/ 6 | 3. É mostrada a lista em formato JSON com base na URL do site AdoroCinema em: http://www.adorocinema.com/filmes/numero-cinemas/ 7 | 4. A API pública pode ser visualizada aqui: https://filmespy.herokuapp.com/api/v1/filmes 8 | 9 | Estrutura do retorno em JSON 10 | ----------------- 11 | 12 | [{ 13 | "sinopse": "Em 2029, Logan (Hugh Jackman) ganha a vida como chofer de limousine para cuidar do nonagen\u00e1rio Charles Xavier (Patrick Stewart). Debilitado...", 14 | "nome": "Logan", 15 | "data": "02/03/2017\n(2h17min)", 16 | "poster": "http://br.web.img1.acsta.net/cx_160_213/b_1_d6d6d6/pictures/16/10/05/19/59/363613.jpg" 17 | }, 18 | 19 | { 20 | "sinopse": "Moradora de uma pequena aldeia francesa, Bela (Emma Watson) tem o pai capturado pela Fera (Dan Stevens) e decide entregar sua vida ao estranho ser...", 21 | "nome": "A Bela e a Fera", 22 | "data": "16/03/2017\n(2h9min)", 23 | "poster": "http://br.web.img1.acsta.net/cx_160_213/b_1_d6d6d6/pictures/17/01/09/12/22/442219.jpg" 24 | }] 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /web/flask/estreiasCinema.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from bs4 import BeautifulSoup 3 | import urllib.request 4 | import html 5 | import json 6 | 7 | app = Flask(__name__) 8 | 9 | @app.route("/") 10 | def summary(): 11 | html_doc = urllib.request.urlopen("http://www.adorocinema.com/filmes/em-cartaz/estreias/").read() 12 | soup = BeautifulSoup(html_doc, "html.parser") 13 | 14 | data = [] 15 | for dataBox in soup.find_all("div",class_="data_box"): 16 | nomeObj = dataBox.find("h2", class_="tt_18 d_inline").find("a", class_="no_underline") 17 | imgObj = dataBox.find(class_="img_side_content") 18 | sinopseObj = dataBox.find("div", class_="content").find("p") 19 | dataObj = dataBox.find("ul", class_="list_item_p2v tab_col_first").find("div", class_="oflow_a") 20 | 21 | data.append({ 'nome': nomeObj.text.strip(), 22 | 'poster' : imgObj.img['src'].strip(), 23 | 'sinopse' : sinopseObj.text.strip(), 24 | 'data' : dataObj.text.strip()}) 25 | 26 | response = app.response_class( 27 | response=json.dumps(data), 28 | status=200, 29 | mimetype='application/json' 30 | ) 31 | return response 32 | 33 | if __name__ == "__main__": 34 | app.run() -------------------------------------------------------------------------------- /web/flask/filmesCartazCinema.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, jsonify 2 | from bs4 import BeautifulSoup 3 | import urllib.request 4 | import html 5 | import json 6 | 7 | 8 | app = Flask(__name__) 9 | 10 | @app.route('/api/v1/filmes', methods=['GET']) 11 | def filmes(): 12 | html_doc = urllib.request.urlopen("http://www.adorocinema.com/filmes/numero-cinemas/").read() 13 | soup = BeautifulSoup(html_doc, "html.parser") 14 | 15 | data = [] 16 | for dataBox in soup.find_all("div",class_="data_box"): 17 | nomeObj = dataBox.find("h2", class_="tt_18 d_inline").find("a", class_="no_underline") 18 | imgObj = dataBox.find(class_="img_side_content") 19 | sinopseObj = dataBox.find("div", class_="content").find("p") 20 | dataObj = dataBox.find("ul", class_="list_item_p2v tab_col_first").find("div", class_="oflow_a") 21 | 22 | data.append({ 'nome': nomeObj.text.strip(), 23 | 'poster' : imgObj.img['src'].strip(), 24 | 'sinopse' : sinopseObj.text.strip(), 25 | 'data' : dataObj.text.strip()}) 26 | 27 | # response = app.response_class( 28 | # response=json.dumps(data), 29 | # status=200, 30 | # mimetype='application/json' 31 | # ) 32 | # return response 33 | 34 | return jsonify({'filmes': data}) 35 | 36 | if __name__ == "__main__": 37 | app.run(debug=True) -------------------------------------------------------------------------------- /sample_docs/emcartazUnico.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
5 |
6 | 7 | A Bela e a Fera 8 | 9 |
10 |
11 | 12 | 1. 13 | 14 |

15 | 16 | A Bela e a Fera 17 | 18 |

19 |
20 | 50 |

51 | Moradora de uma pequena aldeia francesa, Bela (Emma Watson) tem o pai capturado pela Fera (Dan Stevens) e decide entregar sua vida ao estranho ser... 52 |

53 |
54 | 55 | 56 | 57 | AdoroCinema 58 | 59 | 60 |   4,0 62 | 63 | 64 | 65 | Imprensa 66 | 67 | 68 |   3,3 70 | 71 | 72 | 73 | Leitores 74 | 75 | 76 |   4,3 78 | 79 |
80 | 81 | 82 | Ver o trailer 83 | 84 | 90 |
91 |
92 |
93 | 94 | -------------------------------------------------------------------------------- /web.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, jsonify, request, redirect 2 | from bs4 import BeautifulSoup 3 | from urllib.request import urlopen, Request 4 | #from urllib2 import urlopen 5 | import os 6 | 7 | app = Flask(__name__) 8 | 9 | @app.route('/api/v1/filmes', methods=['GET']) 10 | def filmes(): 11 | URL = "http://www.adorocinema.com/filmes/todos-filmes/notas-espectadores/" 12 | 13 | html_doc = urlopen(URL).read() 14 | soup = BeautifulSoup(html_doc, "html.parser") 15 | data = [] 16 | for dataBox in soup.find_all("div", class_="data_box"): 17 | titleObj = dataBox.find("a", class_="no_underline") 18 | imgObj = dataBox.find(class_="img_side_content").find_all(class_="acLnk")[0] 19 | sinopseObj = dataBox.find("div", class_="content").find_all("p")[0] 20 | dateObj = dataBox.find("div", class_="content").find("div", class_="oflow_a") 21 | movieLinkObj = dataBox.find(class_="img_side_content").find_all("a")[0] 22 | generoObj = dataBox.find("div", class_="content").find_all('li')[3].find('div',class_="oflow_a") 23 | detailsLink = 'http://www.adorocinema.com' + movieLinkObj.attrs['href'] 24 | 25 | #LOAD FULL SINOPSE 26 | htmldocMovieDetail = urlopen(detailsLink).read() 27 | soupMovieDetail = BeautifulSoup(htmldocMovieDetail, "html.parser") 28 | fullSinopse = soupMovieDetail.find(class_="content-txt") 29 | fullImgObj = soupMovieDetail.find("meta", property="og:image") 30 | 31 | data.append({'titulo': titleObj.text.strip(), 32 | 'genero': generoObj.text.replace('\n','').strip(), 33 | 'poster' : fullImgObj["content"], 34 | 'sinopse' : sinopseObj.text.strip(), 35 | 'data' : dateObj.text[0:11].strip(), 36 | 'link' : detailsLink, 37 | 'sinopseFull': fullSinopse.text}) 38 | 39 | return jsonify({'filmes': data}) 40 | 41 | @app.route('/api/v1/filmes/', methods=['GET']) 42 | def NotasEspectadores(page_id): 43 | URL = "http://www.adorocinema.com/filmes/todos-filmes/notas-espectadores/?page={}".format(page_id) 44 | 45 | html_doc = urlopen(URL).read() 46 | soup = BeautifulSoup(html_doc, "html.parser") 47 | data = [] 48 | for dataBox in soup.find_all("div", class_="data_box"): 49 | titleObj = dataBox.find("a", class_="no_underline") 50 | imgObj = dataBox.find(class_="img_side_content").find_all(class_="acLnk")[0] 51 | sinopseObj = dataBox.find("div", class_="content").find_all("p")[0] 52 | dateObj = dataBox.find("div", class_="content").find("div", class_="oflow_a") 53 | movieLinkObj = dataBox.find(class_="img_side_content").find_all("a")[0] 54 | generoObj = dataBox.find("div", class_="content").find_all('li')[3].find('div',class_="oflow_a") 55 | detailsLink = 'http://www.adorocinema.com' + movieLinkObj.attrs['href'] 56 | 57 | #LOAD FULL SINOPSE 58 | htmldocMovieDetail = urlopen(detailsLink).read() 59 | soupMovieDetail = BeautifulSoup(htmldocMovieDetail, "html.parser") 60 | fullSinopse = soupMovieDetail.find(class_="content-txt") 61 | fullImgObj = soupMovieDetail.find("meta", property="og:image") 62 | 63 | data.append({'titulo': titleObj.text.strip(), 64 | 'genero': generoObj.text.replace('\n','').strip(), 65 | 'poster' : fullImgObj["content"], 66 | 'sinopse' : sinopseObj.text.strip(), 67 | 'data' : dateObj.text[0:11].strip(), 68 | 'link' : detailsLink, 69 | 'sinopseFull': fullSinopse.text}) 70 | 71 | return jsonify({'filmes': data}) 72 | 73 | @app.route('/api/v1/filmes/emcartaz', methods=['GET']) 74 | def EmCartaz(): 75 | html_doc = urlopen("http://www.adorocinema.com/filmes/numero-cinemas/").read() 76 | soup = BeautifulSoup(html_doc, "html.parser") 77 | 78 | data = [] 79 | for dataBox in soup.find_all("div", class_="card card-entity card-entity-list cf"): 80 | nomeObj = dataBox.find("h2", class_="meta-title") 81 | imgObj = dataBox.find(class_="thumbnail ") 82 | sinopseObj = dataBox.find("div", class_="synopsis") 83 | dataObj = dataBox.find(class_="meta-body").find(class_="meta-body-item meta-body-info") 84 | movieLinkObj = dataBox.find(class_="meta-title-link") 85 | detailsLink = 'http://www.adorocinema.com' + movieLinkObj.attrs['href'] 86 | 87 | #LOAD FULL SINOPSE 88 | htmldocMovieDetail = urlopen(detailsLink).read() 89 | soupMovieDetail = BeautifulSoup(htmldocMovieDetail, "html.parser") 90 | fullSinopse = soupMovieDetail.find(class_="content-txt") 91 | 92 | data.append({ 'nome': nomeObj.text.strip(), 93 | 'poster' : imgObj.img['data-src'].strip(), 94 | 'sinopse' : sinopseObj.text.strip(), 95 | 'data' : dataObj.text[1:23].strip().replace('/',' '), 96 | 'link' : detailsLink, 97 | 'sinopseFull': fullSinopse.text}) 98 | 99 | return jsonify({'filmes': data}) 100 | 101 | @app.route('/api/v1/filmes/') 102 | def filmes_api_page(page_id): 103 | 104 | if int(page_id) > 6: 105 | return redirect("http://python--bergpb.c9users.io/api/v1/filmes", code=200) 106 | 107 | else: 108 | url = "http://www.adorocinema.com/filmes/numero-cinemas/?page={}".format(page_id) 109 | 110 | html_doc = urlopen(url).read() 111 | soup = BeautifulSoup(html_doc, "html.parser") 112 | 113 | data = [] 114 | for dataBox in soup.find_all("div", class_="card card-entity card-entity-list cf"): 115 | nomeObj = dataBox.find("h2", class_="meta-title") 116 | imgObj = dataBox.find(class_="thumbnail ") 117 | sinopseObj = dataBox.find("div", class_="synopsis") 118 | dataObj = dataBox.find(class_="meta-body").find(class_="meta-body-item meta-body-info") 119 | 120 | data.append({ 'nome': nomeObj.text.strip(), 121 | 'poster' : imgObj.img['data-src'].strip(), 122 | 'sinopse' : sinopseObj.text.strip(), 123 | 'data' : dataObj.text[1:23].strip().replace('/',' ')}) 124 | 125 | return jsonify({'filmes': data}) 126 | 127 | if __name__ == '__main__': 128 | port = int(os.environ.get('PORT', 5000)) 129 | app.run(debug=True, host='0.0.0.0', port=port) 130 | -------------------------------------------------------------------------------- /sample_docs/emcartazVarios.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
5 | 6 |
7 |
8 | 9 | A Bela e a Fera 10 | 11 |
12 |
13 | 14 | 1. 15 | 16 |

17 | 18 | A Bela e a Fera 19 | 20 |

21 |
22 | 52 |

53 | Moradora de uma pequena aldeia francesa, Bela (Emma Watson) tem o pai capturado pela Fera (Dan Stevens) e decide entregar sua vida ao estranho ser... 54 |

55 |
56 | 57 | 58 | 59 | AdoroCinema 60 | 61 | 62 |   4,0 64 | 65 | 66 | 67 | Imprensa 68 | 69 | 70 |   3,3 72 | 73 | 74 | 75 | Leitores 76 | 77 | 78 |   4,3 80 | 81 |
82 | 83 | 84 | Ver o trailer 85 | 86 | 92 |
93 |
94 |
95 | 96 | 97 |
98 |
99 | 100 | Kong: A Ilha da Caveira 102 | 103 |
104 |
105 | 106 | 3. 107 | 108 |

109 | 110 | Kong: A Ilha da Caveira 111 | 112 |

113 |
114 | 115 | 145 |

146 | 1944, durante a Segunda Guerra Mundial. Dois aviões, um americano e outro japonês, são abatidos em pleno combate aéreo. Os 147 | pilotos sobrevivem,... 148 |

149 |
150 | 151 | 152 | 153 | AdoroCinema 154 | 155 | 156 |   2,0 158 | 159 | 160 | 161 | Imprensa 162 | 163 | 164 |   3,1 166 | 167 | 168 | 169 | Leitores 170 | 171 | 172 |   4,0 174 | 175 |
176 | 177 | Ver o trailer 178 | 179 | 185 |
186 | 187 |
188 | 189 |
190 |
191 | 192 | --------------------------------------------------------------------------------