├── runtime.txt
├── Procfile
├── requirements.txt
├── sample_docs
│   ├── beautifulSoup_samples
│   │   ├── helloWorld.py
│   │   ├── findAllLinks.py
│   │   ├── basicTagAtt.py
│   │   ├── testeLocalUnico.py
│   │   ├── findElementByClass.py
│   │   ├── testeLocalVarios.py
│   │   └── testeAdoroCinema.py
│   ├── flask_samples
│   │   └── getApi.py
│   ├── emcartazUnico.html
│   └── emcartazVarios.html
├── settings
│   └── config.json
├── .vscode
│   └── tasks.json
├── test.py
├── README.md
├── web
│   └── flask
│       ├── estreiasCinema.py
│       └── filmesCartazCinema.py
└── web.py
/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.6.5
--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | web: python web.py
2 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4==4.5.3
2 | Flask==0.12
--------------------------------------------------------------------------------
/sample_docs/beautifulSoup_samples/helloWorld.py:
--------------------------------------------------------------------------------
1 | print("hello world 2")
--------------------------------------------------------------------------------
/settings/config.json:
--------------------------------------------------------------------------------
1 | {
2 |     "template": "adoroCinema.json",
3 |     "urlOrigem": "http://www.adorocinema.com/filmes/em-cartaz/estreias/"
4 | }
--------------------------------------------------------------------------------
/sample_docs/beautifulSoup_samples/findAllLinks.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | import urllib.request
3 | # parse the fetched page with an explicit parser to avoid the BeautifulSoup warning
4 | soup = BeautifulSoup(urllib.request.urlopen("http://google.com").read(), "html.parser")
5 |
6 | for link in soup.find_all('a'):
7 |     print(link.get('href'))
--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
1 | {
2 | // See https://go.microsoft.com/fwlink/?LinkId=733558
3 | // for the documentation about the tasks.json format
4 | "version": "0.1.0",
5 | "command": "python",
6 | "isShellCommand": true,
7 | "args": ["${file}"],
8 | "showOutput": "always"
9 | }
--------------------------------------------------------------------------------
/sample_docs/beautifulSoup_samples/basicTagAtt.py:
--------------------------------------------------------------------------------
1 | #1 - import BeautifulSoup
2 | from bs4 import BeautifulSoup
3 | #2 - urllib.request is unused here but kept for parity with the other samples
4 | import urllib.request
5 | #3 - parse a small HTML snippet (markup restored; the 'boldest' class follows the Beautiful Soup docs example)
6 | soup = BeautifulSoup('<b class="boldest">Extremely bold</b>', "html.parser")
7 | #4 - soup.b returns the first <b> tag
8 | tag = soup.b
9 | #5 - print the tag's class attribute: ['boldest']
10 | print(tag['class'])
11 |
12 |
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, jsonify, request
2 | from bs4 import BeautifulSoup
3 | from urllib.request import urlopen, Request
4 | import os
5 |
6 | URL = "http://www.adorocinema.com/filmes/todos-filmes/notas-espectadores/"
7 |
8 | html_doc = urlopen(URL).read()
9 | soup = BeautifulSoup(html_doc, "html.parser")
10 | data = []
11 | for dataBox in soup.find_all("div", class_="data_box"):
12 |     content = dataBox.find("div", class_="content")
13 |     print(content.text.strip().split('Gênero')[1][100:])
14 |
--------------------------------------------------------------------------------
/sample_docs/flask_samples/getApi.py:
--------------------------------------------------------------------------------
1 | #PART 1 - imports
2 | from flask import Flask, jsonify, request
3 | from bs4 import BeautifulSoup
4 | from urllib.request import urlopen, Request
5 | import os
6 |
7 | #PART 2 - create the Flask app
8 | app = Flask(__name__)
9 |
10 | #PART 3 - the API route
11 | @app.route('/api/v1/filmes', methods=['GET'])
12 | def filmes2():
13 |     #MY CODE HERE (the scraping logic goes in this function)
14 |     return "Tudo pronto!"
15 |
16 | #PART 4 - run the server
17 | if __name__ == '__main__':
18 |     # Bind to PORT if defined, otherwise default to 5000.
19 |     port = int(os.environ.get('PORT', 5000))
20 |     # Must be 0.0.0.0 to run on Heroku
21 |     app.run(host='0.0.0.0', port=port)
--------------------------------------------------------------------------------
/sample_docs/beautifulSoup_samples/testeLocalUnico.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | import urllib.request
3 | html = urllib.request.urlopen("file:///C:/Users/Rodrigo/Desktop/helloWordPy/emcartazUnico.html").read()
4 | soup = BeautifulSoup(html, "html.parser")
5 |
6 | #RETURN THE MOVIE NAME
7 | resp = soup.find("h2", class_="tt_18 d_inline").find("a", class_="no_underline")
8 | print("Nome: " + resp.text.strip())
9 |
10 | #RETURN THE IMAGE
11 | resp = soup.find(class_="img_side_content")
12 | print("Imagem: " + resp.img['src'].strip())
13 |
14 | #RETURN THE SYNOPSIS
15 | resp = soup.find("div", class_="content").find("p")
16 | print("Sinopse: " + resp.text.strip())
17 |
18 | #RETURN THE RELEASE DATE
19 | resp = soup.find("ul", class_="list_item_p2v tab_col_first").find("div", class_="oflow_a")
20 | print("Data lancamento: " + resp.text.strip())
--------------------------------------------------------------------------------
/sample_docs/beautifulSoup_samples/findElementByClass.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | import urllib.request
3 | #html = urllib.request.urlopen("http://google.com").read()
4 | html = """
5 |
The Dormouse's story
6 |
7 | The Dormouse's story
8 |
9 | Once upon a time there were three little sisters; and their names were
10 | Elsie,
11 | Lacie and
12 | Tillie;
13 | and they lived at the bottom of a well.
14 |
15 | ...
16 | """
17 | soup = BeautifulSoup(html, "html.parser")
18 | #soup = BeautifulSoup('Extremely bold
',"html.parser")
19 | resp = soup.find(class_="story").find(id="link2")
20 |
21 | print(resp)
22 |
23 |
--------------------------------------------------------------------------------
/sample_docs/beautifulSoup_samples/testeLocalVarios.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | import urllib.request
3 | html = urllib.request.urlopen("file:///C:/Users/Rodrigo/Desktop/helloWordPy/emcartazVarios.html").read()
4 | soup = BeautifulSoup(html, "html.parser")
5 |
6 | #LOOP OVER EVERY DATA BOX
7 | for dataBox in soup.find_all("div", class_="data_box"):
8 |     #RETURN THE MOVIE NAME
9 |     resp = dataBox.find("h2", class_="tt_18 d_inline").find("a", class_="no_underline")
10 |     print("Nome: " + resp.text.strip())
11 |
12 |     #RETURN THE IMAGE
13 |     resp = dataBox.find(class_="img_side_content")
14 |     print("Imagem: " + resp.img['src'].strip())
15 |
16 |     #RETURN THE SYNOPSIS
17 |     resp = dataBox.find("div", class_="content").find("p")
18 |     print("Sinopse: " + resp.text.strip())
19 |
20 |     #RETURN THE RELEASE DATE
21 |     resp = dataBox.find("ul", class_="list_item_p2v tab_col_first").find("div", class_="oflow_a")
22 |     print("Data lancamento: " + resp.text.strip())
23 |     print(" ")
24 |
--------------------------------------------------------------------------------
/sample_docs/beautifulSoup_samples/testeAdoroCinema.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | import urllib.request
3 |
4 | html = urllib.request.urlopen("http://www.adorocinema.com/filmes/numero-cinemas/").read()
5 | soup = BeautifulSoup(html, "html.parser")
6 | print(html)  # debug: dump the raw HTML that was fetched
7 |
8 | #LOOP OVER EVERY MOVIE CARD
9 | for dataBox in soup.find_all("div", class_="card card-entity card-entity-list cf hred"):
10 |     #RETURN THE MOVIE NAME
11 |     resp = dataBox.find("h2", class_="meta-title")
12 |     print("Nome: " + resp.text.strip())
13 |
14 |     #RETURN THE IMAGE
15 |     resp = dataBox.find(class_="thumbnail ")
16 |     print("Imagem: " + resp.img['data-src'].strip())
17 |
18 |     #RETURN THE SYNOPSIS
19 |     resp = dataBox.find("div", class_="synopsis")
20 |     print("Sinopse: " + resp.text.strip())
21 |
22 |     #RETURN THE RELEASE DATE
23 |     resp = dataBox.find(class_="meta-body").find(class_="meta-body-item meta-body-info")
24 |     print("Data lancamento: " + resp.text[1:23].strip().replace('/', ' '))
25 |     print(" ")
26 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | How does it work?
2 | -----------------
3 |
4 | 1. Open the file /web/flask/filmesCartazCinema.py
5 | 2. Run it with Python and open the local address on your PC: http://127.0.0.1:5000/ (the movie list itself is served at the /api/v1/filmes route)
6 | 3. A JSON list is returned, built from the AdoroCinema page at: http://www.adorocinema.com/filmes/numero-cinemas/
7 | 4. The public API can be viewed here: https://filmespy.herokuapp.com/api/v1/filmes (a quick smoke-test sketch is shown below this list)
8 |
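For a quick check that the local server is up, a minimal sketch (assuming the app from /web/flask/filmesCartazCinema.py is running on port 5000, as described above; this snippet is not part of the project):

```python
# Hypothetical smoke test for the locally running API.
from urllib.request import urlopen

with urlopen("http://127.0.0.1:5000/api/v1/filmes") as resp:
    print(resp.status)          # expect 200
    print(resp.read()[:200])    # first bytes of the JSON payload
```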
9 | Structure of the JSON response
10 | -----------------
11 |
12 |     [{
13 |         "sinopse": "Em 2029, Logan (Hugh Jackman) ganha a vida como chofer de limousine para cuidar do nonagen\u00e1rio Charles Xavier (Patrick Stewart). Debilitado...",
14 |         "nome": "Logan",
15 |         "data": "02/03/2017\n(2h17min)",
16 |         "poster": "http://br.web.img1.acsta.net/cx_160_213/b_1_d6d6d6/pictures/16/10/05/19/59/363613.jpg"
17 |     },
18 |
19 |     {
20 |         "sinopse": "Moradora de uma pequena aldeia francesa, Bela (Emma Watson) tem o pai capturado pela Fera (Dan Stevens) e decide entregar sua vida ao estranho ser...",
21 |         "nome": "A Bela e a Fera",
22 |         "data": "16/03/2017\n(2h9min)",
23 |         "poster": "http://br.web.img1.acsta.net/cx_160_213/b_1_d6d6d6/pictures/17/01/09/12/22/442219.jpg"
24 |     }]
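A sketch of a small client for this structure, using only the standard library (the public URL and field names come from this README; note that the /api/v1/filmes route in web.py and filmesCartazCinema.py wraps the list in a "filmes" key and some routes use "titulo" instead of "nome", so the snippet tolerates both shapes):

```python
# Hypothetical client sketch; endpoint and fields follow the README above.
import json
from urllib.request import urlopen

with urlopen("https://filmespy.herokuapp.com/api/v1/filmes") as resp:
    payload = json.loads(resp.read().decode("utf-8"))

# Accept either a bare array or an object wrapped in a "filmes" key.
filmes = payload["filmes"] if isinstance(payload, dict) else payload
for filme in filmes:
    nome = filme.get("nome") or filme.get("titulo")
    data = (filme.get("data") or "").split("\n")[0]  # keep only the date part
    print(nome, "-", data)
```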
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/web/flask/estreiasCinema.py:
--------------------------------------------------------------------------------
1 | from flask import Flask
2 | from bs4 import BeautifulSoup
3 | import urllib.request
4 | import html
5 | import json
6 |
7 | app = Flask(__name__)
8 |
9 | @app.route("/")
10 | def summary():
11 |     html_doc = urllib.request.urlopen("http://www.adorocinema.com/filmes/em-cartaz/estreias/").read()
12 |     soup = BeautifulSoup(html_doc, "html.parser")
13 |
14 |     data = []
15 |     for dataBox in soup.find_all("div", class_="data_box"):
16 |         nomeObj = dataBox.find("h2", class_="tt_18 d_inline").find("a", class_="no_underline")
17 |         imgObj = dataBox.find(class_="img_side_content")
18 |         sinopseObj = dataBox.find("div", class_="content").find("p")
19 |         dataObj = dataBox.find("ul", class_="list_item_p2v tab_col_first").find("div", class_="oflow_a")
20 |
21 |         data.append({'nome': nomeObj.text.strip(),
22 |                      'poster': imgObj.img['src'].strip(),
23 |                      'sinopse': sinopseObj.text.strip(),
24 |                      'data': dataObj.text.strip()})
25 |
26 |     response = app.response_class(
27 |         response=json.dumps(data),
28 |         status=200,
29 |         mimetype='application/json'
30 |     )
31 |     return response
32 |
33 | if __name__ == "__main__":
34 |     app.run()
--------------------------------------------------------------------------------
/web/flask/filmesCartazCinema.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, jsonify
2 | from bs4 import BeautifulSoup
3 | import urllib.request
4 | import html
5 | import json
6 |
7 |
8 | app = Flask(__name__)
9 |
10 | @app.route('/api/v1/filmes', methods=['GET'])
11 | def filmes():
12 |     html_doc = urllib.request.urlopen("http://www.adorocinema.com/filmes/numero-cinemas/").read()
13 |     soup = BeautifulSoup(html_doc, "html.parser")
14 |
15 |     data = []
16 |     for dataBox in soup.find_all("div", class_="data_box"):
17 |         nomeObj = dataBox.find("h2", class_="tt_18 d_inline").find("a", class_="no_underline")
18 |         imgObj = dataBox.find(class_="img_side_content")
19 |         sinopseObj = dataBox.find("div", class_="content").find("p")
20 |         dataObj = dataBox.find("ul", class_="list_item_p2v tab_col_first").find("div", class_="oflow_a")
21 |
22 |         data.append({'nome': nomeObj.text.strip(),
23 |                      'poster': imgObj.img['src'].strip(),
24 |                      'sinopse': sinopseObj.text.strip(),
25 |                      'data': dataObj.text.strip()})
26 |
27 |     # response = app.response_class(
28 |     #     response=json.dumps(data),
29 |     #     status=200,
30 |     #     mimetype='application/json'
31 |     # )
32 |     # return response
33 |
34 |     return jsonify({'filmes': data})
35 |
36 | if __name__ == "__main__":
37 |     app.run(debug=True)
--------------------------------------------------------------------------------
/sample_docs/emcartazUnico.html:
--------------------------------------------------------------------------------
[HTML snapshot of a single AdoroCinema "em cartaz" movie box, used as local input by testeLocalUnico.py. The markup was stripped in this extract; only text fragments survive, such as the "A Bela e a Fera" synopsis and the "Ver o trailer" link label.]
--------------------------------------------------------------------------------
/web.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, jsonify, request, redirect
2 | from bs4 import BeautifulSoup
3 | from urllib.request import urlopen, Request
4 | #from urllib2 import urlopen
5 | import os
6 |
7 | app = Flask(__name__)
8 |
9 | @app.route('/api/v1/filmes', methods=['GET'])
10 | def filmes():
11 | URL = "http://www.adorocinema.com/filmes/todos-filmes/notas-espectadores/"
12 |
13 | html_doc = urlopen(URL).read()
14 | soup = BeautifulSoup(html_doc, "html.parser")
15 | data = []
16 | for dataBox in soup.find_all("div", class_="data_box"):
17 | titleObj = dataBox.find("a", class_="no_underline")
18 | imgObj = dataBox.find(class_="img_side_content").find_all(class_="acLnk")[0]
19 | sinopseObj = dataBox.find("div", class_="content").find_all("p")[0]
20 | dateObj = dataBox.find("div", class_="content").find("div", class_="oflow_a")
21 | movieLinkObj = dataBox.find(class_="img_side_content").find_all("a")[0]
22 | generoObj = dataBox.find("div", class_="content").find_all('li')[3].find('div',class_="oflow_a")
23 | detailsLink = 'http://www.adorocinema.com' + movieLinkObj.attrs['href']
24 |
25 | #LOAD FULL SINOPSE
26 | htmldocMovieDetail = urlopen(detailsLink).read()
27 | soupMovieDetail = BeautifulSoup(htmldocMovieDetail, "html.parser")
28 | fullSinopse = soupMovieDetail.find(class_="content-txt")
29 | fullImgObj = soupMovieDetail.find("meta", property="og:image")
30 |
31 | data.append({'titulo': titleObj.text.strip(),
32 | 'genero': generoObj.text.replace('\n','').strip(),
33 | 'poster' : fullImgObj["content"],
34 | 'sinopse' : sinopseObj.text.strip(),
35 | 'data' : dateObj.text[0:11].strip(),
36 | 'link' : detailsLink,
37 | 'sinopseFull': fullSinopse.text})
38 |
39 | return jsonify({'filmes': data})
40 |
41 | @app.route('/api/v1/filmes/<page_id>', methods=['GET'])
42 | def NotasEspectadores(page_id):
43 |     URL = "http://www.adorocinema.com/filmes/todos-filmes/notas-espectadores/?page={}".format(page_id)
44 |
45 |     html_doc = urlopen(URL).read()
46 |     soup = BeautifulSoup(html_doc, "html.parser")
47 |     data = []
48 |     for dataBox in soup.find_all("div", class_="data_box"):
49 |         titleObj = dataBox.find("a", class_="no_underline")
50 |         imgObj = dataBox.find(class_="img_side_content").find_all(class_="acLnk")[0]
51 |         sinopseObj = dataBox.find("div", class_="content").find_all("p")[0]
52 |         dateObj = dataBox.find("div", class_="content").find("div", class_="oflow_a")
53 |         movieLinkObj = dataBox.find(class_="img_side_content").find_all("a")[0]
54 |         generoObj = dataBox.find("div", class_="content").find_all('li')[3].find('div', class_="oflow_a")
55 |         detailsLink = 'http://www.adorocinema.com' + movieLinkObj.attrs['href']
56 |
57 |         #LOAD THE FULL SYNOPSIS FROM THE DETAIL PAGE
58 |         htmldocMovieDetail = urlopen(detailsLink).read()
59 |         soupMovieDetail = BeautifulSoup(htmldocMovieDetail, "html.parser")
60 |         fullSinopse = soupMovieDetail.find(class_="content-txt")
61 |         fullImgObj = soupMovieDetail.find("meta", property="og:image")
62 |
63 |         data.append({'titulo': titleObj.text.strip(),
64 |                      'genero': generoObj.text.replace('\n', '').strip(),
65 |                      'poster': fullImgObj["content"],
66 |                      'sinopse': sinopseObj.text.strip(),
67 |                      'data': dateObj.text[0:11].strip(),
68 |                      'link': detailsLink,
69 |                      'sinopseFull': fullSinopse.text})
70 |
71 |     return jsonify({'filmes': data})
72 |
73 | @app.route('/api/v1/filmes/emcartaz', methods=['GET'])
74 | def EmCartaz():
75 | html_doc = urlopen("http://www.adorocinema.com/filmes/numero-cinemas/").read()
76 | soup = BeautifulSoup(html_doc, "html.parser")
77 |
78 | data = []
79 | for dataBox in soup.find_all("div", class_="card card-entity card-entity-list cf"):
80 | nomeObj = dataBox.find("h2", class_="meta-title")
81 | imgObj = dataBox.find(class_="thumbnail ")
82 | sinopseObj = dataBox.find("div", class_="synopsis")
83 | dataObj = dataBox.find(class_="meta-body").find(class_="meta-body-item meta-body-info")
84 | movieLinkObj = dataBox.find(class_="meta-title-link")
85 | detailsLink = 'http://www.adorocinema.com' + movieLinkObj.attrs['href']
86 |
87 | #LOAD FULL SINOPSE
88 | htmldocMovieDetail = urlopen(detailsLink).read()
89 | soupMovieDetail = BeautifulSoup(htmldocMovieDetail, "html.parser")
90 | fullSinopse = soupMovieDetail.find(class_="content-txt")
91 |
92 | data.append({ 'nome': nomeObj.text.strip(),
93 | 'poster' : imgObj.img['data-src'].strip(),
94 | 'sinopse' : sinopseObj.text.strip(),
95 | 'data' : dataObj.text[1:23].strip().replace('/',' '),
96 | 'link' : detailsLink,
97 | 'sinopseFull': fullSinopse.text})
98 |
99 | return jsonify({'filmes': data})
100 |
101 | @app.route('/api/v1/filmes/<page_id>')
102 | def filmes_api_page(page_id):
103 |
104 |     if int(page_id) > 6:
105 |         return redirect("http://python--bergpb.c9users.io/api/v1/filmes", code=200)
106 |
107 |     else:
108 |         url = "http://www.adorocinema.com/filmes/numero-cinemas/?page={}".format(page_id)
109 |
110 |         html_doc = urlopen(url).read()
111 |         soup = BeautifulSoup(html_doc, "html.parser")
112 |
113 |         data = []
114 |         for dataBox in soup.find_all("div", class_="card card-entity card-entity-list cf"):
115 |             nomeObj = dataBox.find("h2", class_="meta-title")
116 |             imgObj = dataBox.find(class_="thumbnail ")
117 |             sinopseObj = dataBox.find("div", class_="synopsis")
118 |             dataObj = dataBox.find(class_="meta-body").find(class_="meta-body-item meta-body-info")
119 |
120 |             data.append({'nome': nomeObj.text.strip(),
121 |                          'poster': imgObj.img['data-src'].strip(),
122 |                          'sinopse': sinopseObj.text.strip(),
123 |                          'data': dataObj.text[1:23].strip().replace('/', ' ')})
124 |
125 |         return jsonify({'filmes': data})
126 |
127 | if __name__ == '__main__':
128 |     port = int(os.environ.get('PORT', 5000))
129 |     app.run(debug=True, host='0.0.0.0', port=port)
130 |
--------------------------------------------------------------------------------
/sample_docs/emcartazVarios.html:
--------------------------------------------------------------------------------
[HTML snapshot of the AdoroCinema "em cartaz" listing with several movie boxes, used as local input by testeLocalVarios.py. The markup was stripped in this extract; only text fragments survive, such as the synopses of "A Bela e a Fera" and a WWII film, the "Ver o trailer" link labels, and the list numbering.]
--------------------------------------------------------------------------------