├── pyfutebol
    ├── __init__.py
    └── crawler.py
├── requirements.txt
├── setup.cfg
├── setup.py
├── LICENSE
└── README.md


/pyfutebol/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | lxml
2 | dicttoxml==1.7.4


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | license_file = LICENSE


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | 
 4 | setup(name='pyfutebol',
 5 |       version='2.2.1',
 6 |       description='Crawler para resultados de futebol',
 7 |       url='https://github.com/vinigracindo/pyfutebol/',
 8 |       author='Vinnicyus Gracindo',
 9 |       author_email='vini.gracindo@gmail.com',
10 |       license='MIT',
11 |       packages=['pyfutebol'],
12 |       install_requires=[
13 |         'beautifulsoup4',
14 |       ],
15 |       zip_safe=False)


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Vinnicyus Gracindo
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Python Crawler - Resultados de Futebol
 2 | 
 3 | Um pequeno crawler para pegar resultados de futebol ao vivo.
 4 | O crawler coleta as informações do site: https://www.placardefutebol.com.br/
 5 | 
 6 | ## Funções
 7 | 
 8 | * `jogos_de_hoje(format='dict', cache=True)` - Retorna todos os jogos de hoje (Que aconteceram, que estão acontecendo e os que irão acontecer).
 9 | O format indica o tipo de saída.
10 | O format aceita como parâmetro 'json', 'xml' e o padrão 'dict'.
11 | O cache é um parâmetro que evita consultas ao site toda vez que os métodos forem invocados. Caso você invoque o método com cache=False
12 | o crawler faz uma consulta novamente ao site atualizando os dados.
13 | 
14 | * `jogos_ao_vivo(format='dict', cache=True)` - Retorna os jogos que estão acontecendo no momento.
15 | O format indica o tipo de saída.
16 | O format aceita como parâmetro 'json', 'xml' e o padrão 'dict'.
17 | O cache é um parâmetro que evita consultas ao site toda vez que os métodos forem invocados. Caso você invoque o método com cache=False
18 | o crawler faz uma consulta novamente ao site atualizando os dados.
19 | 
20 | * `buscar_jogo_por_time(time, cache=True)` - Retorna o jogo do time especificado.
21 | Retorna uma lista vazia se não houver jogos hoje para o time.
22 | O cache é um parâmetro que evita consultas ao site toda vez que os métodos forem invocados. Caso você invoque o método com cache=False
23 | o crawler faz uma consulta novamente ao site atualizando os dados.
24 | 
25 | 
26 | ## Como instalar?
27 | 
28 | Instale as dependências
29 | 
30 | ```console
31 | pip install dicttoxml
32 | pip install lxml
33 | ```
34 | 
35 | Para instalar usando pip:
36 | 
37 | ```console
38 | pip install pyfutebol
39 | ```
40 | 
41 | ## Como utilizar?
42 | 
43 | ```python
44 | from pyfutebol import crawler
45 | resultados = crawler.jogos_de_hoje()
46 | for resultado in resultados:
47 | 	print(resultado)
48 | ```
49 | 
50 | ```python
51 | from pyfutebol import crawler
52 | resultados = crawler.jogos_ao_vivo()
53 | for resultado in resultados:
54 | 	print(resultado)
55 | ```
56 | 
57 | ```python
58 | from pyfutebol import crawler
59 | resultados = crawler.jogos_ao_vivo(format='json')
60 | print(resultados) # saída em formato json
61 | ```
62 | 
63 | ```python
64 | from pyfutebol import crawler
65 | resultado = crawler.buscar_jogo_por_time('flamengo')
66 | print(resultado)
67 | ```
68 | 
69 | ## Parâmetro cache
70 | 
71 | ```python
72 | from pyfutebol import crawler
73 | crawler.jogos_de_hoje() # Faz uma consulta no site https://www.placardefutebol.com.br/ e pega os resultados.
74 | crawler.jogos_ao_vivo() # Não faz consulta no site e utiliza os dados obtidos quando o método anterior foi executado.
75 | crawler.jogos_ao_vivo(cache=False) # Faz uma consulta no site https://www.placardefutebol.com.br/ e pega os resultados.
76 | ```


--------------------------------------------------------------------------------
/pyfutebol/crawler.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup
 2 | from urllib.request import urlopen
 3 | import json
 4 | import re
 5 | from dicttoxml import dicttoxml
 6 | 
 7 | _page = None
 8 | 
 9 | def get_html_page(cache):
10 |     global _page
11 |     
12 |     if cache is False:
13 |         _page = None
14 |     if _page is None:
15 |         html = urlopen('https://www.placardefutebol.com.br/')
16 |         _page = BeautifulSoup(html, 'lxml')
17 |         
18 |     res = _page
19 |     
20 |     return res
21 |     
22 | 
def jogos_de_hoje(format='dict', cache=True):
    """Return every match listed on the site's home page.

    Args:
        format: Output format. ``'json'`` returns a JSON string, ``'xml'``
            returns the result of ``dicttoxml``, and any other value
            (default ``'dict'``) returns a list of dicts.
        cache: Forwarded to ``get_html_page``; ``False`` forces a new
            download of the page.

    Returns:
        One entry per match with the keys ``match``, ``status`` and
        ``league``. Matches that have a scoreboard also carry ``scoreboard``
        and ``summary``; matches without one carry ``start_in`` and have
        ``status`` replaced by ``'EM BREVE'``.
    """
    page = get_html_page(cache)
    titles = page.find_all('h3', class_='match-list_league-name')
    championships = page.find_all('div', class_='container content')

    results = []

    for index, championship in enumerate(championships):
        matches = championship.find_all('div', class_='row align-items-center content')

        for match in matches:
            status = match.find('span', class_='status-name').text
            teams = match.find_all('div', class_='team-name')
            scoreboard = match.find_all('span', class_='badge badge-default')

            team_home = teams[0].text.strip()
            team_visitor = teams[1].text.strip()

            info = {
                'match': '{} x {}'.format(team_home, team_visitor),
                'status': status,
                'league': titles[index].text,
            }

            # A match that has already started has two score badges.
            try:
                goals_home = scoreboard[0].text
                goals_visitor = scoreboard[1].text
            except IndexError:
                # Not started yet: the status text holds the kick-off time,
                # so keep it under 'start_in' and flag the match as upcoming.
                info['start_in'] = status
                info['status'] = 'EM BREVE'
            else:
                info['scoreboard'] = {
                    team_home: goals_home,
                    team_visitor: goals_visitor,
                }
                info['summary'] = '{} x {}'.format(goals_home, goals_visitor)

            results.append(info)

    if format == 'json':
        return json.dumps(results)
    elif format == 'xml':
        return dicttoxml(results)
    else:
        return results
72 | 
73 |   
def jogos_ao_vivo(format='dict', cache=True):
    """Return only the matches whose status marks them as in progress.

    A match is kept when its ``status`` text contains ``INTERVALO``,
    ``AO VIVO`` or ``MIN``.

    Args:
        format: ``'json'`` returns a JSON string, ``'xml'`` returns the
            result of ``dicttoxml``, anything else returns a list of dicts.
        cache: Forwarded to ``jogos_de_hoje``.

    Returns:
        The filtered matches in the requested format.
    """
    live_pattern = re.compile(r'INTERVALO|AO VIVO|MIN')
    live_games = [
        game
        for game in jogos_de_hoje(cache=cache)
        if live_pattern.search(game['status'])
    ]

    if format == 'json':
        return json.dumps(live_games)
    if format == 'xml':
        return dicttoxml(live_games)
    return live_games
84 |         
def buscar_jogo_por_time(time, cache=True):
    """Return the matches whose ``match`` label contains the given team name.

    The comparison is a case-insensitive substring test against the
    ``'<home> x <visitor>'`` label produced by ``jogos_de_hoje``.

    Args:
        time: Team name, or part of one, to look for.
        cache: Forwarded to ``jogos_de_hoje``.

    Returns:
        A list with the matching entries; empty when nothing matches.
    """
    return [
        game
        for game in jogos_de_hoje(cache=cache)
        if time.lower() in game['match'].lower()
    ]


--------------------------------------------------------------------------------