├── README.md
├── prophet.py
├── pycaret.py
├── churn_de_clientes.py
├── Previsão Resultado Campeonato Brasileiro.ipynb
└── Previsão preço ações.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # machine_learning
2 | Notebooks e scripts contendo o código utilizado para prever os resultados dos jogos do Campeonato Brasileiro, os preços de ações e o churn de clientes.
3 |
4 | Foi utilizada a linguagem Python com as bibliotecas scikit-learn, pandas, numpy, seaborn e matplotlib.
5 |
6 | A base de dados foi alterada para refletir o resultado em formato numérico.
7 |
8 | A base original foi retirada do site: http://football-data.co.uk/
9 |
10 | Base churn de clientes : https://www.kaggle.com/blastchar/telco-customer-churn
11 |
12 |
--------------------------------------------------------------------------------
/prophet.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""Forecast the ETH-USD adjusted closing price 60 periods ahead with Prophet.

This file was exported from a notebook: bare expressions such as ``df`` only
render output when the code is executed cell by cell.

Install the extra dependency beforehand (shell command, not Python):
    pip install yfinance
"""

# import libraries
import pandas as pd
import yfinance as yf
from datetime import datetime
from datetime import timedelta
import plotly.graph_objects as go
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, plot_components_plotly
import warnings

warnings.filterwarnings('ignore')
pd.options.display.float_format = '${:,.2f}'.format

# Download the ETH-USD quotes from the start date up to today
hj = datetime.today().strftime('%Y-%m-%d')
data_ini = '2016-01-01'
df_eth = yf.download('ETH-USD', data_ini, hj)
df_eth.tail()

# Turn the index into a regular column so "Date" can be selected below
df_eth.reset_index(inplace=True)

df_eth

# Keep only the date and the adjusted close, renamed to the "ds"/"y" columns
# that Prophet's fit() expects. rename() returns a new frame, so no in-place
# mutation of a selection of df_eth takes place.
df = df_eth[["Date", "Adj Close"]].rename(columns={'Date': 'ds', 'Adj Close': 'y'})

df

# Closing price chart
fig = go.Figure()
fig.add_trace(go.Scatter(x=df['ds'], y=df['y']))

model = Prophet(seasonality_mode='multiplicative')
model.fit(df)

# Build a dataframe that extends the history 60 periods into the future
df_futuro = model.make_future_dataframe(periods=60)
df_futuro.tail(60)

# Forecast
previsao = model.predict(df_futuro)
previsao

previsao[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(60)

# Forecast chart
plot_plotly(model, previsao)

# Forecast components chart
plot_components_plotly(model, previsao)
--------------------------------------------------------------------------------
/pycaret.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""Predict the next close of RADL3.SA with PyCaret regression models.

Exported from a Colab notebook: bare expressions such as ``raia`` only render
output when the code is executed cell by cell.

Install the dependencies beforehand (shell commands, not Python). The version
specifier must not contain spaces, otherwise pip cannot parse the requirement:
    pip install pycaret==2.1.2
    pip install yfinance
"""

from pycaret.utils import enable_colab
enable_colab()

# import libraries
import yfinance as yf
import pandas as pd

# choose a stock
acao = yf.Ticker('RADL3.SA')
# choose the data range
raia = acao.history(period='2y')
raia

# drop the columns that are not used
raia = raia.drop(['Dividends', 'Stock Splits'], axis=1)
raia

# new columns: rolling means of the close over 7 and 30 rows
raia['MM7d'] = raia['Close'].rolling(window=7).mean().round(2)
raia['MM30d'] = raia['Close'].rolling(window=30).mean().round(2)
raia

# keep the last 5 rows aside for prediction; copy() makes this an independent
# frame so the in-place reset_index below does not act on a slice of raia
raia_prever = raia.tail(5).copy()
raia_prever

# remove those last 5 rows from the training frame
raia.drop(raia.tail(5).index, inplace=True)
raia

# shift the close one row up: each row's target becomes the next row's close
raia['Close'] = raia['Close'].shift(-1)
raia

# drop the rows with nulls (rolling-window warm-up and the shifted last row)
raia.dropna(inplace=True)
raia

# discard the date index
raia.reset_index(drop=True, inplace=True)
raia_prever.reset_index(drop=True, inplace=True)

raia

# import the PyCaret regression module and configure the experiment
from pycaret.regression import *
setup(data=raia, target='Close', session_id=123)

top3 = compare_models(n_select=3)

print(top3)

models()

ridge = create_model('ridge', fold=10)

lar = create_model('lar', fold=10)

br = create_model('br', fold=10)

# Tuning
ridge_params = {'alpha': [0.02, 0.024, 0.025, 0.026, 0.03]}
tunne_ridge = tune_model(ridge, n_iter=1000, optimize='RMSE', custom_grid=ridge_params)

tunne_lar = tune_model(lar, n_iter=1000, optimize='RMSE')

tunne_br = tune_model(br, n_iter=1000, optimize='RMSE')

# Error plot
plot_model(tunne_ridge, plot='error')

plot_model(tunne_ridge, plot='feature')

# Evaluate the tuned model; per the PyCaret documentation, predict_model
# without `data` scores the hold-out split created by setup()
predict_model(tunne_ridge)

# Finalize the model
final_ridge_model = finalize_model(tunne_ridge)

# Predict the 5 rows kept aside
prev = predict_model(final_ridge_model, data=raia_prever)
prev

# Save the model so it can be reused with new data
save_model(final_ridge_model, 'Modelo Final Ridge Pycaret')

# New data
novo_dado = yf.download('RADL3.SA', period='45d')
novo_dado

# drop the column that was not part of the training frame
novo_dado = novo_dado.drop('Adj Close', axis=1)
# discard the date index
novo_dado.reset_index(drop=True, inplace=True)
# rebuild the rolling-mean columns
novo_dado['MM7d'] = novo_dado['Close'].rolling(window=7).mean().round(2)
novo_dado['MM30d'] = novo_dado['Close'].rolling(window=30).mean().round(2)
novo_dado

# only the most recent row is scored
novo_dado = novo_dado.tail(1)
novo_dado

# Reload the saved model
saved_final_ridge_model = load_model('Modelo Final Ridge Pycaret')

# Predict the new row
nova_previsao = predict_model(saved_final_ridge_model, data=novo_dado)
nova_previsao.head()
--------------------------------------------------------------------------------
/churn_de_clientes.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""Churn de clientes.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1cWTIyGJbuYPWP0iWl1QCrx2_2P3wtrsd

```
Predicting customer churn with machine learning
```
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy

from google.colab import files


def _hist_por_churn(dados, coluna):
    """Plot the distribution of *coluna* for churned vs. retained customers."""
    dfno = dados[dados.Churn == 'No'][coluna]
    dfyes = dados[dados.Churn == 'Yes'][coluna]
    plt.hist([dfno, dfyes], color=['blue', 'red'], label=['Churn = no', 'Churn = yes'])
    plt.legend()
    # show() closes the figure so consecutive charts do not pile up on one axes
    plt.show()


arquivo = files.upload()

df = pd.read_csv('Telco-Customer-Churn.csv')

# Keep an untouched copy of the first 10 rows (customerID and Churn included)
# to report the validation predictions at the end
df_val = df.iloc[:10].copy()

df_val

# The id column can be removed (axis passed by keyword: the positional form
# is no longer accepted by pandas 2.x)
df.drop('customerID', axis=1, inplace=True)

# check the column types
df.dtypes

# TotalCharges is read as text, so it has to be converted to numeric.
# A plain pd.to_numeric(df.TotalCharges) raises on the rows holding blank
# strings; errors='coerce' turns those values into NaN instead.
df.TotalCharges = pd.to_numeric(df.TotalCharges, errors='coerce')

df.TotalCharges.isna()

# Inspect the rows whose TotalCharges became empty
df[df.TotalCharges.isna()]

# Drop the rows with empty TotalCharges
df.dropna(subset=['TotalCharges'], inplace=True)

df

# Exploratory charts: churn "No" vs. "Yes" for tenure, Dependents and
# MonthlyCharges
_hist_por_churn(df, 'tenure')
_hist_por_churn(df, 'Dependents')
_hist_por_churn(df, 'MonthlyCharges')

# Check how balanced the dataset is
dfno = df[df.Churn == 'No'].Churn
dfyes = df[df.Churn == 'Yes'].Churn

total = dfno.count() + dfyes.count()
percent = round((dfyes.count() / total) * 100, 2)

# dfno holds the "No" rows and percent is the share of "Yes" rows
print(f'No: {dfno.count()}', f'Yes: {dfyes.count()}', f'% Yes: {percent}')

_hist_por_churn(df, 'Churn')

# List the values of the text columns, then collapse the "No ... service"
# variants into a plain "No"
for i in df:
    if df[i].dtypes == 'object':
        print(f'{i} : {df[i].unique()}')

df.replace('No internet service', 'No', inplace=True)
df.replace('No phone service', 'No', inplace=True)

df

# Encode the categorical columns as numeric indicator columns with pandas
# one-hot encoding, since the model works on numeric inputs.
# Each column is listed once: a repeated name produces duplicated indicator
# columns. dtype='uint8' keeps the indicators numeric on pandas versions
# whose default is bool.
colunas_categoricas = [
    'gender', 'InternetService', 'Partner', 'Dependents', 'PhoneService',
    'MultipleLines', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
    'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract',
    'PaperlessBilling', 'PaymentMethod',
]
dfnum = pd.get_dummies(data=df, columns=colunas_categoricas, dtype='uint8')
dfnum.columns

# Sample of the encoded values
dfnum.sample(5)

# Label-encode the Churn column
le = LabelEncoder()
dfnum['Churn'] = le.fit_transform(dfnum['Churn'])
dfnum.Churn.sample(15)

# Scale the tenure, MonthlyCharges and TotalCharges columns

scaler = MinMaxScaler()
dfnum[['tenure', 'MonthlyCharges', 'TotalCharges']] = scaler.fit_transform(dfnum[['tenure', 'MonthlyCharges', 'TotalCharges']])
dfnum.sample(5)

# Split the data into features and labels
features = dfnum.drop('Churn', axis=1)
label = dfnum['Churn']
print(label)

# Hold out the first 10 rows for validation
# NOTE(review): assumes none of the first 10 rows was dropped for an empty
# TotalCharges, so that they line up with df_val - confirm against the data
val = features.head(10)
val

# Remaining features
features = features.iloc[10:]
features

label = label.iloc[10:]
label

# Train/test split
X_Train, X_test, y_train, y_test = train_test_split(features, label, test_size=0.2, random_state=42)
print(X_Train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

# Build the network. The first Dense call already describes the first hidden
# layer: 20 neurons (arbitrary value), the number of inputs and the ReLU
# activation. The input size is taken from the training frame instead of
# being hard-coded, so it always matches the encoded columns.
#
# ReLU is the most widely used activation when designing neural networks.
# It is non-linear, so errors can be backpropagated through several layers of
# ReLU-activated neurons, and it does not activate all neurons at the same
# time.
model = Sequential([
    Dense(20, input_shape=(X_Train.shape[1],), activation='relu'),
    Dense(52, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Training configuration
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Show how the model was built
model.summary()

model.fit(X_Train, y_train, epochs=100)

model.evaluate(X_test, y_test)

pred_val = model.predict(val)
print(pred_val)

# Predictions above the 0.5 threshold are flagged as churn (1)
y_pred = [1 if p > 0.5 else 0 for p in pred_val.flatten()]

cliente = df_val['customerID']
real = df_val['Churn']
pred = list(pred_val.flatten())

# Separate names keep the source frame `df` and the imported sklearn
# `confusion_matrix` function from being overwritten
resultado = pd.DataFrame({'cliente': cliente, 'previsao': pred, 'real': real, 'churn': y_pred})

print(resultado)

matriz_confusao = pd.crosstab(resultado['real'], resultado['churn'], rownames=['real'], colnames=['churn'])

sns.heatmap(matriz_confusao, annot=True)
plt.show()
--------------------------------------------------------------------------------
/Previsão Resultado Campeonato Brasileiro.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#importar bibliotecas\n",
10 | "import pandas as pd\n",
11 | "import numpy as np\n",
12 | "from sklearn.linear_model import LogisticRegression\n",
13 | "from sklearn.tree import DecisionTreeClassifier\n",
14 | "from sklearn.naive_bayes import GaussianNB\n",
15 | "from sklearn.svm import SVC\n",
16 | "from sklearn.metrics import accuracy_score , f1_score, precision_score, recall_score\n",
17 | "from sklearn.feature_selection import SelectKBest\n",
18 | "from sklearn.model_selection import GridSearchCV\n",
19 | "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
20 | "from IPython.display import display\n",
21 | "import matplotlib.pyplot as plt\n",
22 | "import seaborn as sns\n",
23 | "from sklearn.preprocessing import scale\n",
24 | "%matplotlib inline"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 2,
30 | "metadata": {},
31 | "outputs": [
32 | {
33 | "data": {
34 | "text/html": [
35 | "
\n",
36 | "\n",
49 | "
\n",
50 | " \n",
51 | " \n",
52 | " | \n",
53 | " Country | \n",
54 | " League | \n",
55 | " game_id | \n",
56 | " Season | \n",
57 | " Date | \n",
58 | " Time | \n",
59 | " Home | \n",
60 | " home_id | \n",
61 | " Away | \n",
62 | " Away_id | \n",
63 | " ... | \n",
64 | " Res | \n",
65 | " PH | \n",
66 | " PD | \n",
67 | " PA | \n",
68 | " MaxH | \n",
69 | " MaxD | \n",
70 | " MaxA | \n",
71 | " AvgH | \n",
72 | " AvgD | \n",
73 | " AvgA | \n",
74 | "
\n",
75 | " \n",
76 | " \n",
77 | " \n",
78 | " | 0 | \n",
79 | " Brazil | \n",
80 | " Serie A | \n",
81 | " 1 | \n",
82 | " 2012 | \n",
83 | " 19-05-12 | \n",
84 | " 22:30 | \n",
85 | " Palmeiras | \n",
86 | " 21 | \n",
87 | " Portuguesa | \n",
88 | " 21 | \n",
89 | " ... | \n",
90 | " 0 | \n",
91 | " 1.75 | \n",
92 | " 3.86 | \n",
93 | " 5.25 | \n",
94 | " 1.76 | \n",
95 | " 3.87 | \n",
96 | " 5.31 | \n",
97 | " 1.69 | \n",
98 | " 3.50 | \n",
99 | " 4.90 | \n",
100 | "
\n",
101 | " \n",
102 | " | 1 | \n",
103 | " Brazil | \n",
104 | " Serie A | \n",
105 | " 2 | \n",
106 | " 2012 | \n",
107 | " 19-05-12 | \n",
108 | " 22:30 | \n",
109 | " Sport Recife | \n",
110 | " 27 | \n",
111 | " Flamengo RJ | \n",
112 | " 27 | \n",
113 | " ... | \n",
114 | " 0 | \n",
115 | " 2.83 | \n",
116 | " 3.39 | \n",
117 | " 2.68 | \n",
118 | " 2.83 | \n",
119 | " 3.42 | \n",
120 | " 2.70 | \n",
121 | " 2.59 | \n",
122 | " 3.23 | \n",
123 | " 2.58 | \n",
124 | "
\n",
125 | " \n",
126 | " | 2 | \n",
127 | " Brazil | \n",
128 | " Serie A | \n",
129 | " 3 | \n",
130 | " 2012 | \n",
131 | " 20-05-12 | \n",
132 | " 1:00 | \n",
133 | " Figueirense | \n",
134 | " 13 | \n",
135 | " Nautico | \n",
136 | " 13 | \n",
137 | " ... | \n",
138 | " 1 | \n",
139 | " 1.60 | \n",
140 | " 4.04 | \n",
141 | " 6.72 | \n",
142 | " 1.67 | \n",
143 | " 4.05 | \n",
144 | " 7.22 | \n",
145 | " 1.59 | \n",
146 | " 3.67 | \n",
147 | " 5.64 | \n",
148 | "
\n",
149 | " \n",
150 | " | 3 | \n",
151 | " Brazil | \n",
152 | " Serie A | \n",
153 | " 4 | \n",
154 | " 2012 | \n",
155 | " 20-05-12 | \n",
156 | " 20:00 | \n",
157 | " Botafogo RJ | \n",
158 | " 7 | \n",
159 | " Sao Paulo | \n",
160 | " 7 | \n",
161 | " ... | \n",
162 | " 1 | \n",
163 | " 2.49 | \n",
164 | " 3.35 | \n",
165 | " 3.15 | \n",
166 | " 2.49 | \n",
167 | " 3.39 | \n",
168 | " 3.15 | \n",
169 | " 2.35 | \n",
170 | " 3.26 | \n",
171 | " 2.84 | \n",
172 | "
\n",
173 | " \n",
174 | " | 4 | \n",
175 | " Brazil | \n",
176 | " Serie A | \n",
177 | " 5 | \n",
178 | " 2012 | \n",
179 | " 20-05-12 | \n",
180 | " 20:00 | \n",
181 | " Corinthians | \n",
182 | " 9 | \n",
183 | " Fluminense | \n",
184 | " 9 | \n",
185 | " ... | \n",
186 | " 2 | \n",
187 | " 1.96 | \n",
188 | " 3.53 | \n",
189 | " 4.41 | \n",
190 | " 1.96 | \n",
191 | " 3.53 | \n",
192 | " 4.41 | \n",
193 | " 1.89 | \n",
194 | " 3.33 | \n",
195 | " 3.89 | \n",
196 | "
\n",
197 | " \n",
198 | "
\n",
199 | "
5 rows × 22 columns
\n",
200 | "
"
201 | ],
202 | "text/plain": [
203 | " Country League game_id Season Date Time Home home_id \\\n",
204 | "0 Brazil Serie A 1 2012 19-05-12 22:30 Palmeiras 21 \n",
205 | "1 Brazil Serie A 2 2012 19-05-12 22:30 Sport Recife 27 \n",
206 | "2 Brazil Serie A 3 2012 20-05-12 1:00 Figueirense 13 \n",
207 | "3 Brazil Serie A 4 2012 20-05-12 20:00 Botafogo RJ 7 \n",
208 | "4 Brazil Serie A 5 2012 20-05-12 20:00 Corinthians 9 \n",
209 | "\n",
210 | " Away Away_id ... Res PH PD PA MaxH MaxD MaxA AvgH \\\n",
211 | "0 Portuguesa 21 ... 0 1.75 3.86 5.25 1.76 3.87 5.31 1.69 \n",
212 | "1 Flamengo RJ 27 ... 0 2.83 3.39 2.68 2.83 3.42 2.70 2.59 \n",
213 | "2 Nautico 13 ... 1 1.60 4.04 6.72 1.67 4.05 7.22 1.59 \n",
214 | "3 Sao Paulo 7 ... 1 2.49 3.35 3.15 2.49 3.39 3.15 2.35 \n",
215 | "4 Fluminense 9 ... 2 1.96 3.53 4.41 1.96 3.53 4.41 1.89 \n",
216 | "\n",
217 | " AvgD AvgA \n",
218 | "0 3.50 4.90 \n",
219 | "1 3.23 2.58 \n",
220 | "2 3.67 5.64 \n",
221 | "3 3.26 2.84 \n",
222 | "4 3.33 3.89 \n",
223 | "\n",
224 | "[5 rows x 22 columns]"
225 | ]
226 | },
227 | "metadata": {},
228 | "output_type": "display_data"
229 | }
230 | ],
231 | "source": [
232 | "#lendo o arquivo BRAS.csv\n",
233 | "#O arquivo utilizado foi baixado do site www.football-data.co.uk, que agrega informações de diversos campeonatos\n",
234 | "#ao redor do mundo.\n",
235 | "\n",
236 | "data=pd.read_csv('BRA.csv',delimiter=',')\n",
237 | "\n",
238 | "#Verificando as 5 primeiras linhas do arquivo a ser utilizado\n",
239 | "display(data.head())\n",
240 | "\n",
241 | "#Notes for football data\n",
242 | "\n",
243 | "#Country = país do campeonato\n",
244 | "#League = nome da liga\n",
245 | "#game_id = id do jogo\n",
246 | "#Season = temporada\n",
247 | "#date = data do jogo\n",
248 | "#Time = hora do jogo\n",
249 | "#Home = Time da casa\n",
250 | "#home_id = id do time da casa\n",
251 | "#Away = time visitante\n",
252 | "#Away_id = id do time visitante\n",
253 | "#HG = Gols do time da casa\n",
254 | "#AG = Gols do time visitante\n",
255 | "#Res = Resultado do jogo (D=Draw, H=Home win, A=Away win)\n",
256 | "#PH = probablidade vitória time da casa provida pela Pinacle (casa de aposta inglesa)\n",
257 | "#PD = probablidade empate provida pela Pinacle (casa de aposta inglesa)\n",
258 | "#PA = probablidade vitória time visitante provida pela Pinacle (casa de aposta inglesa)\n",
259 | "#MaxH = probablidade vitória time da casa provida pela OddsPortal (casa de aposta inglesa)\n",
260 | "#MaxD = probablidade empate provida pela OddsPortal (casa de aposta inglesa)\n",
261 | "#MaxA = probablidade vitória time visitante provida pela OddsPortal (casa de aposta ingles\n",
262 | "#AvgH = probablidade média de vitória em casa provida pela OddsPortal (casa de aposta inglesa)\n",
263 | "#AvgD = probablidade média de empate provida pela OddsPortal (casa de aposta inglesa)\n",
264 | "#AvgA = probablidade média de vitória pelo time visitante provida pela OddsPortal (casa de aposta inglesa)\n"
265 | ]
266 | },
267 | {
268 | "cell_type": "code",
269 | "execution_count": 3,
270 | "metadata": {},
271 | "outputs": [
272 | {
273 | "name": "stdout",
274 | "output_type": "stream",
275 | "text": [
276 | "Total de jogos: 2279\n",
277 | "Total de colunas: 21\n",
278 | "Total de jogos ganhos em casa: 1133\n",
279 | "Total de jogos ganhos pelo visitante: 553\n",
280 | "Total de jogos empatados: 593\n",
281 | "Percentual de jogos ganhos em casa: 49.71%\n"
282 | ]
283 | }
284 | ],
285 | "source": [
286 | "#Explorando os dados\n",
287 | "#Base de dados de dos campeonatos de 2012 a 2017\n",
288 | "\n",
289 | "matches = data.shape[0]\n",
290 | "\n",
291 | "features = data.shape[1] -1 #Retirando a coluna Resultado\n",
292 | "\n",
293 | "home_win = len(data[data.Res==1])\n",
294 | "away_win = len(data[data.Res==2])\n",
295 | "draw = len(data[data.Res==0])\n",
296 | "val=[home_win,away_win,draw]\n",
297 | "\n",
298 | "win_rate = (float(home_win)/(matches)) *100\n",
299 | "\n",
300 | "print ('Total de jogos: ', matches)\n",
301 | "print ('Total de colunas: ', features)\n",
302 | "print ('Total de jogos ganhos em casa: ', home_win)\n",
303 | "print ('Total de jogos ganhos pelo visitante: ', away_win)\n",
304 | "print ('Total de jogos empatados: ', draw)\n",
305 | "print ('Percentual de jogos ganhos em casa: {:.2f}%'.format( win_rate ))\n",
306 | "\n",
307 | "#Podemos notar que o \"fator casa\" é importante , sendo que em quase 50% do jogos são ganhos pelo time da casa."
308 | ]
309 | },
310 | {
311 | "cell_type": "code",
312 | "execution_count": 4,
313 | "metadata": {},
314 | "outputs": [
315 | {
316 | "data": {
317 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAPk0lEQVR4nO3cf6zddX3H8edrVERlUn5cCGs7i7ObU7cxvEEYc1PqnKCxmMgiM1pZs2YJisoW7cwyMl0WTDa7ETeWRtCyMSZDHfgjKisYUYF4QQSxKg06uIPRa0CmMn+g7/1xPh2H9vbXPbfntv08H8nN+Xw/n8/3fD+n397X+ZzP+X5vqgpJUh9+ZqEHIEkaH0Nfkjpi6EtSRwx9SeqIoS9JHVm00APYlWOOOaaWL1++0MOQpAPKrbfe+u2qmpitbb8O/eXLlzM1NbXQw5CkA0qS/9xZm8s7ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUkf36jtxRLV/38YUewkHrWxe9fKGHIGkOnOlLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjuw39JJcl2ZrkK0N1RyW5Lsnd7fHIVp8kFyfZkuSOJCcN7bO69b87yep983IkSbuyJzP9DwAv265uHbCpqlYAm9o2wBnAivazFrgEBm8SwIXAC4CTgQu3vVFIksZnt6FfVZ8FHtquehWwsZU3AmcN1V9eAzcDi5McD/wucF1VPVRVDwPXseMbiSRpH5vrmv5xVfUAQHs8ttUvAe4b6jfd6nZWv4Mka5NMJZmamZmZ4/AkSbOZ7y9yM0td7aJ+x8qqDVU1WVWTExMT8zo4SerdXEP/wbZsQ3vc2uqngWVD/ZYC9++iXpI0RnMN/WuBbVfgrAauGap/fbuK5xTgkbb88yngpUmObF/gvrTVSZLGaNHuOiS5EngRcEySaQZX4VwEXJVkDXAvcHbr/gngTGAL8ChwLkBVPZTkXcAXW793VtX2Xw5Lkvax3YZ+VZ2zk6aVs/Qt4LydPM9lwGV7NTpJ0rzyjlxJ6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSMjhX6Stya5K8lXklyZ5LAkJyS5JcndST6Y5NDW98lte0trXz4fL0CStOfmHPpJlgDnA5NV9TzgEOA1wLuB9VW1AngYWNN2WQM8XFXPAta3fpKkMRp1eWcR8JQki4CnAg8ApwNXt/aNwFmtvKpt09pXJsmIx5ck7YU5h35V/Rfw18C9DML+EeBW4DtV9VjrNg0saeUlwH1t38da/6PnenxJ0t4bZXnnSAaz9xOAnwOeBpwxS9fatssu2oafd22SqSRTMzMzcx2eJGkWoyzvvAT4ZlXNVNWPgQ8DvwEsbss9AEuB+1t5GlgG0NqPAB7a/kmrakNVTVbV5MTExAjDkyRtb5TQvxc4JclT29r8SuCrwA3Aq1uf1cA1rXxt26a1X19VO8z0JUn7zihr+rcw+EL2NuDO9lwbgLcDFyTZwmDN/tK2y6XA0a3+AmDdCOOWJM3Bot132bmquhC4cLvqe4CTZ+n7A+DsUY4nSRqNd+RKUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWp
IyOFfpLFSa5O8rUkm5OcmuSoJNclubs9Htn6JsnFSbYkuSPJSfPzEiRJe2rUmf7fAZ+sqmcDvwZsBtYBm6pqBbCpbQOcAaxoP2uBS0Y8tiRpL8059JM8Hfgt4FKAqvpRVX0HWAVsbN02Ame18irg8hq4GVic5Pg5j1yStNdGmek/E5gB3p/kS0nel+RpwHFV9QBAezy29V8C3De0/3Sre4Ika5NMJZmamZkZYXiSpO0tGnHfk4A3VdUtSf6Ox5dyZpNZ6mqHiqoNwAaAycnJHdol7T+Wr/v4Qg/hoPWti16+T553lJn+NDBdVbe07asZvAk8uG3Zpj1uHeq/bGj/pcD9IxxfkrSX5hz6VfXfwH1JfqlVrQS+ClwLrG51q4FrWvla4PXtKp5TgEe2LQNJksZjlOUdgDcBVyQ5FLgHOJfBG8lVSdYA9wJnt76fAM4EtgCPtr6SpDEaKfSr6nZgcpamlbP0LeC8UY4nSRqNd+RKUkcMfUnqiKEvSR0Z9Ytcad54zfe+s6+u+daBx5m+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpIyOHfpJDknwpycfa9glJbklyd5IPJjm01T+5bW9p7ctHPbYkae/Mx0z/zcDmoe13A+uragXwMLCm1a8BHq6qZwHrWz9J0hiNFPpJlgIvB97XtgOcDlzdumwEzmrlVW2b1r6y9ZckjcmoM/2/Bd4G/LRtHw18p6oea9vTwJJWXgLcB9DaH2n9nyDJ2iRTSaZmZmZGHJ4kadicQz/JK4CtVXXrcPUsXWsP2h6vqNpQVZNVNTkxMTHX4UmSZrFohH1PA16Z5EzgMODpDGb+i5MsarP5pcD9rf80sAyYTrIIOAJ4aITjS5L20pxn+lX1p1W1tKqWA68Brq+q1wI3AK9u3VYD17TytW2b1n59Ve0w05ck7Tv74jr9twMXJNnCYM3+0lZ/KXB0q78AWLcPji1J2oVRlnf+X1V9BvhMK98DnDxLnx8AZ8/H8SRJc+MduZLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSR+Yc+kmWJbkhyeYkdyV5c6s/Ksl1Se5uj0e2+iS5OMmWJHckOWm+XoQkac+MMtN/DPjjqvpl4BTgvCTPAdYBm6pqBbCpbQOcAaxoP2uBS0Y4tiRpDuYc+lX1QFXd1srfBTYDS4BVwMbWbSNwViuvAi6vgZuBxUmOn/PIJUl7bV7W9JMsB34duAU4rqoegMEbA3Bs67YEuG9ot+lWt/1zrU0ylWRqZmZmPoYnSWpGDv0khwMfAt5SVf+zq66z1NUOFVUbqmqyqiYnJiZGHZ4kachIoZ/kSQwC/4qq+nCrfnDbsk173Nrqp4FlQ7svBe4f5fiSpL0zytU7AS4FNlfVe4aargVWt/Jq4Jqh+te3q3hOAR7ZtgwkSRqPRSPsexrwOuDOJLe3uncAFwFXJVkD3Auc3do+AZwJbAEeBc4d4diSpDmYc+hX1eeYfZ0eYOUs/Qs4b67HkySNzjtyJakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1JGxh36SlyX5epItSdaN+/iS1LOxhn6SQ4C/B84AngOck+Q54xyDJPVs3DP9k4EtVXVPVf0I+Fdg1ZjHIEnd
WjTm4y0B7hvangZeMNwhyVpgbdv8XpKvj2lsC+0Y4NsLPYg9lXcv9Aj2CwfMOfN8AQfQ+YKRz9kzdtYw7tDPLHX1hI2qDcCG8Qxn/5FkqqomF3oc2nOeswOL52tg3Ms708Cyoe2lwP1jHoMkdWvcof9FYEWSE5IcCrwGuHbMY5Ckbo11eaeqHkvyRuBTwCHAZVV11zjHsB/rbknrIOA5O7B4voBU1e57SZIOCt6RK0kdMfQlqSOG/jxL8r3ttt+Q5L0LNR7tuSSvSlJJnr3QY9HuJflJktuT3JXky0kuSGKm7Yb/QNLjzgE+x+CqMu3//reqTqyq5wK/A5wJXLh9pyTjvh9pv2boj1GSZyTZlOSO9vjzrf4DSS5JckOSe5L8dpLLkmxO8oGh/V+a5KYktyX5tySHL9iLOci0f8vTgDW00E/yD0le2cofSXJZK69J8pet/O9Jbm2zzbVD7euHnvsPk7xnzC+pK1W1lcGd/G/MwBva78hHgU8nObz9zt2W5M4kqwCSvC3J+a28Psn1rbwyyT8v2Avahwz9+feU9pHz9iS3A+8cansvcHlV/SpwBXDxUNuRwOnAW4GPAuuB5wK/kuTEJMcAfwa8pKpOAqaAC/b9y+nGWcAnq+obwENJTgI+C7ywtS9h8EcCAX4TuLGV/6Cqng9MAucnOZrB35R6ZZIntT7nAu8fw2voWlXdwyDTjm1VpwKrq+p04AfAq9rvzouBv0kSnniOJ4HD23kbPscHFUN//m37yHliVZ0I/PlQ26nAv7TyPzH4j7XNR2tw/eydwINVdWdV/RS4C1gOnMIgdD7f3kxWs4u/r6G9dg6DsKY9nsPgl/6F7S/BfhV4MMnxDM7jF1rf85N8GbiZwd3mK6rq+8D1wCva9wNPqqo7x/dSujb8p16uq6qHhur/KskdwH8weBM/DrgVeH6SnwV+CNzEIPxfyEEa+q51LazhmyR+2B5/OlTetr0I+AmD/8TnjGls3Wiz89OB5yUpBjcOFvA2Bp/AXsZgRngU8HvA96rqu0leBLwEOLWqHk3yGeCw9rTvA94BfA1n+WOR5JkMfk+2tqrvDzW/FpgAnl9VP07yLeCwofK5DN7I72DwSeAXgM1jGvpYOdMfry/w+JeEr2XwpeGeuhk4LcmzAJI8NckvzvP4evVqBstuz6iq5VW1DPgmg09iNwFvYRD6NwJ/wuMzwCOAh1vgP5vBpzEAquoWBjP/3weuHNsr6VSSCeAfgffW7HecHgFsbSH/Yp74KfmzDM7rtnP8R8DtO3meA56hP17nA+e2j5ivA968pztW1QzwBuDKtv/NgJcWzo9zgI9sV/chBoF9I7CoqrYAtzGY7W8L/U8Ci9r5eBeDczLsKuDzVfXwvhp457Z9f3YXgyWbTwN/sZO+VwCTSaYYTLi+NtR2I3A8cFNVPchg/f+gXNoB/wyDtM8k+Riwvqo2LfRYpG2c6UvzLMniJN9g8KW+ga/9ijN9SeqIM31J6oihL0kdMfQlqSOGviR1xNCXpI78HxsTuoenibfwAAAAAElFTkSuQmCC\n",
318 | "text/plain": [
319 | ""
320 | ]
321 | },
322 | "metadata": {
323 | "needs_background": "light"
324 | },
325 | "output_type": "display_data"
326 | }
327 | ],
328 | "source": [
329 | "#Visualizando os dados\n",
330 | "\n",
331 | "x = np.arange(3)\n",
332 | "plt.bar(x, val)\n",
333 | "plt.xticks(x, ('Home', 'Away', 'Draw'))\n",
334 | "plt.show()\n"
335 | ]
336 | },
337 | {
338 | "cell_type": "code",
339 | "execution_count": 5,
340 | "metadata": {},
341 | "outputs": [
342 | {
343 | "data": {
344 | "text/html": [
345 | "\n",
346 | "\n",
359 | "
\n",
360 | " \n",
361 | " \n",
362 | " | \n",
363 | " game_id | \n",
364 | " home_id | \n",
365 | " Away_id | \n",
366 | " HG | \n",
367 | " AG | \n",
368 | " Res | \n",
369 | " PH | \n",
370 | " PD | \n",
371 | " PA | \n",
372 | " MaxH | \n",
373 | " MaxD | \n",
374 | " MaxA | \n",
375 | " AvgH | \n",
376 | " AvgD | \n",
377 | " AvgA | \n",
378 | "
\n",
379 | " \n",
380 | " \n",
381 | " \n",
382 | " | 0 | \n",
383 | " 1 | \n",
384 | " 21 | \n",
385 | " 21 | \n",
386 | " 1 | \n",
387 | " 1 | \n",
388 | " 0 | \n",
389 | " 1.75 | \n",
390 | " 3.86 | \n",
391 | " 5.25 | \n",
392 | " 1.76 | \n",
393 | " 3.87 | \n",
394 | " 5.31 | \n",
395 | " 1.69 | \n",
396 | " 3.50 | \n",
397 | " 4.90 | \n",
398 | "
\n",
399 | " \n",
400 | " | 1 | \n",
401 | " 2 | \n",
402 | " 27 | \n",
403 | " 27 | \n",
404 | " 1 | \n",
405 | " 1 | \n",
406 | " 0 | \n",
407 | " 2.83 | \n",
408 | " 3.39 | \n",
409 | " 2.68 | \n",
410 | " 2.83 | \n",
411 | " 3.42 | \n",
412 | " 2.70 | \n",
413 | " 2.59 | \n",
414 | " 3.23 | \n",
415 | " 2.58 | \n",
416 | "
\n",
417 | " \n",
418 | " | 2 | \n",
419 | " 3 | \n",
420 | " 13 | \n",
421 | " 13 | \n",
422 | " 2 | \n",
423 | " 1 | \n",
424 | " 1 | \n",
425 | " 1.60 | \n",
426 | " 4.04 | \n",
427 | " 6.72 | \n",
428 | " 1.67 | \n",
429 | " 4.05 | \n",
430 | " 7.22 | \n",
431 | " 1.59 | \n",
432 | " 3.67 | \n",
433 | " 5.64 | \n",
434 | "
\n",
435 | " \n",
436 | " | 3 | \n",
437 | " 4 | \n",
438 | " 7 | \n",
439 | " 7 | \n",
440 | " 4 | \n",
441 | " 2 | \n",
442 | " 1 | \n",
443 | " 2.49 | \n",
444 | " 3.35 | \n",
445 | " 3.15 | \n",
446 | " 2.49 | \n",
447 | " 3.39 | \n",
448 | " 3.15 | \n",
449 | " 2.35 | \n",
450 | " 3.26 | \n",
451 | " 2.84 | \n",
452 | "
\n",
453 | " \n",
454 | " | 4 | \n",
455 | " 5 | \n",
456 | " 9 | \n",
457 | " 9 | \n",
458 | " 0 | \n",
459 | " 1 | \n",
460 | " 2 | \n",
461 | " 1.96 | \n",
462 | " 3.53 | \n",
463 | " 4.41 | \n",
464 | " 1.96 | \n",
465 | " 3.53 | \n",
466 | " 4.41 | \n",
467 | " 1.89 | \n",
468 | " 3.33 | \n",
469 | " 3.89 | \n",
470 | "
\n",
471 | " \n",
472 | "
\n",
473 | "
"
474 | ],
475 | "text/plain": [
476 | " game_id home_id Away_id HG AG Res PH PD PA MaxH MaxD MaxA \\\n",
477 | "0 1 21 21 1 1 0 1.75 3.86 5.25 1.76 3.87 5.31 \n",
478 | "1 2 27 27 1 1 0 2.83 3.39 2.68 2.83 3.42 2.70 \n",
479 | "2 3 13 13 2 1 1 1.60 4.04 6.72 1.67 4.05 7.22 \n",
480 | "3 4 7 7 4 2 1 2.49 3.35 3.15 2.49 3.39 3.15 \n",
481 | "4 5 9 9 0 1 2 1.96 3.53 4.41 1.96 3.53 4.41 \n",
482 | "\n",
483 | " AvgH AvgD AvgA \n",
484 | "0 1.69 3.50 4.90 \n",
485 | "1 2.59 3.23 2.58 \n",
486 | "2 1.59 3.67 5.64 \n",
487 | "3 2.35 3.26 2.84 \n",
488 | "4 1.89 3.33 3.89 "
489 | ]
490 | },
491 | "metadata": {},
492 | "output_type": "display_data"
493 | }
494 | ],
495 | "source": [
496 | "#Preparando os dados\n",
497 | "\n",
498 | "\n",
499 | "#Deixar somente as variáveis numericas \n",
500 | "num_data = data.drop(['Country','League','Season','Date','Time','Home','Away'],1)\n",
501 | "\n",
502 | "display(num_data.head())\n"
503 | ]
504 | },
505 | {
506 | "cell_type": "code",
507 | "execution_count": 6,
508 | "metadata": {},
509 | "outputs": [
510 | {
511 | "name": "stdout",
512 | "output_type": "stream",
513 | "text": [
514 | "Features\n",
515 | " game_id home_id Away_id HG AG PH PD PA MaxH MaxD MaxA \\\n",
516 | "0 1 21 21 1 1 1.75 3.86 5.25 1.76 3.87 5.31 \n",
517 | "1 2 27 27 1 1 2.83 3.39 2.68 2.83 3.42 2.70 \n",
518 | "2 3 13 13 2 1 1.60 4.04 6.72 1.67 4.05 7.22 \n",
519 | "3 4 7 7 4 2 2.49 3.35 3.15 2.49 3.39 3.15 \n",
520 | "4 5 9 9 0 1 1.96 3.53 4.41 1.96 3.53 4.41 \n",
521 | "\n",
522 | " AvgH AvgD AvgA \n",
523 | "0 1.69 3.50 4.90 \n",
524 | "1 2.59 3.23 2.58 \n",
525 | "2 1.59 3.67 5.64 \n",
526 | "3 2.35 3.26 2.84 \n",
527 | "4 1.89 3.33 3.89 \n",
528 | "=========\n",
529 | "Labels\n",
530 | "0 0\n",
531 | "1 0\n",
532 | "2 1\n",
533 | "3 1\n",
534 | "4 2\n",
535 | "Name: Res, dtype: int64\n"
536 | ]
537 | }
538 | ],
539 | "source": [
540 | "#separa as features \n",
541 | "features = num_data.drop(['Res'],1)\n",
542 | "\n",
543 | "\n",
544 | "#separa as labels\n",
545 | "labels = num_data['Res']\n",
546 | "\n",
547 | "print('Features')\n",
548 | "print (features.head())\n",
549 | "\n",
550 | "print ('=========')\n",
551 | "\n",
552 | "print ('Labels')\n",
553 | "print (labels.head())"
554 | ]
555 | },
556 | {
557 | "cell_type": "code",
558 | "execution_count": 7,
559 | "metadata": {},
560 | "outputs": [
561 | {
562 | "name": "stdout",
563 | "output_type": "stream",
564 | "text": [
565 | "\n",
566 | "Melhores features:\n",
567 | "{'PA': 740.4065621193932, 'MaxH': 731.7074700471329, 'MaxD': 81.51748572743963, 'AvgD': 80.67054047107476, 'AvgH': 63.290872018071454, 'MaxA': 40.62018755299711, 'AvgA': 40.54491193487282, 'AG': 0.6533077251586757, 'PD': 0.027363118243961364, 'PH': 0.027363118243961364}\n"
568 | ]
569 | }
570 | ],
571 | "source": [
572 | "#Escolhendo as melhores features com SelectKBest\n",
573 | "\n",
574 | "features_list = ('HG','AG','PH','PD','PA','MaxH','MaxD','MaxA','AvgH','AvgD','AvgA')\n",
575 | "\n",
576 | "k_best_features = SelectKBest(k='all')\n",
577 | "k_best_features.fit_transform(features, labels)\n",
578 | "k_best_features_scores = k_best_features.scores_\n",
579 | "raw_pairs = zip(features_list[1:], k_best_features_scores)\n",
580 | "ordered_pairs = list(reversed(sorted(raw_pairs, key=lambda x: x[1])))\n",
581 | "\n",
582 | "k_best_features_final = dict(ordered_pairs[:15])\n",
583 | "best_features = k_best_features_final.keys()\n",
584 | "print ('')\n",
585 | "print (\"Melhores features:\")\n",
586 | "print (k_best_features_final)"
587 | ]
588 | },
589 | {
590 | "cell_type": "code",
591 | "execution_count": 8,
592 | "metadata": {},
593 | "outputs": [
594 | {
595 | "name": "stdout",
596 | "output_type": "stream",
597 | "text": [
598 | "Features\n",
599 | " HG PA MaxH MaxD MaxA AvgH AvgD AvgA\n",
600 | "0 1 5.25 1.76 3.87 5.31 1.69 3.50 4.90\n",
601 | "1 1 2.68 2.83 3.42 2.70 2.59 3.23 2.58\n",
602 | "2 2 6.72 1.67 4.05 7.22 1.59 3.67 5.64\n",
603 | "3 4 3.15 2.49 3.39 3.15 2.35 3.26 2.84\n",
604 | "4 0 4.41 1.96 3.53 4.41 1.89 3.33 3.89\n",
605 | "=========\n",
606 | "Labels\n",
607 | "0 0\n",
608 | "1 0\n",
609 | "2 1\n",
610 | "3 1\n",
611 | "4 2\n",
612 | "Name: Res, dtype: int64\n"
613 | ]
614 | }
615 | ],
616 | "source": [
617 | "#separa as features com base nas melhores features para treinamento\n",
618 | "features = num_data.drop(['Res','game_id','home_id','Away_id', 'AG','PD','PH'],1)\n",
619 | "\n",
620 | "\n",
621 | "#separa as labels para treinamento\n",
622 | "labels = num_data['Res']\n",
623 | "\n",
624 | "print('Features')\n",
625 | "print (features.head())\n",
626 | "\n",
627 | "print ('=========')\n",
628 | "\n",
629 | "print ('Labels')\n",
630 | "print (labels.head())\n"
631 | ]
632 | },
633 | {
634 | "cell_type": "code",
635 | "execution_count": 9,
636 | "metadata": {},
637 | "outputs": [
638 | {
639 | "name": "stdout",
640 | "output_type": "stream",
641 | "text": [
642 | "Features: (2279, 8)\n",
643 | "[[0.16666667 0.21853547 0.07060334 ... 0.08239095 0.15945946 0.26920093]\n",
644 | " [0.16666667 0.0715103 0.20795892 ... 0.22778675 0.08648649 0.08921645]\n",
645 | " [0.33333333 0.30263158 0.05905006 ... 0.06623586 0.20540541 0.32660978]\n",
646 | " ...\n",
647 | " [0.16666667 0.41647597 0.02695764 ... 0.03069467 0.50540541 0.48875097]\n",
648 | " [0.33333333 0.50171625 0.02439024 ... 0.02423263 0.51351351 0.60822343]\n",
649 | " [0.16666667 0.10526316 0.13863928 ... 0.15831987 0.13783784 0.14041893]]\n"
650 | ]
651 | }
652 | ],
653 | "source": [
654 | "# Normalizando os dados de entrada(features)\n",
655 | "\n",
656 | "# Gerando o novo padrão\n",
657 | "scaler = MinMaxScaler().fit(features)\n",
658 | "features_scale = scaler.transform(features)\n",
659 | "\n",
660 | "print ('Features: ',features_scale.shape)\n",
661 | "print (features_scale)"
662 | ]
663 | },
664 | {
665 | "cell_type": "code",
666 | "execution_count": 10,
667 | "metadata": {},
668 | "outputs": [
669 | {
670 | "name": "stdout",
671 | "output_type": "stream",
672 | "text": [
673 | "1932 1932\n",
674 | "223 223\n"
675 | ]
676 | }
677 | ],
678 | "source": [
679 | "#Separa em treinamento e teste\n",
680 | "#Separação manual para manter a ordem cronológica, uma vez que temos informação temporal. \n",
681 | "#Treino linhas [:1932]\n",
682 | "#Teste linhas [1932:2155]\n",
683 | "#previsão linhas [2155:2279]\n",
684 | "\n",
685 | "\n",
686 | "X_train = features_scale[:1932]\n",
687 | "X_test = features_scale[1932:2155]\n",
688 | "y_train = labels[:1932]\n",
689 | "y_test = labels[1932:2155]\n",
690 | "\n",
691 | "print( len(X_train), len(y_train))\n",
692 | "\n",
693 | "print( len(X_test), len(y_test))\n"
694 | ]
695 | },
696 | {
697 | "cell_type": "code",
698 | "execution_count": 11,
699 | "metadata": {},
700 | "outputs": [
701 | {
702 | "name": "stdout",
703 | "output_type": "stream",
704 | "text": [
705 | "LogisticRegression\n",
706 | "Acurácia LogisticRegression:0.57847533632287\n",
707 | "F1 Score:0.57847533632287\n"
708 | ]
709 | }
710 | ],
711 | "source": [
712 | "#Treinando e testando os modelos\n",
713 | "print ('LogisticRegression')\n",
714 | "\n",
715 | "\n",
716 | "clf_LR = LogisticRegression(multi_class='multinomial',max_iter=2000)\n",
717 | "clf_LR.fit(X_train, y_train)\n",
718 | "pred= clf_LR.predict(X_test)\n",
719 | "\n",
720 | "lg_acc = accuracy_score(y_test, pred)\n",
721 | "f1=f1_score(y_test,pred,average = 'micro')\n",
722 | "print ('Acurácia LogisticRegression:{}'.format(lg_acc))\n",
723 | "print ('F1 Score:{}'.format(f1) )"
724 | ]
725 | },
726 | {
727 | "cell_type": "code",
728 | "execution_count": 12,
729 | "metadata": {},
730 | "outputs": [
731 | {
732 | "name": "stdout",
733 | "output_type": "stream",
734 | "text": [
735 | "Acurácia LogisticRegression:0.57847533632287\n",
736 | "F1 Score:0.5282488231236308\n",
737 | "LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,\n",
738 | " intercept_scaling=1, l1_ratio=None, max_iter=1000,\n",
739 | " multi_class='auto', n_jobs=None, penalty='l2',\n",
740 | " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n",
741 | " warm_start=False)\n"
742 | ]
743 | }
744 | ],
745 | "source": [
746 | "#Testando LogisticRegression hyper parameters\n",
747 | "\n",
748 | "param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000] }\n",
749 | "\n",
750 | "search = GridSearchCV(LogisticRegression(max_iter=1000), param_grid)\n",
751 | "\n",
752 | "search.fit(X_train,y_train)\n",
753 | "clf = search.best_estimator_\n",
754 | "pred= clf.predict(X_test)\n",
755 | "lg_acc = accuracy_score(y_test, pred)\n",
756 | "\n",
757 | "\n",
758 | "f1=f1_score(y_test,pred,average = 'macro')\n",
759 | "\n",
760 | "print ('Acurácia LogisticRegression:{}'.format(lg_acc))\n",
761 | "print ('F1 Score:{}'.format(f1) )\n",
762 | "\n",
763 | "print (clf)"
764 | ]
765 | },
766 | {
767 | "cell_type": "code",
768 | "execution_count": 13,
769 | "metadata": {},
770 | "outputs": [
771 | {
772 | "name": "stdout",
773 | "output_type": "stream",
774 | "text": [
775 | "SVC\n",
776 | "Acurácia SVC:0.57847533632287\n",
777 | "F1 Score:0.57847533632287\n"
778 | ]
779 | }
780 | ],
781 | "source": [
782 | "#Treinando e testando os modelos\n",
783 | "print ('SVC')\n",
784 | "\n",
785 | "\n",
786 | "clf = SVC()\n",
787 | "clf.fit(X_train, y_train)\n",
788 | "pred= clf.predict(X_test)\n",
789 | "\n",
790 | "svc_acc = accuracy_score(y_test, pred)\n",
791 | "f1=f1_score(y_test,pred, average='micro')\n",
792 | "print ('Acurácia SVC:{}'.format(svc_acc))\n",
793 | "print ('F1 Score:{}'.format(f1) )"
794 | ]
795 | },
796 | {
797 | "cell_type": "code",
798 | "execution_count": 14,
799 | "metadata": {},
800 | "outputs": [
801 | {
802 | "name": "stdout",
803 | "output_type": "stream",
804 | "text": [
805 | "F1 Score:0.5919282511210763\n",
806 | "Acurácia LogisticRegression:0.5919282511210763\n",
807 | "SVC(C=100, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,\n",
808 | " decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',\n",
809 | " max_iter=-1, probability=False, random_state=None, shrinking=True,\n",
810 | " tol=0.001, verbose=False)\n"
811 | ]
812 | }
813 | ],
814 | "source": [
815 | "#Testando SVC hyper parameters\n",
816 | "\n",
817 | "param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000] }\n",
818 | "\n",
819 | "search = GridSearchCV(SVC(), param_grid)\n",
820 | "\n",
821 | "search.fit(X_train,y_train)\n",
822 | "clf_SVC = search.best_estimator_\n",
823 | "pred= clf_SVC.predict(X_test)\n",
824 | "acc = accuracy_score(y_test, pred)\n",
825 | "\n",
826 | "\n",
827 | "f1=f1_score(y_test,pred,average = 'micro')\n",
828 | "\n",
829 | "print ('F1 Score:{}'.format(f1))\n",
830 | "\n",
831 | "print ('Acurácia LogisticRegression:{}'.format(acc))\n",
832 | "\n",
833 | "print(clf_SVC)"
834 | ]
835 | },
836 | {
837 | "cell_type": "code",
838 | "execution_count": 15,
839 | "metadata": {},
840 | "outputs": [
841 | {
842 | "name": "stdout",
843 | "output_type": "stream",
844 | "text": [
845 | "Decision Tree\n",
846 | "Acurácia Tree:0.45739910313901344\n",
847 | "F1 Score:0.42806053301191915\n"
848 | ]
849 | }
850 | ],
851 | "source": [
852 | "#Treinando e testando os modelos\n",
853 | "print ('Decision Tree')\n",
854 | "\n",
855 | "\n",
856 | "clf = DecisionTreeClassifier()\n",
857 | "clf.fit(X_train, y_train)\n",
858 | "pred= clf.predict(X_test)\n",
859 | "\n",
860 | "dt_acc = accuracy_score(y_test, pred)\n",
861 | "f1=f1_score(y_test,pred, average='macro')\n",
862 | "print ('Acurácia Tree:{}'.format(dt_acc))\n",
863 | "print ('F1 Score:{}'.format(f1) )\n",
864 | "\n",
865 | "\n",
866 | "n_estimators = [10, 50, 100, 200]\n",
867 | "max_depth = [3, 10, 20, 40]"
868 | ]
869 | },
870 | {
871 | "cell_type": "code",
872 | "execution_count": 16,
873 | "metadata": {},
874 | "outputs": [
875 | {
876 | "name": "stdout",
877 | "output_type": "stream",
878 | "text": [
879 | "Decision Tree\n",
880 | "Acurácia Decision Tree:0.47533632286995514\n",
881 | "F1 Score:0.47533632286995514\n",
882 | "DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',\n",
883 | " max_depth=80, max_features=3, max_leaf_nodes=None,\n",
884 | " min_impurity_decrease=0.0, min_impurity_split=None,\n",
885 | " min_samples_leaf=5, min_samples_split=12,\n",
886 | " min_weight_fraction_leaf=0.0, presort='deprecated',\n",
887 | " random_state=None, splitter='best')\n"
888 | ]
889 | }
890 | ],
891 | "source": [
892 | "#Testando Decision tree hyper parameters\n",
893 | "print ('Decision Tree')\n",
894 | "\n",
895 | "param_grid = {\n",
896 | " 'max_depth': [80, 90, 100, 110],\n",
897 | " 'max_features': [2, 3],\n",
898 | " 'min_samples_leaf': [3, 4, 5],\n",
899 | " 'min_samples_split': [8, 10, 12]\n",
900 | " \n",
901 | " \n",
902 | "}\n",
903 | "\n",
904 | "search = GridSearchCV(DecisionTreeClassifier(), param_grid)\n",
905 | "\n",
906 | "search.fit(X_train,y_train)\n",
907 | "clf = search.best_estimator_\n",
908 | "pred= clf.predict(X_test)\n",
909 | "dt_acc = accuracy_score(y_test, pred)\n",
910 | "\n",
911 | "\n",
912 | "f1=f1_score(y_test,pred,average = 'micro')\n",
913 | "\n",
914 | "print ('Acurácia Decision Tree:{}'.format(dt_acc))\n",
915 | "print ('F1 Score:{}'.format(f1) )\n",
916 | "\n",
917 | "print (clf)"
918 | ]
919 | },
920 | {
921 | "cell_type": "code",
922 | "execution_count": 17,
923 | "metadata": {},
924 | "outputs": [
925 | {
926 | "name": "stdout",
927 | "output_type": "stream",
928 | "text": [
929 | "Naive baeys\n",
930 | "Acurácia Naive baeys:0.5201793721973094\n",
931 | "F1 Score:0.5201793721973094\n"
932 | ]
933 | }
934 | ],
935 | "source": [
936 | "#Treinando e testando os modelos\n",
937 | "print ('Naive baeys')\n",
938 | "\n",
939 | "\n",
940 | "clf = GaussianNB()\n",
941 | "clf.fit(X_train, y_train)\n",
942 | "pred= clf.predict(X_test)\n",
943 | "\n",
944 | "nb_acc = accuracy_score(y_test, pred)\n",
945 | "f1=f1_score(y_test,pred, average='micro')\n",
946 | "print ('Acurácia Naive baeys:{}'.format(nb_acc))\n",
947 | "print ('F1 Score:{}'.format(f1) )"
948 | ]
949 | },
950 | {
951 | "cell_type": "code",
952 | "execution_count": 23,
953 | "metadata": {},
954 | "outputs": [
955 | {
956 | "name": "stdout",
957 | "output_type": "stream",
958 | "text": [
959 | " real previsao game_id\n",
960 | "2155 1 1 2156\n",
961 | "2156 1 1 2157\n",
962 | "2157 2 2 2158\n",
963 | "2158 1 1 2159\n",
964 | "2159 1 1 2160\n",
965 | "... ... ... ...\n",
966 | "2274 0 0 2275\n",
967 | "2275 0 0 2276\n",
968 | "2276 1 1 2277\n",
969 | "2277 1 1 2278\n",
970 | "2278 2 1 2279\n",
971 | "\n",
972 | "[124 rows x 3 columns]\n"
973 | ]
974 | }
975 | ],
976 | "source": [
977 | "#Executando a previsao\n",
978 | "\n",
979 | "previsao=features_scale[2155:]\n",
980 | "\n",
981 | "game_id_full=data['game_id']\n",
982 | "game_id=game_id_full[2155:]\n",
983 | "\n",
984 | "res_full=data['Res']\n",
985 | "res=res_full[2155:]\n",
986 | "\n",
987 | "\n",
988 | "pred=clf_SVC.predict(previsao)\n",
989 | "\n",
990 | "df=pd.DataFrame({'real': res, 'previsao':pred, 'game_id':game_id})\n",
991 | "\n",
992 | "print(df)"
993 | ]
994 | },
995 | {
996 | "cell_type": "code",
997 | "execution_count": 24,
998 | "metadata": {},
999 | "outputs": [
1000 | {
1001 | "data": {
1002 | "text/plain": [
1003 | ""
1004 | ]
1005 | },
1006 | "execution_count": 24,
1007 | "metadata": {},
1008 | "output_type": "execute_result"
1009 | },
1010 | {
1011 | "data": {
1012 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWgAAAEGCAYAAABIGw//AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAYbUlEQVR4nO3deZhU5Zn+8e/dTTNIQASVFmWRKCouqHGPjls0P00kopFofk6CEe04CYlLFrcrjppEcU80cQhxw2gUNXFUdIgOSiBGEXDBhUTcJSK4oSwi0DzzRx2dlkBXNV3V5+3q++N1rqo6VXXqqbram7eec95TigjMzCw9NXkXYGZma+aANjNLlAPazCxRDmgzs0Q5oM3MEtUp7wLW5vV3P/LhJRU2ZtqreZdQ9U7eZ2DeJXQIvbvXqbXbWG/nUSVnzodP/KrVr1cKj6DNzBKV7AjazKxNKb3xqgPazAygpjbvCv6JA9rMDEBt0lZuEQe0mRm4xWFmliyPoM3MEuURtJlZojyCNjNLlI/iMDNLlFscZmaJcovDzCxRHkGbmSXKAW1mlqha7yQ0M0uTe9BmZolyi8PMLFEeQZuZJcojaDOzRHkEbWaWqDJO9Zb0CrAIaARWRsSuknoB44HNgVeAr0XEe82WVLaKzMzaM9WUvpTmgIjYKSJ2zW6fAUyKiEHApOx2sxzQZmZQaHGUuqybw4Fx2fVxwLBiT3BAm5lBuUfQAdwvaaakhmxdfUTMA8guexfbiHvQZmbQoqM4stBtaLJqbESMbXJ774h4Q1Jv4AFJf1uXkhzQZmbQop2EWRiPbeb+N7LLBZLuBHYH5kvqExHzJPUBFhQtqeSKzMyqWZl60JI+I6n7x9eBLwLPAHcDI7KHjQDuKlaSR9BmZlDOiSr1wJ0qBHkn4PcRMVHSdOA2SSOB14DhxTbkgDYzg7JNVImIl4Ad17D+HeALLdmWA9rMDJBnEpqZpckBbWaWKNU4oKvS4kUfcNmF5/LKiy8giR+efT7b7vBPLShrgaXvvcX0m69g2QfvoRoxcK9DGLTfVwB4Yco9vDD1Xmpqa9hk290Y8pVv5VxtdWlsbOTEbxzNRr17c/Evrs67nDbjEXSV+vUVF7HbnnvzHxdczooVK/ho2Yd5l9TuqaaWIYcfT89+W7Ji2VImXXYq9VvvxLJFC3njmWkcfPpV1HaqY9mihXmXWnVuv+UmBgz8LEuWLM67lDaVYkD7OOhWWrJkMU8/OZNDhx4JQF1dHd26r59zVe3fej160bPflgDUdelK9/p+fPj+O7z08H1s/YWjqO1UB0CX7hvkWWbVWTD/TR55eAqHDftq3qW0OUklL22lYiNoSdtQODnIZhTmpb8B3B0Rsyv1mnmY94+59NigF5f87Ce8OOd5ttpmMN859XTWW69r3qVVjSXvzGfh3BfpNWBrZt11PW+/9CzP3Ps7auvqGHL48fTqv1XeJVaNKy+7iO98/zSWLlmSdyltL70BdGVG0JJOB26l8JYfA6Zn12+RVPQUe+1JY2Mjc56fzdAjv8ZvbryNLuutx603Xpd3WVVj5Ucf8sj1F7LTESdS16UrsaqRFUsXc+CplzLkK8fz6A0XERF5l1kVHp46mZ69erH14O3yLiUXKY6gK9XiGAnsFhGjI+KmbBlNYT76yLU9SVKDpBmSZtw87poKlVZeG/euZ+ON6xm83RAA9j3gYOY8X1VfEnKzqnElj1x3If132Z/Ndvw8AOttsBGbDvk8kug1YCukGpYv+SDnSqvD0089wcNTJjN86Bc59+wf8fj0xzj/J6fnXVabqampKXlpK5VqcawCNgVeXW19n+y+NWp6ApLX3/2oXQyLem24ERvX1/P6qy/Tb8BAHp8xjQGbfzbvstq9iGDGLVfSvb4fWx3wf6fN3XSHPXlrzlP0HrQDixb8g1WNK+n8Gff8y+GkUady0qhTAXhixmPcctM
NnPPTi3Kuqu2kuJOwUgF9CjBJ0hzg9Wxdf2BLYFSFXjM3o047kwvPPZMVK1bQZ7O+/Ojsn+ZdUrv3zsvP8dqMh+jRZ3MeuPj7AGx/2DcZuMdBzLjlSu4f/V1qOnVit/9/SpL/Y1k7lOCfkSrVv5NUQ6GlsRmFtz4XmB4RjaU8v72MoNuzMdNW/4Jj5XbyPgPzLqFD6N29rtXxutFxt5acOW/fcEybxHnFjuKIiFXAo5XavplZOaX4TcwTVczM8FRvM7NkeQRtZpYoB7SZWaIc0GZmiXJAm5mlKr18dkCbmQFtOoW7VA5oMzPc4jAzS1d6+eyANjMDj6DNzJLlgDYzS5QD2swsUT4Xh5lZojyCNjNLlAPazCxRCeazA9rMDNIcQac3t9HMLAc1NSp5KYWkWklPSJqQ3R4oaZqkOZLGS+pctKZWviczs6oglb6U6GRgdpPbFwFXRMQg4D1gZLENOKDNzCjvCFpSX+DLwDXZbQEHAndkDxkHDCta0zq/GzOzKtKSEbSkBkkzmiwNq23uF8CPgVXZ7Q2BhRGxMrs9F9isWE3eSWhmRst2EkbEWGDsWrZzGLAgImZK2v/j1WvaTLHXcUCbmVHWw+z2Br4i6UtAF2B9CiPqDSR1ykbRfYE3im3ILQ4zMwon7C91aU5EnBkRfSNic+AY4MGIOBZ4CDgqe9gI4K6iNbXuLZmZVYcKHMWxutOB0yS9QKEnfW2xJ7jFYWZGZSaqRMRkYHJ2/SVg95Y83wFtZoanepuZJSvFqd4OaDMzPII2M0tWqefYaEvJBnSvbkXPI2KtdPlZV+ZdQtU7a9pVeZdgJXKLw8wsUQnmswPazAw8gjYzS1aC+eyANjMD7yQ0M0uWWxxmZolyQJuZJSrBfHZAm5mBR9BmZslKMJ8d0GZm4KM4zMySVZPgENoBbWaGWxxmZsnyTkIzs0Ql2IJ2QJuZgXcSmpklSzigzcySlOAA2gFtZgbeSWhmlqwE89kBbWYGnqhiZpYsH8VhZpaoBAfQDmgzM3CLw8wsWenFswPazAxI8zC7mrwLMDNLQY1KX5ojqYukxyQ9JelZSedl6wdKmiZpjqTxkjoXrak8b83MrH2rqVHJSxEfAQdGxI7ATsAhkvYELgKuiIhBwHvAyKI1tfI9mZlVBUklL82JgsXZzbpsCeBA4I5s/ThgWLGaHNBmZrSsxSGpQdKMJktD021JqpX0JLAAeAB4EVgYESuzh8wFNitWk3cSmpnRsp2EETEWGNvM/Y3ATpI2AO4EBq/pYcVep9mAlnRkkSL/WOwFzMzag0ocwxERCyVNBvYENpDUKRtF9wXeKPb8YiPooc29NuCANrOqUFumqd6SNgZWZOG8HnAQhR2EDwFHAbcCI4C7im2r2YCOiG+1vtzqd+5PzmLqlMn06rUht995T97lVJW/3Xsei5Z8ROOqVaxsXMU+x14MwL8fsx8nHb0vKxtXMXHqM5z9y6J/61ZER/87LuNx0H2AcZJqKeznuy0iJkh6DrhV0s+AJ4Bri22o5B60pC8D2wFdPl4XEee3tPJqNPTwIzj668dyztln5F1KVTqk4Ze8s3DJJ7f33XUQh+2/A7t97UKWr1jJxj275Vhd9ejof8flyueImAXsvIb1LwG7t2RbJR3FIWkMcDTwPQqtmuHAgJa8UDXbZdfd6NGjR95ldBgNw/+VS69/gOUrCjvE33pvcZFnWCk6+t9xjVTy0mY1lfi4z0fEN4H3IuI8YC+g37q8oCS3TaxkEcE9V4/i4Zt/zPFH7g3AlgN6s/fOWzDlxh9y/zUns8u2/XOu0qqBVPrSVkptcXyYXS6VtCnwDjBwHV/zPOD6Nd2RHUvYAHDlr8dw/AkNa3qYdSAHfusK5r31Phv37MaEMaP4+ytv0qm2hp7rd2Xfb17KrtsN4KaLj2fwYefmXaq1cymei6PUgJ6QHc93CfA4hSM4rlnbgyXNWttdQP3antf02MIly6PoMYJW/ea99T5QaGPc/eA
sdttuc/4xfyH/NekpAGY8+yqrVgUb9ezG2251WCvUtteAjoifZlf/IGkC0CUi3m/mKfXA/6Mw37wpAX9tcZXWIXXt0pmaGrF46Ud07dKZg/bahgvG/jeLP/yI/Xffiqkz57Bl/950ruvkcLZWS/AHVUoLaEldgR8A/SPiREn9Jf1rRExYy1MmAN0i4sk1bGvyOlebqDN/fBozp09n4cL3OOQL+3HSd7/HsCOPyrusdq/3ht0Zf/mJAHSqrWX8f8/ggb/Opq5TLb8591hm3H4Wy1c0csI5v8u50urQ0f+OUwxoRQmdBEnjgZnANyNi++zg60ciYqdKFeYWR+VttMf38i6h6r097aq8S+gQPtO59f2JH9zz95Iz57KhW7dJnJd6FMcWEXExsAIgIj4kzR8gMDNbJ+U6H3Q5lbqTcHk2ag4ASVtQOOepmVlVSHAfYfGAVuHYkzHARKCfpJuBvYHjKluamVnb6ZRgQhcN6IgISScDX6RwRiYBJ0fE25UuzsysrSSYzyW3OB4FPhsR91ayGDOzvLTlFO5SlRrQBwDflvQqsITCKDoiYkjFKjMza0MJ5nPJAX1oRaswM8tZisdBlzqT8NVKF2JmlqdynbC/nPybhGZmtOMRtJlZtVOCc+8c0GZmeARtZpYsB7SZWaLa8wn7zcyqWm2pp45rQw5oMzPa90xCM7Oq5h60mVmiEhxAO6DNzABqfBy0mVmaPII2M0tUpwSb0A5oMzM8gjYzS5YPszMzS1SC+UyCc2fMzNpeTQuW5kjqJ+khSbMlPZv9piuSekl6QNKc7LJnKTWZmXV4NVLJSxErgR9ExGAKP7T9XUnbAmcAkyJiEDApu918Ta18T2ZmVaFcAR0R8yLi8ez6ImA2sBlwODAue9g4YFjRmlr1jszMqoRaskgNkmY0WRrWuE1pc2BnYBpQHxHzoBDiQO9iNXknoZkZLdtJGBFjgbHNb0/dgD8Ap0TEB+tyOlMHtJkZ5T0ftKQ6CuF8c0T8MVs9X1KfiJgnqQ+woNh23OIwM6OsR3EIuBaYHRGXN7nrbmBEdn0EcFexmjyCNjOjrBNV9ga+ATwt6cls3VnAaOA2SSOB14DhxTakiChXUWW1bCVpFlZFJj73Zt4lVL3+PbrmXUKH8LkB67c6Xe94al7JmXPUjn3aZFqLR9BmZqTZ73VAm5nhH401M0tWevHsgDYzA6DWI2gzszQlmM8OaDMzACXY5HBAm5nhEbSZWbL8q95mZonyCNrMLFH+TUIzs0TVpJfPDmgzM/BRHGZmyUqww+GANjMDj6DNzJLlHrSZWaJ8FIeZWaLSi2cHtJkZ4BG0mVmy0otnB7SZWUGCCe2ANjPDLQ4zs2SlF88OaDOzggQT2gFtZoZnEpqZJSvBFrQD2swMkuxwOKDNzACU4BDaAW1mhlscZmbJSjCfHdBmZkCSCV2TdwFmZilQC/4rui3pOkkLJD3TZF0vSQ9ImpNd9iy2HY+gy+DhqVO4aPTPWdW4iiO+OpyRJzbkXVJVuP3q0cye+QjdevTktMtvAOCNl+fwx99ezsrly6mpreWIE06l36DB+Rbajo257HyeePQvrL9BTy757XgAbh77Sx5/dCq1dXXU9+nLST88h890655zpZVX5h70DcCvgBubrDsDmBQRoyWdkd0+vbmNeATdSo2NjVzw8/O5esw13Hn3vUy8bwIvvvBC3mVVhV32P5SRZ1/yqXX33TSGg4aP4JRLr+WLRx/PfTeNyam66rDfwYdxxgVXfmrdDp/bg4t/eysX/+YW+vTtz1233pBPcW1MKn0pJiKmAO+utvpwYFx2fRwwrNh2HNCt9MzTs+jXbwB9+/WjrnNnDvnSl5n80KS8y6oKn912R9ZbfeQm8dHSpQAsW7qY9XtumENl1WPwkM/Rrfv6n1o3ZNc9qa0tfLketM32vPvW/DxKa3MtaXFIapA0o8lSytfm+oiYB5Bd9i72hIq1OCRtA2wGTIuIxU3WHxIREyv1um1
twfz5bNJnk09u966v5+lZs3KsqLoNPW4U1/7sR9z7u6uJVcF3fv7rvEuqapP/dDd77ndw3mW0iZa0OCJiLDC2YsVkKjKClvR94C7ge8Azkg5vcvcFlXjNvATxT+tSPOC9Wjx6/10MPW4UZ425g8OO+y53/OfFeZdUte78/XXU1HZiny8cmncpbUItWNbRfEl9ALLLBcWeUKkWx4nALhExDNgf+Imkk7P71vr+mn5tuPa3Ff/HqSzq6zfhzXlvfnJ7wfz59O5d9JuLraOZk//E9nvsC8CQvQ7g9Rdm51xRdfrz/RN4YtpfGHXGTzvOgKPyCX03MCK7PoLCILZZlWpx1H7c1oiIVyTtD9whaQDNvL2mXxuWrVzD0DRB222/A6+99gpz575Ofe96Jt53LxdeclneZVWt9XttyEvPPckW2+3Mi888zkab9M27pKrz5PS/cs9tN3LOpb/hX7p0ybucNlPOE/ZLuoXC4HQjSXOB/wBGA7dJGgm8Bgwvup2I8uegpAeB0yLiySbrOgHXAcdGRG2xbbSXgAaYOuXPXDz6AlatamTYEV/lxG//e94llWTic28Wf1COfv+L83jp2SdZsuh9uvXoxcFf+xYbb9qPe66/ilWrGulU15lhJ5xK3y22zrvUterfo2veJTTrygvOZvasmSx6fyE9em7IUd9o4K7xN7Bi+XK6r98DgC0H78AJJ5+Zc6XN+9yA9Vudrs+/ubTkzNlqk65t8rWiUgHdF1gZEf+UAJL2joiHi22jPQV0e5V6QFeD1AO6WpQloOe3IKDr2yagK9LiiIi5zdxXNJzNzNqaT9hvZpaoFPeFOqDNzEjyXEkOaDMzSHP+ggPazAy3OMzMkpVgPjugzcyAJBPaAW1mhg+zMzNLlnvQZmaJqnFAm5mlKr2EdkCbmeEWh5lZshLMZwe0mRl4BG1mlixP9TYzS1R68eyANjMD3OIwM0uWZxKamaUqvXx2QJuZQZL57IA2MwOoSbAJ7YA2MyPNnYQ1eRdgZmZr5hG0mRlpjqAd0GZm+DA7M7NkeQRtZpYoB7SZWaLc4jAzS1SKI2gfZmdmRmEmYalL0W1Jh0j6u6QXJJ2xrjU5oM3MoGwJLakW+DVwKLAt8HVJ265LSW5xmJlR1qneuwMvRMRLAJJuBQ4HnmvphpIN6C6dEuzYFyGpISLG5l1HqYYN2STvElqsvX3G7VFH/YxbkjmSGoCGJqvGNvnMNgNeb3LfXGCPdanJLY7yaij+EGslf8aV58+4iIgYGxG7Nlma/oO2pqCPdXkdB7SZWXnNBfo1ud0XeGNdNuSANjMrr+nAIEkDJXUGjgHuXpcNJduDbqc6XN8uB/6MK8+fcStExEpJo4A/AbXAdRHx7LpsSxHr1BoxM7MKc4vDzCxRDmgzs0Q5oMugXNM6be0kXSdpgaRn8q6lWknqJ+khSbMlPSvp5Lxr6ujcg26lbFrn88DBFA6vmQ58PSJaPGvI1k7SvsBi4MaI2D7veqqRpD5An4h4XFJ3YCYwzH/L+fEIuvU+mdYZEcuBj6d1WhlFxBTg3bzrqGYRMS8iHs+uLwJmU5gVZzlxQLfemqZ1+o/a2jVJmwM7A9PyraRjc0C3XtmmdZqlQFI34A/AKRHxQd71dGQO6NYr27ROs7xJqqMQzjdHxB/zrqejc0C3XtmmdZrlSZKAa4HZEXF53vWYA7rVImIl8PG0ztnAbes6rdPWTtItwCPA1pLmShqZd01VaG/gG8CBkp7Mli/lXVRH5sPszMwS5RG0mVmiHNBmZolyQJuZJcoBbWaWKAe0mVmiHNDWLkk6X9JBeddhVkk+zM5yJ6k2IhrzrsMsNR5BW0VJ2lzS3ySNkzRL0h2Sukp6RdI5kv4CDJe0haSJkmZKmippG0k9ssfVZNvqKul1SXWSbpB0VLZ+tKTnsu1fmq0bKmmapCck/Y+k+mx9L0n/lT32UUlDcvtwzIrwj8ZaW9gaGBkRD0u6DvhOtn5ZROwDIGkScFJ
EzJG0B3B1RBwo6SlgP+AhYCjwp4hYUZiVXAhc4Ahgm4gISRtk2/4LsGe27gTgx8APgPOAJyJimKQDgRuBnSr/EZi1nAPa2sLrEfFwdv0m4PvZ9fHwydnTPg/c/nHwAv/S5DFHUwjoY4CrV9v2B8Ay4BpJ9wITsvV9gfHZSeg7Ay9n6/cBvgoQEQ9K2lBSj4h4vxxv1Kyc3OKwtrD6jo6Pby/JLmuAhRGxU5NlcHbf3cCh2Uh5F+DBT22ocC6U3SmcgW0YMDG76yrgVxGxA/BtoEu23qeHtXbDAW1tob+kvbLrX6fQfvhEds7hlyUNh8JZ1STtmN23GHgM+CUwYfWdidnou0dE3Aecwv+1K3oA/8iuj2jylCnAsdlz9wfe9jmPLVUOaGsLs4ERkmYBvYD/XMNjjgVGZj3nZ/n0z4aNB/4tu1xdd2BCtu0/A6dm68+l0DKZCrzd5PHnArtmjx/Np8PbLCk+zM4qKvvppAn+oVezlvMI2swsUR5Bm5klyiNoM7NEOaDNzBLlgDYzS5QD2swsUQ5oM7NE/S9c90U0gg6p+gAAAABJRU5ErkJggg==\n",
1013 | "text/plain": [
1014 | ""
1015 | ]
1016 | },
1017 | "metadata": {
1018 | "needs_background": "light"
1019 | },
1020 | "output_type": "display_data"
1021 | }
1022 | ],
1023 | "source": [
1024 | "#confusion Matrix\n",
1025 | "\n",
1026 | "df=pd.DataFrame(df,columns=['real','previsao' ])\n",
1027 | "\n",
1028 | "cf_matrix=pd.crosstab(df['real'], df['previsao'], rownames=['real'] , colnames=['previsao'])\n",
1029 | "\n",
1030 | "sns.heatmap(cf_matrix, annot=True, cmap='Blues')"
1031 | ]
1032 | },
1033 | {
1034 | "cell_type": "code",
1035 | "execution_count": null,
1036 | "metadata": {},
1037 | "outputs": [],
1038 | "source": []
1039 | }
1040 | ],
1041 | "metadata": {
1042 | "kernelspec": {
1043 | "display_name": "Python 3",
1044 | "language": "python",
1045 | "name": "python3"
1046 | },
1047 | "language_info": {
1048 | "codemirror_mode": {
1049 | "name": "ipython",
1050 | "version": 3
1051 | },
1052 | "file_extension": ".py",
1053 | "mimetype": "text/x-python",
1054 | "name": "python",
1055 | "nbconvert_exporter": "python",
1056 | "pygments_lexer": "ipython3",
1057 | "version": "3.7.6"
1058 | }
1059 | },
1060 | "nbformat": 4,
1061 | "nbformat_minor": 4
1062 | }
1063 |
--------------------------------------------------------------------------------
/Previsão preço ações.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#importando as bibliotecas\n",
10 | "import pandas as pd\n",
11 | "from sklearn.feature_selection import SelectKBest\n",
12 | "from sklearn.model_selection import GridSearchCV\n",
13 | "from sklearn.neural_network import MLPRegressor\n",
14 | "from sklearn.preprocessing import MinMaxScaler\n",
15 | "from sklearn import datasets, linear_model\n",
16 | "from sklearn.metrics import mean_squared_error, r2_score\n",
17 | "import matplotlib.pyplot as plt\n",
18 | "%matplotlib inline"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "data": {
28 | "text/html": [
29 | "\n",
30 | "\n",
43 | "
\n",
44 | " \n",
45 | " \n",
46 | " | \n",
47 | " data_pregao | \n",
48 | " sigla_acao | \n",
49 | " nome_acao | \n",
50 | " preco_abertura | \n",
51 | " preco_max | \n",
52 | " preco_minimo | \n",
53 | " preco_fechamento | \n",
54 | " qtd_negocios | \n",
55 | " volume_negocios | \n",
56 | "
\n",
57 | " \n",
58 | " \n",
59 | " \n",
60 | " | 0 | \n",
61 | " 2020-01-02 | \n",
62 | " AALR3 | \n",
63 | " ALLIAR | \n",
64 | " 18.29 | \n",
65 | " 19.00 | \n",
66 | " 18.28 | \n",
67 | " 19.00 | \n",
68 | " 585800.0 | \n",
69 | " 1.094620e+09 | \n",
70 | "
\n",
71 | " \n",
72 | " | 1 | \n",
73 | " 2020-01-02 | \n",
74 | " AAPL34 | \n",
75 | " APPLE | \n",
76 | " 120.00 | \n",
77 | " 121.34 | \n",
78 | " 120.00 | \n",
79 | " 121.34 | \n",
80 | " 12700.0 | \n",
81 | " 1.533380e+08 | \n",
82 | "
\n",
83 | " \n",
84 | " | 2 | \n",
85 | " 2020-01-02 | \n",
86 | " ABCB4 | \n",
87 | " ABC BRASIL | \n",
88 | " 20.00 | \n",
89 | " 20.30 | \n",
90 | " 19.82 | \n",
91 | " 20.30 | \n",
92 | " 870400.0 | \n",
93 | " 1.745788e+09 | \n",
94 | "
\n",
95 | " \n",
96 | " | 3 | \n",
97 | " 2020-01-02 | \n",
98 | " ABEV3 | \n",
99 | " AMBEV S/A | \n",
100 | " 18.86 | \n",
101 | " 19.25 | \n",
102 | " 18.78 | \n",
103 | " 19.20 | \n",
104 | " 16011300.0 | \n",
105 | " 3.062348e+10 | \n",
106 | "
\n",
107 | " \n",
108 | " | 4 | \n",
109 | " 2020-01-02 | \n",
110 | " ADHM3 | \n",
111 | " ADVANCED-DH | \n",
112 | " 2.55 | \n",
113 | " 2.55 | \n",
114 | " 2.35 | \n",
115 | " 2.48 | \n",
116 | " 193400.0 | \n",
117 | " 4.719270e+07 | \n",
118 | "
\n",
119 | " \n",
120 | " | ... | \n",
121 | " ... | \n",
122 | " ... | \n",
123 | " ... | \n",
124 | " ... | \n",
125 | " ... | \n",
126 | " ... | \n",
127 | " ... | \n",
128 | " ... | \n",
129 | " ... | \n",
130 | "
\n",
131 | " \n",
132 | " | 130451 | \n",
133 | " 2021-01-13 | \n",
134 | " Z1TO34 | \n",
135 | " ZTO EXPRESS | \n",
136 | " 39.74 | \n",
137 | " 39.74 | \n",
138 | " 39.74 | \n",
139 | " 39.74 | \n",
140 | " 79.0 | \n",
141 | " 3.139460e+05 | \n",
142 | "
\n",
143 | " \n",
144 | " | 130452 | \n",
145 | " 2021-01-14 | \n",
146 | " Z1TO34 | \n",
147 | " ZTO EXPRESS | \n",
148 | " 38.96 | \n",
149 | " 38.96 | \n",
150 | " 38.88 | \n",
151 | " 38.88 | \n",
152 | " 919.0 | \n",
153 | " 3.574432e+06 | \n",
154 | "
\n",
155 | " \n",
156 | " | 130453 | \n",
157 | " 2021-01-15 | \n",
158 | " Z1TO34 | \n",
159 | " ZTO EXPRESS | \n",
160 | " 39.03 | \n",
161 | " 39.03 | \n",
162 | " 39.03 | \n",
163 | " 39.03 | \n",
164 | " 930.0 | \n",
165 | " 3.629790e+06 | \n",
166 | "
\n",
167 | " \n",
168 | " | 130454 | \n",
169 | " 2021-01-11 | \n",
170 | " Z1TS34 | \n",
171 | " ZOETIS INC | \n",
172 | " 233.41 | \n",
173 | " 233.41 | \n",
174 | " 233.41 | \n",
175 | " 233.41 | \n",
176 | " 400.0 | \n",
177 | " 9.336400e+06 | \n",
178 | "
\n",
179 | " \n",
180 | " | 130455 | \n",
181 | " 2021-01-12 | \n",
182 | " Z1TS34 | \n",
183 | " ZOETIS INC | \n",
184 | " 234.00 | \n",
185 | " 234.00 | \n",
186 | " 231.19 | \n",
187 | " 231.19 | \n",
188 | " 34.0 | \n",
189 | " 7.867520e+05 | \n",
190 | "
\n",
191 | " \n",
192 | "
\n",
193 | "
130456 rows × 9 columns
\n",
194 | "
"
195 | ],
196 | "text/plain": [
197 | " data_pregao sigla_acao nome_acao preco_abertura preco_max \\\n",
198 | "0 2020-01-02 AALR3 ALLIAR 18.29 19.00 \n",
199 | "1 2020-01-02 AAPL34 APPLE 120.00 121.34 \n",
200 | "2 2020-01-02 ABCB4 ABC BRASIL 20.00 20.30 \n",
201 | "3 2020-01-02 ABEV3 AMBEV S/A 18.86 19.25 \n",
202 | "4 2020-01-02 ADHM3 ADVANCED-DH 2.55 2.55 \n",
203 | "... ... ... ... ... ... \n",
204 | "130451 2021-01-13 Z1TO34 ZTO EXPRESS 39.74 39.74 \n",
205 | "130452 2021-01-14 Z1TO34 ZTO EXPRESS 38.96 38.96 \n",
206 | "130453 2021-01-15 Z1TO34 ZTO EXPRESS 39.03 39.03 \n",
207 | "130454 2021-01-11 Z1TS34 ZOETIS INC 233.41 233.41 \n",
208 | "130455 2021-01-12 Z1TS34 ZOETIS INC 234.00 234.00 \n",
209 | "\n",
210 | " preco_minimo preco_fechamento qtd_negocios volume_negocios \n",
211 | "0 18.28 19.00 585800.0 1.094620e+09 \n",
212 | "1 120.00 121.34 12700.0 1.533380e+08 \n",
213 | "2 19.82 20.30 870400.0 1.745788e+09 \n",
214 | "3 18.78 19.20 16011300.0 3.062348e+10 \n",
215 | "4 2.35 2.48 193400.0 4.719270e+07 \n",
216 | "... ... ... ... ... \n",
217 | "130451 39.74 39.74 79.0 3.139460e+05 \n",
218 | "130452 38.88 38.88 919.0 3.574432e+06 \n",
219 | "130453 39.03 39.03 930.0 3.629790e+06 \n",
220 | "130454 233.41 233.41 400.0 9.336400e+06 \n",
221 | "130455 231.19 231.19 34.0 7.867520e+05 \n",
222 | "\n",
223 | "[130456 rows x 9 columns]"
224 | ]
225 | },
226 | "execution_count": 2,
227 | "metadata": {},
228 | "output_type": "execute_result"
229 | }
230 | ],
231 | "source": [
232 | "#lendo o arquivo de ações\n",
233 | "#Lendo csv\n",
234 | "\n",
235 | "df = pd.read_csv(\"D:\\\\acoes\\\\all_bovespa.csv\", delimiter=';')\n",
236 | "df"
237 | ]
238 | },
239 | {
240 | "cell_type": "code",
241 | "execution_count": 3,
242 | "metadata": {},
243 | "outputs": [],
244 | "source": [
245 | "#Itau\n",
246 | "df_itau = df[df['sigla_acao'] == 'ITUB4' ]"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": 4,
252 | "metadata": {},
253 | "outputs": [
254 | {
255 | "data": {
256 | "text/html": [
257 | "\n",
258 | "\n",
271 | "
\n",
272 | " \n",
273 | " \n",
274 | " | \n",
275 | " data_pregao | \n",
276 | " sigla_acao | \n",
277 | " nome_acao | \n",
278 | " preco_abertura | \n",
279 | " preco_max | \n",
280 | " preco_minimo | \n",
281 | " preco_fechamento | \n",
282 | " qtd_negocios | \n",
283 | " volume_negocios | \n",
284 | "
\n",
285 | " \n",
286 | " \n",
287 | " \n",
288 | " | 122738 | \n",
289 | " 2021-01-04 | \n",
290 | " ITUB4 | \n",
291 | " ITAUUNIBANCO | \n",
292 | " 31.98 | \n",
293 | " 31.99 | \n",
294 | " 30.84 | \n",
295 | " 30.90 | \n",
296 | " 31347800.0 | \n",
297 | " 9.744949e+10 | \n",
298 | "
\n",
299 | " \n",
300 | " | 123816 | \n",
301 | " 2021-01-05 | \n",
302 | " ITUB4 | \n",
303 | " ITAUUNIBANCO | \n",
304 | " 30.73 | \n",
305 | " 31.06 | \n",
306 | " 30.17 | \n",
307 | " 30.70 | \n",
308 | " 28249800.0 | \n",
309 | " 8.648705e+10 | \n",
310 | "
\n",
311 | " \n",
312 | " | 125331 | \n",
313 | " 2021-01-06 | \n",
314 | " ITUB4 | \n",
315 | " ITAUUNIBANCO | \n",
316 | " 30.98 | \n",
317 | " 32.06 | \n",
318 | " 30.79 | \n",
319 | " 31.55 | \n",
320 | " 43061900.0 | \n",
321 | " 1.365655e+11 | \n",
322 | "
\n",
323 | " \n",
324 | " | 125332 | \n",
325 | " 2021-01-07 | \n",
326 | " ITUB4 | \n",
327 | " ITAUUNIBANCO | \n",
328 | " 31.65 | \n",
329 | " 33.06 | \n",
330 | " 31.50 | \n",
331 | " 32.83 | \n",
332 | " 46129800.0 | \n",
333 | " 1.502078e+11 | \n",
334 | "
\n",
335 | " \n",
336 | " | 125333 | \n",
337 | " 2021-01-08 | \n",
338 | " ITUB4 | \n",
339 | " ITAUUNIBANCO | \n",
340 | " 32.93 | \n",
341 | " 33.44 | \n",
342 | " 32.43 | \n",
343 | " 32.82 | \n",
344 | " 52532500.0 | \n",
345 | " 1.721607e+11 | \n",
346 | "
\n",
347 | " \n",
348 | " | 128673 | \n",
349 | " 2021-01-11 | \n",
350 | " ITUB4 | \n",
351 | " ITAUUNIBANCO | \n",
352 | " 32.47 | \n",
353 | " 32.70 | \n",
354 | " 31.72 | \n",
355 | " 32.08 | \n",
356 | " 31564300.0 | \n",
357 | " 1.016568e+11 | \n",
358 | "
\n",
359 | " \n",
360 | " | 128674 | \n",
361 | " 2021-01-12 | \n",
362 | " ITUB4 | \n",
363 | " ITAUUNIBANCO | \n",
364 | " 32.18 | \n",
365 | " 32.45 | \n",
366 | " 32.02 | \n",
367 | " 32.18 | \n",
368 | " 33089300.0 | \n",
369 | " 1.066563e+11 | \n",
370 | "
\n",
371 | " \n",
372 | " | 128675 | \n",
373 | " 2021-01-13 | \n",
374 | " ITUB4 | \n",
375 | " ITAUUNIBANCO | \n",
376 | " 32.18 | \n",
377 | " 32.53 | \n",
378 | " 31.26 | \n",
379 | " 31.65 | \n",
380 | " 42722600.0 | \n",
381 | " 1.354698e+11 | \n",
382 | "
\n",
383 | " \n",
384 | " | 128676 | \n",
385 | " 2021-01-14 | \n",
386 | " ITUB4 | \n",
387 | " ITAUUNIBANCO | \n",
388 | " 32.02 | \n",
389 | " 32.94 | \n",
390 | " 31.67 | \n",
391 | " 32.59 | \n",
392 | " 28222500.0 | \n",
393 | " 9.166648e+10 | \n",
394 | "
\n",
395 | " \n",
396 | " | 128677 | \n",
397 | " 2021-01-15 | \n",
398 | " ITUB4 | \n",
399 | " ITAUUNIBANCO | \n",
400 | " 32.05 | \n",
401 | " 32.39 | \n",
402 | " 31.35 | \n",
403 | " 31.36 | \n",
404 | " 37757100.0 | \n",
405 | " 1.193353e+11 | \n",
406 | "
\n",
407 | " \n",
408 | "
\n",
409 | "
"
410 | ],
411 | "text/plain": [
412 | " data_pregao sigla_acao nome_acao preco_abertura preco_max \\\n",
413 | "122738 2021-01-04 ITUB4 ITAUUNIBANCO 31.98 31.99 \n",
414 | "123816 2021-01-05 ITUB4 ITAUUNIBANCO 30.73 31.06 \n",
415 | "125331 2021-01-06 ITUB4 ITAUUNIBANCO 30.98 32.06 \n",
416 | "125332 2021-01-07 ITUB4 ITAUUNIBANCO 31.65 33.06 \n",
417 | "125333 2021-01-08 ITUB4 ITAUUNIBANCO 32.93 33.44 \n",
418 | "128673 2021-01-11 ITUB4 ITAUUNIBANCO 32.47 32.70 \n",
419 | "128674 2021-01-12 ITUB4 ITAUUNIBANCO 32.18 32.45 \n",
420 | "128675 2021-01-13 ITUB4 ITAUUNIBANCO 32.18 32.53 \n",
421 | "128676 2021-01-14 ITUB4 ITAUUNIBANCO 32.02 32.94 \n",
422 | "128677 2021-01-15 ITUB4 ITAUUNIBANCO 32.05 32.39 \n",
423 | "\n",
424 | " preco_minimo preco_fechamento qtd_negocios volume_negocios \n",
425 | "122738 30.84 30.90 31347800.0 9.744949e+10 \n",
426 | "123816 30.17 30.70 28249800.0 8.648705e+10 \n",
427 | "125331 30.79 31.55 43061900.0 1.365655e+11 \n",
428 | "125332 31.50 32.83 46129800.0 1.502078e+11 \n",
429 | "125333 32.43 32.82 52532500.0 1.721607e+11 \n",
430 | "128673 31.72 32.08 31564300.0 1.016568e+11 \n",
431 | "128674 32.02 32.18 33089300.0 1.066563e+11 \n",
432 | "128675 31.26 31.65 42722600.0 1.354698e+11 \n",
433 | "128676 31.67 32.59 28222500.0 9.166648e+10 \n",
434 | "128677 31.35 31.36 37757100.0 1.193353e+11 "
435 | ]
436 | },
437 | "execution_count": 4,
438 | "metadata": {},
439 | "output_type": "execute_result"
440 | }
441 | ],
442 | "source": [
443 | "df_itau.tail(10)"
444 | ]
445 | },
446 | {
447 | "cell_type": "code",
448 | "execution_count": 5,
449 | "metadata": {},
450 | "outputs": [
451 | {
452 | "data": {
453 | "text/plain": [
454 | "data_pregao object\n",
455 | "sigla_acao object\n",
456 | "nome_acao object\n",
457 | "preco_abertura float64\n",
458 | "preco_max float64\n",
459 | "preco_minimo float64\n",
460 | "preco_fechamento float64\n",
461 | "qtd_negocios float64\n",
462 | "volume_negocios float64\n",
463 | "dtype: object"
464 | ]
465 | },
466 | "execution_count": 5,
467 | "metadata": {},
468 | "output_type": "execute_result"
469 | }
470 | ],
471 | "source": [
472 | "#verificar o tipo de dado de cada coluna\n",
473 | "df_itau.dtypes"
474 | ]
475 | },
476 | {
477 | "cell_type": "code",
478 | "execution_count": 6,
479 | "metadata": {},
480 | "outputs": [
481 | {
482 | "name": "stderr",
483 | "output_type": "stream",
484 | "text": [
485 | "C:\\Users\\fabri\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
486 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
487 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
488 | "\n",
489 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
490 | " \n"
491 | ]
492 | }
493 | ],
494 | "source": [
495 | "#Converter a coluna data_pregao para o tipo datetime\n",
496 | "df_itau['data_pregao'] = pd.to_datetime(df_itau['data_pregao'], format='%Y-%m-%d')\n"
497 | ]
498 | },
499 | {
500 | "cell_type": "code",
501 | "execution_count": 7,
502 | "metadata": {},
503 | "outputs": [
504 | {
505 | "data": {
506 | "text/plain": [
507 | "data_pregao datetime64[ns]\n",
508 | "sigla_acao object\n",
509 | "nome_acao object\n",
510 | "preco_abertura float64\n",
511 | "preco_max float64\n",
512 | "preco_minimo float64\n",
513 | "preco_fechamento float64\n",
514 | "qtd_negocios float64\n",
515 | "volume_negocios float64\n",
516 | "dtype: object"
517 | ]
518 | },
519 | "execution_count": 7,
520 | "metadata": {},
521 | "output_type": "execute_result"
522 | }
523 | ],
524 | "source": [
525 | "df_itau.dtypes"
526 | ]
527 | },
528 | {
529 | "cell_type": "code",
530 | "execution_count": 8,
531 | "metadata": {},
532 | "outputs": [
533 | {
534 | "data": {
535 | "text/html": [
536 | "\n",
537 | "\n",
550 | "
\n",
551 | " \n",
552 | " \n",
553 | " | \n",
554 | " data_pregao | \n",
555 | " sigla_acao | \n",
556 | " nome_acao | \n",
557 | " preco_abertura | \n",
558 | " preco_max | \n",
559 | " preco_minimo | \n",
560 | " preco_fechamento | \n",
561 | " qtd_negocios | \n",
562 | " volume_negocios | \n",
563 | "
\n",
564 | " \n",
565 | " \n",
566 | " \n",
567 | " | 128673 | \n",
568 | " 2021-01-11 | \n",
569 | " ITUB4 | \n",
570 | " ITAUUNIBANCO | \n",
571 | " 32.47 | \n",
572 | " 32.70 | \n",
573 | " 31.72 | \n",
574 | " 32.08 | \n",
575 | " 31564300.0 | \n",
576 | " 1.016568e+11 | \n",
577 | "
\n",
578 | " \n",
579 | " | 128674 | \n",
580 | " 2021-01-12 | \n",
581 | " ITUB4 | \n",
582 | " ITAUUNIBANCO | \n",
583 | " 32.18 | \n",
584 | " 32.45 | \n",
585 | " 32.02 | \n",
586 | " 32.18 | \n",
587 | " 33089300.0 | \n",
588 | " 1.066563e+11 | \n",
589 | "
\n",
590 | " \n",
591 | " | 128675 | \n",
592 | " 2021-01-13 | \n",
593 | " ITUB4 | \n",
594 | " ITAUUNIBANCO | \n",
595 | " 32.18 | \n",
596 | " 32.53 | \n",
597 | " 31.26 | \n",
598 | " 31.65 | \n",
599 | " 42722600.0 | \n",
600 | " 1.354698e+11 | \n",
601 | "
\n",
602 | " \n",
603 | " | 128676 | \n",
604 | " 2021-01-14 | \n",
605 | " ITUB4 | \n",
606 | " ITAUUNIBANCO | \n",
607 | " 32.02 | \n",
608 | " 32.94 | \n",
609 | " 31.67 | \n",
610 | " 32.59 | \n",
611 | " 28222500.0 | \n",
612 | " 9.166648e+10 | \n",
613 | "
\n",
614 | " \n",
615 | " | 128677 | \n",
616 | " 2021-01-15 | \n",
617 | " ITUB4 | \n",
618 | " ITAUUNIBANCO | \n",
619 | " 32.05 | \n",
620 | " 32.39 | \n",
621 | " 31.35 | \n",
622 | " 31.36 | \n",
623 | " 37757100.0 | \n",
624 | " 1.193353e+11 | \n",
625 | "
\n",
626 | " \n",
627 | "
\n",
628 | "
"
629 | ],
630 | "text/plain": [
631 | " data_pregao sigla_acao nome_acao preco_abertura preco_max \\\n",
632 | "128673 2021-01-11 ITUB4 ITAUUNIBANCO 32.47 32.70 \n",
633 | "128674 2021-01-12 ITUB4 ITAUUNIBANCO 32.18 32.45 \n",
634 | "128675 2021-01-13 ITUB4 ITAUUNIBANCO 32.18 32.53 \n",
635 | "128676 2021-01-14 ITUB4 ITAUUNIBANCO 32.02 32.94 \n",
636 | "128677 2021-01-15 ITUB4 ITAUUNIBANCO 32.05 32.39 \n",
637 | "\n",
638 | " preco_minimo preco_fechamento qtd_negocios volume_negocios \n",
639 | "128673 31.72 32.08 31564300.0 1.016568e+11 \n",
640 | "128674 32.02 32.18 33089300.0 1.066563e+11 \n",
641 | "128675 31.26 31.65 42722600.0 1.354698e+11 \n",
642 | "128676 31.67 32.59 28222500.0 9.166648e+10 \n",
643 | "128677 31.35 31.36 37757100.0 1.193353e+11 "
644 | ]
645 | },
646 | "execution_count": 8,
647 | "metadata": {},
648 | "output_type": "execute_result"
649 | }
650 | ],
651 | "source": [
652 | "df_itau.tail()"
653 | ]
654 | },
655 | {
656 | "cell_type": "code",
657 | "execution_count": 9,
658 | "metadata": {},
659 | "outputs": [
660 | {
661 | "name": "stderr",
662 | "output_type": "stream",
663 | "text": [
664 | "C:\\Users\\fabri\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
665 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
666 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
667 | "\n",
668 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
669 | " \n",
670 | "C:\\Users\\fabri\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:3: SettingWithCopyWarning: \n",
671 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
672 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
673 | "\n",
674 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
675 | " This is separate from the ipykernel package so we can avoid doing imports until\n"
676 | ]
677 | }
678 | ],
679 | "source": [
680 | "#criando novos campos de médias móveis (5 e 21 pregões)\n",
681 | "df_itau['mm5d'] = df_itau['preco_fechamento'].rolling(5).mean()\n",
682 | "df_itau['mm21d'] = df_itau['preco_fechamento'].rolling(21).mean()"
683 | ]
684 | },
685 | {
686 | "cell_type": "code",
687 | "execution_count": 10,
688 | "metadata": {},
689 | "outputs": [
690 | {
691 | "data": {
692 | "text/html": [
693 | "\n",
694 | "\n",
707 | "
\n",
708 | " \n",
709 | " \n",
710 | " | \n",
711 | " data_pregao | \n",
712 | " sigla_acao | \n",
713 | " nome_acao | \n",
714 | " preco_abertura | \n",
715 | " preco_max | \n",
716 | " preco_minimo | \n",
717 | " preco_fechamento | \n",
718 | " qtd_negocios | \n",
719 | " volume_negocios | \n",
720 | " mm5d | \n",
721 | " mm21d | \n",
722 | "
\n",
723 | " \n",
724 | " \n",
725 | " \n",
726 | " | 202 | \n",
727 | " 2020-01-02 | \n",
728 | " ITUB4 | \n",
729 | " ITAUUNIBANCO | \n",
730 | " 37.28 | \n",
731 | " 38.03 | \n",
732 | " 36.99 | \n",
733 | " 38.03 | \n",
734 | " 20666100.0 | \n",
735 | " 7.812151e+10 | \n",
736 | " NaN | \n",
737 | " NaN | \n",
738 | "
\n",
739 | " \n",
740 | " | 844 | \n",
741 | " 2020-01-03 | \n",
742 | " ITUB4 | \n",
743 | " ITAUUNIBANCO | \n",
744 | " 37.50 | \n",
745 | " 38.24 | \n",
746 | " 37.45 | \n",
747 | " 37.63 | \n",
748 | " 24891400.0 | \n",
749 | " 9.400384e+10 | \n",
750 | " NaN | \n",
751 | " NaN | \n",
752 | "
\n",
753 | " \n",
754 | " | 845 | \n",
755 | " 2020-01-06 | \n",
756 | " ITUB4 | \n",
757 | " ITAUUNIBANCO | \n",
758 | " 37.55 | \n",
759 | " 37.58 | \n",
760 | " 36.91 | \n",
761 | " 37.07 | \n",
762 | " 22294700.0 | \n",
763 | " 8.294725e+10 | \n",
764 | " NaN | \n",
765 | " NaN | \n",
766 | "
\n",
767 | " \n",
768 | " | 2273 | \n",
769 | " 2020-01-07 | \n",
770 | " ITUB4 | \n",
771 | " ITAUUNIBANCO | \n",
772 | " 37.07 | \n",
773 | " 37.24 | \n",
774 | " 36.21 | \n",
775 | " 36.21 | \n",
776 | " 20000900.0 | \n",
777 | " 7.294927e+10 | \n",
778 | " NaN | \n",
779 | " NaN | \n",
780 | "
\n",
781 | " \n",
782 | " | 2274 | \n",
783 | " 2020-01-08 | \n",
784 | " ITUB4 | \n",
785 | " ITAUUNIBANCO | \n",
786 | " 36.45 | \n",
787 | " 36.81 | \n",
788 | " 35.62 | \n",
789 | " 35.62 | \n",
790 | " 25980900.0 | \n",
791 | " 9.345243e+10 | \n",
792 | " 36.912 | \n",
793 | " NaN | \n",
794 | "
\n",
795 | " \n",
796 | " | 2275 | \n",
797 | " 2020-01-09 | \n",
798 | " ITUB4 | \n",
799 | " ITAUUNIBANCO | \n",
800 | " 35.79 | \n",
801 | " 35.93 | \n",
802 | " 34.52 | \n",
803 | " 34.91 | \n",
804 | " 48423800.0 | \n",
805 | " 1.694589e+11 | \n",
806 | " 36.288 | \n",
807 | " NaN | \n",
808 | "
\n",
809 | " \n",
810 | " | 2276 | \n",
811 | " 2020-01-10 | \n",
812 | " ITUB4 | \n",
813 | " ITAUUNIBANCO | \n",
814 | " 35.08 | \n",
815 | " 35.45 | \n",
816 | " 34.57 | \n",
817 | " 34.60 | \n",
818 | " 25521600.0 | \n",
819 | " 8.900229e+10 | \n",
820 | " 35.682 | \n",
821 | " NaN | \n",
822 | "
\n",
823 | " \n",
824 | " | 4821 | \n",
825 | " 2020-01-13 | \n",
826 | " ITUB4 | \n",
827 | " ITAUUNIBANCO | \n",
828 | " 35.02 | \n",
829 | " 35.40 | \n",
830 | " 34.74 | \n",
831 | " 35.06 | \n",
832 | " 29200700.0 | \n",
833 | " 1.025651e+11 | \n",
834 | " 35.280 | \n",
835 | " NaN | \n",
836 | "
\n",
837 | " \n",
838 | " | 4822 | \n",
839 | " 2020-01-14 | \n",
840 | " ITUB4 | \n",
841 | " ITAUUNIBANCO | \n",
842 | " 34.92 | \n",
843 | " 35.06 | \n",
844 | " 34.63 | \n",
845 | " 35.06 | \n",
846 | " 18433300.0 | \n",
847 | " 6.424604e+10 | \n",
848 | " 35.050 | \n",
849 | " NaN | \n",
850 | "
\n",
851 | " \n",
852 | " | 4823 | \n",
853 | " 2020-01-15 | \n",
854 | " ITUB4 | \n",
855 | " ITAUUNIBANCO | \n",
856 | " 34.89 | \n",
857 | " 34.94 | \n",
858 | " 34.40 | \n",
859 | " 34.63 | \n",
860 | " 32757500.0 | \n",
861 | " 1.134815e+11 | \n",
862 | " 34.852 | \n",
863 | " NaN | \n",
864 | "
\n",
865 | " \n",
866 | " | 4824 | \n",
867 | " 2020-01-16 | \n",
868 | " ITUB4 | \n",
869 | " ITAUUNIBANCO | \n",
870 | " 34.90 | \n",
871 | " 35.07 | \n",
872 | " 34.42 | \n",
873 | " 34.70 | \n",
874 | " 22857500.0 | \n",
875 | " 7.940644e+10 | \n",
876 | " 34.810 | \n",
877 | " NaN | \n",
878 | "
\n",
879 | " \n",
880 | " | 4825 | \n",
881 | " 2020-01-17 | \n",
882 | " ITUB4 | \n",
883 | " ITAUUNIBANCO | \n",
884 | " 34.90 | \n",
885 | " 35.30 | \n",
886 | " 34.73 | \n",
887 | " 34.94 | \n",
888 | " 19722900.0 | \n",
889 | " 6.900594e+10 | \n",
890 | " 34.878 | \n",
891 | " NaN | \n",
892 | "
\n",
893 | " \n",
894 | " | 4826 | \n",
895 | " 2020-01-20 | \n",
896 | " ITUB4 | \n",
897 | " ITAUUNIBANCO | \n",
898 | " 34.80 | \n",
899 | " 35.00 | \n",
900 | " 34.16 | \n",
901 | " 34.23 | \n",
902 | " 18322200.0 | \n",
903 | " 6.311801e+10 | \n",
904 | " 34.712 | \n",
905 | " NaN | \n",
906 | "
\n",
907 | " \n",
908 | " | 4827 | \n",
909 | " 2020-01-21 | \n",
910 | " ITUB4 | \n",
911 | " ITAUUNIBANCO | \n",
912 | " 34.01 | \n",
913 | " 34.48 | \n",
914 | " 33.44 | \n",
915 | " 33.50 | \n",
916 | " 26878100.0 | \n",
917 | " 9.077382e+10 | \n",
918 | " 34.400 | \n",
919 | " NaN | \n",
920 | "
\n",
921 | " \n",
922 | " | 4828 | \n",
923 | " 2020-01-22 | \n",
924 | " ITUB4 | \n",
925 | " ITAUUNIBANCO | \n",
926 | " 33.85 | \n",
927 | " 34.07 | \n",
928 | " 33.50 | \n",
929 | " 33.70 | \n",
930 | " 17443500.0 | \n",
931 | " 5.874532e+10 | \n",
932 | " 34.214 | \n",
933 | " NaN | \n",
934 | "
\n",
935 | " \n",
936 | " | 4829 | \n",
937 | " 2020-01-23 | \n",
938 | " ITUB4 | \n",
939 | " ITAUUNIBANCO | \n",
940 | " 33.50 | \n",
941 | " 34.81 | \n",
942 | " 33.40 | \n",
943 | " 34.50 | \n",
944 | " 33077600.0 | \n",
945 | " 1.135770e+11 | \n",
946 | " 34.174 | \n",
947 | " NaN | \n",
948 | "
\n",
949 | " \n",
950 | " | 8317 | \n",
951 | " 2020-01-24 | \n",
952 | " ITUB4 | \n",
953 | " ITAUUNIBANCO | \n",
954 | " 34.61 | \n",
955 | " 34.62 | \n",
956 | " 33.95 | \n",
957 | " 34.24 | \n",
958 | " 14756100.0 | \n",
959 | " 5.052616e+10 | \n",
960 | " 34.034 | \n",
961 | " NaN | \n",
962 | "
\n",
963 | " \n",
964 | " | 8318 | \n",
965 | " 2020-01-27 | \n",
966 | " ITUB4 | \n",
967 | " ITAUUNIBANCO | \n",
968 | " 33.74 | \n",
969 | " 34.09 | \n",
970 | " 33.41 | \n",
971 | " 33.50 | \n",
972 | " 14387100.0 | \n",
973 | " 4.849840e+10 | \n",
974 | " 33.888 | \n",
975 | " NaN | \n",
976 | "
\n",
977 | " \n",
978 | " | 8319 | \n",
979 | " 2020-01-28 | \n",
980 | " ITUB4 | \n",
981 | " ITAUUNIBANCO | \n",
982 | " 33.77 | \n",
983 | " 33.97 | \n",
984 | " 33.30 | \n",
985 | " 33.49 | \n",
986 | " 17185400.0 | \n",
987 | " 5.758163e+10 | \n",
988 | " 33.886 | \n",
989 | " NaN | \n",
990 | "
\n",
991 | " \n",
992 | " | 8320 | \n",
993 | " 2020-01-29 | \n",
994 | " ITUB4 | \n",
995 | " ITAUUNIBANCO | \n",
996 | " 33.69 | \n",
997 | " 33.79 | \n",
998 | " 32.92 | \n",
999 | " 32.97 | \n",
1000 | " 20071400.0 | \n",
1001 | " 6.680223e+10 | \n",
1002 | " 33.740 | \n",
1003 | " NaN | \n",
1004 | "
\n",
1005 | " \n",
1006 | " | 8321 | \n",
1007 | " 2020-01-30 | \n",
1008 | " ITUB4 | \n",
1009 | " ITAUUNIBANCO | \n",
1010 | " 32.65 | \n",
1011 | " 33.45 | \n",
1012 | " 32.58 | \n",
1013 | " 33.45 | \n",
1014 | " 41789800.0 | \n",
1015 | " 1.374939e+11 | \n",
1016 | " 33.530 | \n",
1017 | " 34.859048 | \n",
1018 | "
\n",
1019 | " \n",
1020 | " | 8322 | \n",
1021 | " 2020-01-31 | \n",
1022 | " ITUB4 | \n",
1023 | " ITAUUNIBANCO | \n",
1024 | " 32.99 | \n",
1025 | " 33.39 | \n",
1026 | " 32.54 | \n",
1027 | " 32.82 | \n",
1028 | " 25459800.0 | \n",
1029 | " 8.358342e+10 | \n",
1030 | " 33.246 | \n",
1031 | " 34.610952 | \n",
1032 | "
\n",
1033 | " \n",
1034 | "
\n",
1035 | "
"
1036 | ],
1037 | "text/plain": [
1038 | " data_pregao sigla_acao nome_acao preco_abertura preco_max \\\n",
1039 | "202 2020-01-02 ITUB4 ITAUUNIBANCO 37.28 38.03 \n",
1040 | "844 2020-01-03 ITUB4 ITAUUNIBANCO 37.50 38.24 \n",
1041 | "845 2020-01-06 ITUB4 ITAUUNIBANCO 37.55 37.58 \n",
1042 | "2273 2020-01-07 ITUB4 ITAUUNIBANCO 37.07 37.24 \n",
1043 | "2274 2020-01-08 ITUB4 ITAUUNIBANCO 36.45 36.81 \n",
1044 | "2275 2020-01-09 ITUB4 ITAUUNIBANCO 35.79 35.93 \n",
1045 | "2276 2020-01-10 ITUB4 ITAUUNIBANCO 35.08 35.45 \n",
1046 | "4821 2020-01-13 ITUB4 ITAUUNIBANCO 35.02 35.40 \n",
1047 | "4822 2020-01-14 ITUB4 ITAUUNIBANCO 34.92 35.06 \n",
1048 | "4823 2020-01-15 ITUB4 ITAUUNIBANCO 34.89 34.94 \n",
1049 | "4824 2020-01-16 ITUB4 ITAUUNIBANCO 34.90 35.07 \n",
1050 | "4825 2020-01-17 ITUB4 ITAUUNIBANCO 34.90 35.30 \n",
1051 | "4826 2020-01-20 ITUB4 ITAUUNIBANCO 34.80 35.00 \n",
1052 | "4827 2020-01-21 ITUB4 ITAUUNIBANCO 34.01 34.48 \n",
1053 | "4828 2020-01-22 ITUB4 ITAUUNIBANCO 33.85 34.07 \n",
1054 | "4829 2020-01-23 ITUB4 ITAUUNIBANCO 33.50 34.81 \n",
1055 | "8317 2020-01-24 ITUB4 ITAUUNIBANCO 34.61 34.62 \n",
1056 | "8318 2020-01-27 ITUB4 ITAUUNIBANCO 33.74 34.09 \n",
1057 | "8319 2020-01-28 ITUB4 ITAUUNIBANCO 33.77 33.97 \n",
1058 | "8320 2020-01-29 ITUB4 ITAUUNIBANCO 33.69 33.79 \n",
1059 | "8321 2020-01-30 ITUB4 ITAUUNIBANCO 32.65 33.45 \n",
1060 | "8322 2020-01-31 ITUB4 ITAUUNIBANCO 32.99 33.39 \n",
1061 | "\n",
1062 | " preco_minimo preco_fechamento qtd_negocios volume_negocios mm5d \\\n",
1063 | "202 36.99 38.03 20666100.0 7.812151e+10 NaN \n",
1064 | "844 37.45 37.63 24891400.0 9.400384e+10 NaN \n",
1065 | "845 36.91 37.07 22294700.0 8.294725e+10 NaN \n",
1066 | "2273 36.21 36.21 20000900.0 7.294927e+10 NaN \n",
1067 | "2274 35.62 35.62 25980900.0 9.345243e+10 36.912 \n",
1068 | "2275 34.52 34.91 48423800.0 1.694589e+11 36.288 \n",
1069 | "2276 34.57 34.60 25521600.0 8.900229e+10 35.682 \n",
1070 | "4821 34.74 35.06 29200700.0 1.025651e+11 35.280 \n",
1071 | "4822 34.63 35.06 18433300.0 6.424604e+10 35.050 \n",
1072 | "4823 34.40 34.63 32757500.0 1.134815e+11 34.852 \n",
1073 | "4824 34.42 34.70 22857500.0 7.940644e+10 34.810 \n",
1074 | "4825 34.73 34.94 19722900.0 6.900594e+10 34.878 \n",
1075 | "4826 34.16 34.23 18322200.0 6.311801e+10 34.712 \n",
1076 | "4827 33.44 33.50 26878100.0 9.077382e+10 34.400 \n",
1077 | "4828 33.50 33.70 17443500.0 5.874532e+10 34.214 \n",
1078 | "4829 33.40 34.50 33077600.0 1.135770e+11 34.174 \n",
1079 | "8317 33.95 34.24 14756100.0 5.052616e+10 34.034 \n",
1080 | "8318 33.41 33.50 14387100.0 4.849840e+10 33.888 \n",
1081 | "8319 33.30 33.49 17185400.0 5.758163e+10 33.886 \n",
1082 | "8320 32.92 32.97 20071400.0 6.680223e+10 33.740 \n",
1083 | "8321 32.58 33.45 41789800.0 1.374939e+11 33.530 \n",
1084 | "8322 32.54 32.82 25459800.0 8.358342e+10 33.246 \n",
1085 | "\n",
1086 | " mm21d \n",
1087 | "202 NaN \n",
1088 | "844 NaN \n",
1089 | "845 NaN \n",
1090 | "2273 NaN \n",
1091 | "2274 NaN \n",
1092 | "2275 NaN \n",
1093 | "2276 NaN \n",
1094 | "4821 NaN \n",
1095 | "4822 NaN \n",
1096 | "4823 NaN \n",
1097 | "4824 NaN \n",
1098 | "4825 NaN \n",
1099 | "4826 NaN \n",
1100 | "4827 NaN \n",
1101 | "4828 NaN \n",
1102 | "4829 NaN \n",
1103 | "8317 NaN \n",
1104 | "8318 NaN \n",
1105 | "8319 NaN \n",
1106 | "8320 NaN \n",
1107 | "8321 34.859048 \n",
1108 | "8322 34.610952 "
1109 | ]
1110 | },
1111 | "execution_count": 10,
1112 | "metadata": {},
1113 | "output_type": "execute_result"
1114 | }
1115 | ],
1116 | "source": [
1117 | "df_itau.head(22)"
1118 | ]
1119 | },
1120 | {
1121 | "cell_type": "code",
1122 | "execution_count": 11,
1123 | "metadata": {},
1124 | "outputs": [
1125 | {
1126 | "name": "stderr",
1127 | "output_type": "stream",
1128 | "text": [
1129 | "C:\\Users\\fabri\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
1130 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
1131 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
1132 | "\n",
1133 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
1134 | " \n"
1135 | ]
1136 | },
1137 | {
1138 | "data": {
1139 | "text/html": [
1140 | "\n",
1141 | "\n",
1154 | "
\n",
1155 | " \n",
1156 | " \n",
1157 | " | \n",
1158 | " data_pregao | \n",
1159 | " sigla_acao | \n",
1160 | " nome_acao | \n",
1161 | " preco_abertura | \n",
1162 | " preco_max | \n",
1163 | " preco_minimo | \n",
1164 | " preco_fechamento | \n",
1165 | " qtd_negocios | \n",
1166 | " volume_negocios | \n",
1167 | " mm5d | \n",
1168 | " mm21d | \n",
1169 | "
\n",
1170 | " \n",
1171 | " \n",
1172 | " \n",
1173 | " | 202 | \n",
1174 | " 2020-01-02 | \n",
1175 | " ITUB4 | \n",
1176 | " ITAUUNIBANCO | \n",
1177 | " 37.28 | \n",
1178 | " 38.03 | \n",
1179 | " 36.99 | \n",
1180 | " 37.63 | \n",
1181 | " 20666100.0 | \n",
1182 | " 7.812151e+10 | \n",
1183 | " NaN | \n",
1184 | " NaN | \n",
1185 | "
\n",
1186 | " \n",
1187 | " | 844 | \n",
1188 | " 2020-01-03 | \n",
1189 | " ITUB4 | \n",
1190 | " ITAUUNIBANCO | \n",
1191 | " 37.50 | \n",
1192 | " 38.24 | \n",
1193 | " 37.45 | \n",
1194 | " 37.07 | \n",
1195 | " 24891400.0 | \n",
1196 | " 9.400384e+10 | \n",
1197 | " NaN | \n",
1198 | " NaN | \n",
1199 | "
\n",
1200 | " \n",
1201 | " | 845 | \n",
1202 | " 2020-01-06 | \n",
1203 | " ITUB4 | \n",
1204 | " ITAUUNIBANCO | \n",
1205 | " 37.55 | \n",
1206 | " 37.58 | \n",
1207 | " 36.91 | \n",
1208 | " 36.21 | \n",
1209 | " 22294700.0 | \n",
1210 | " 8.294725e+10 | \n",
1211 | " NaN | \n",
1212 | " NaN | \n",
1213 | "
\n",
1214 | " \n",
1215 | " | 2273 | \n",
1216 | " 2020-01-07 | \n",
1217 | " ITUB4 | \n",
1218 | " ITAUUNIBANCO | \n",
1219 | " 37.07 | \n",
1220 | " 37.24 | \n",
1221 | " 36.21 | \n",
1222 | " 35.62 | \n",
1223 | " 20000900.0 | \n",
1224 | " 7.294927e+10 | \n",
1225 | " NaN | \n",
1226 | " NaN | \n",
1227 | "
\n",
1228 | " \n",
1229 | " | 2274 | \n",
1230 | " 2020-01-08 | \n",
1231 | " ITUB4 | \n",
1232 | " ITAUUNIBANCO | \n",
1233 | " 36.45 | \n",
1234 | " 36.81 | \n",
1235 | " 35.62 | \n",
1236 | " 34.91 | \n",
1237 | " 25980900.0 | \n",
1238 | " 9.345243e+10 | \n",
1239 | " 36.912 | \n",
1240 | " NaN | \n",
1241 | "
\n",
1242 | " \n",
1243 | "
\n",
1244 | "
"
1245 | ],
1246 | "text/plain": [
1247 | " data_pregao sigla_acao nome_acao preco_abertura preco_max \\\n",
1248 | "202 2020-01-02 ITUB4 ITAUUNIBANCO 37.28 38.03 \n",
1249 | "844 2020-01-03 ITUB4 ITAUUNIBANCO 37.50 38.24 \n",
1250 | "845 2020-01-06 ITUB4 ITAUUNIBANCO 37.55 37.58 \n",
1251 | "2273 2020-01-07 ITUB4 ITAUUNIBANCO 37.07 37.24 \n",
1252 | "2274 2020-01-08 ITUB4 ITAUUNIBANCO 36.45 36.81 \n",
1253 | "\n",
1254 | " preco_minimo preco_fechamento qtd_negocios volume_negocios mm5d \\\n",
1255 | "202 36.99 37.63 20666100.0 7.812151e+10 NaN \n",
1256 | "844 37.45 37.07 24891400.0 9.400384e+10 NaN \n",
1257 | "845 36.91 36.21 22294700.0 8.294725e+10 NaN \n",
1258 | "2273 36.21 35.62 20000900.0 7.294927e+10 NaN \n",
1259 | "2274 35.62 34.91 25980900.0 9.345243e+10 36.912 \n",
1260 | "\n",
1261 | " mm21d \n",
1262 | "202 NaN \n",
1263 | "844 NaN \n",
1264 | "845 NaN \n",
1265 | "2273 NaN \n",
1266 | "2274 NaN "
1267 | ]
1268 | },
1269 | "execution_count": 11,
1270 | "metadata": {},
1271 | "output_type": "execute_result"
1272 | }
1273 | ],
1274 | "source": [
1275 | "#Deslocando o fechamento em uma linha: cada pregão passa a ter o preço de fechamento do pregão seguinte\n",
1276 | "df_itau['preco_fechamento'] = df_itau['preco_fechamento'].shift(-1)\n",
1277 | "\n",
1278 | "df_itau.head()"
1279 | ]
1280 | },
1281 | {
1282 | "cell_type": "code",
1283 | "execution_count": 12,
1284 | "metadata": {},
1285 | "outputs": [
1286 | {
1287 | "name": "stderr",
1288 | "output_type": "stream",
1289 | "text": [
1290 | "C:\\Users\\fabri\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
1291 | "A value is trying to be set on a copy of a slice from a DataFrame\n",
1292 | "\n",
1293 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
1294 | " \n"
1295 | ]
1296 | },
1297 | {
1298 | "data": {
1299 | "text/html": [
1300 | "\n",
1301 | "\n",
1314 | "
\n",
1315 | " \n",
1316 | " \n",
1317 | " | \n",
1318 | " data_pregao | \n",
1319 | " sigla_acao | \n",
1320 | " nome_acao | \n",
1321 | " preco_abertura | \n",
1322 | " preco_max | \n",
1323 | " preco_minimo | \n",
1324 | " preco_fechamento | \n",
1325 | " qtd_negocios | \n",
1326 | " volume_negocios | \n",
1327 | " mm5d | \n",
1328 | " mm21d | \n",
1329 | "
\n",
1330 | " \n",
1331 | " \n",
1332 | " \n",
1333 | " | 8321 | \n",
1334 | " 2020-01-30 | \n",
1335 | " ITUB4 | \n",
1336 | " ITAUUNIBANCO | \n",
1337 | " 32.65 | \n",
1338 | " 33.45 | \n",
1339 | " 32.58 | \n",
1340 | " 32.82 | \n",
1341 | " 41789800.0 | \n",
1342 | " 1.374939e+11 | \n",
1343 | " 33.530 | \n",
1344 | " 34.859048 | \n",
1345 | "
\n",
1346 | " \n",
1347 | " | 8322 | \n",
1348 | " 2020-01-31 | \n",
1349 | " ITUB4 | \n",
1350 | " ITAUUNIBANCO | \n",
1351 | " 32.99 | \n",
1352 | " 33.39 | \n",
1353 | " 32.54 | \n",
1354 | " 32.90 | \n",
1355 | " 25459800.0 | \n",
1356 | " 8.358342e+10 | \n",
1357 | " 33.246 | \n",
1358 | " 34.610952 | \n",
1359 | "
\n",
1360 | " \n",
1361 | " | 8323 | \n",
1362 | " 2020-02-03 | \n",
1363 | " ITUB4 | \n",
1364 | " ITAUUNIBANCO | \n",
1365 | " 33.00 | \n",
1366 | " 33.31 | \n",
1367 | " 32.79 | \n",
1368 | " 33.16 | \n",
1369 | " 17586000.0 | \n",
1370 | " 5.817445e+10 | \n",
1371 | " 33.126 | \n",
1372 | " 34.385714 | \n",
1373 | "
\n",
1374 | " \n",
1375 | " | 8324 | \n",
1376 | " 2020-02-04 | \n",
1377 | " ITUB4 | \n",
1378 | " ITAUUNIBANCO | \n",
1379 | " 33.36 | \n",
1380 | " 33.46 | \n",
1381 | " 32.90 | \n",
1382 | " 33.36 | \n",
1383 | " 15730300.0 | \n",
1384 | " 5.220127e+10 | \n",
1385 | " 33.060 | \n",
1386 | " 34.199524 | \n",
1387 | "
\n",
1388 | " \n",
1389 | " | 8325 | \n",
1390 | " 2020-02-05 | \n",
1391 | " ITUB4 | \n",
1392 | " ITAUUNIBANCO | \n",
1393 | " 33.71 | \n",
1394 | " 34.12 | \n",
1395 | " 33.36 | \n",
1396 | " 33.28 | \n",
1397 | " 34791900.0 | \n",
1398 | " 1.169225e+11 | \n",
1399 | " 33.138 | \n",
1400 | " 34.063810 | \n",
1401 | "
\n",
1402 | " \n",
1403 | " | ... | \n",
1404 | " ... | \n",
1405 | " ... | \n",
1406 | " ... | \n",
1407 | " ... | \n",
1408 | " ... | \n",
1409 | " ... | \n",
1410 | " ... | \n",
1411 | " ... | \n",
1412 | " ... | \n",
1413 | " ... | \n",
1414 | " ... | \n",
1415 | "
\n",
1416 | " \n",
1417 | " | 125333 | \n",
1418 | " 2021-01-08 | \n",
1419 | " ITUB4 | \n",
1420 | " ITAUUNIBANCO | \n",
1421 | " 32.93 | \n",
1422 | " 33.44 | \n",
1423 | " 32.43 | \n",
1424 | " 32.08 | \n",
1425 | " 52532500.0 | \n",
1426 | " 1.721607e+11 | \n",
1427 | " 31.760 | \n",
1428 | " 31.444762 | \n",
1429 | "
\n",
1430 | " \n",
1431 | " | 128673 | \n",
1432 | " 2021-01-11 | \n",
1433 | " ITUB4 | \n",
1434 | " ITAUUNIBANCO | \n",
1435 | " 32.47 | \n",
1436 | " 32.70 | \n",
1437 | " 31.72 | \n",
1438 | " 32.18 | \n",
1439 | " 31564300.0 | \n",
1440 | " 1.016568e+11 | \n",
1441 | " 31.996 | \n",
1442 | " 31.522857 | \n",
1443 | "
\n",
1444 | " \n",
1445 | " | 128674 | \n",
1446 | " 2021-01-12 | \n",
1447 | " ITUB4 | \n",
1448 | " ITAUUNIBANCO | \n",
1449 | " 32.18 | \n",
1450 | " 32.45 | \n",
1451 | " 32.02 | \n",
1452 | " 31.65 | \n",
1453 | " 33089300.0 | \n",
1454 | " 1.066563e+11 | \n",
1455 | " 32.292 | \n",
1456 | " 31.609524 | \n",
1457 | "
\n",
1458 | " \n",
1459 | " | 128675 | \n",
1460 | " 2021-01-13 | \n",
1461 | " ITUB4 | \n",
1462 | " ITAUUNIBANCO | \n",
1463 | " 32.18 | \n",
1464 | " 32.53 | \n",
1465 | " 31.26 | \n",
1466 | " 32.59 | \n",
1467 | " 42722600.0 | \n",
1468 | " 1.354698e+11 | \n",
1469 | " 32.312 | \n",
1470 | " 31.662381 | \n",
1471 | "
\n",
1472 | " \n",
1473 | " | 128676 | \n",
1474 | " 2021-01-14 | \n",
1475 | " ITUB4 | \n",
1476 | " ITAUUNIBANCO | \n",
1477 | " 32.02 | \n",
1478 | " 32.94 | \n",
1479 | " 31.67 | \n",
1480 | " 31.36 | \n",
1481 | " 28222500.0 | \n",
1482 | " 9.166648e+10 | \n",
1483 | " 32.264 | \n",
1484 | " 31.711905 | \n",
1485 | "
\n",
1486 | " \n",
1487 | "
\n",
1488 | "
238 rows × 11 columns
\n",
1489 | "
"
1490 | ],
1491 | "text/plain": [
1492 | " data_pregao sigla_acao nome_acao preco_abertura preco_max \\\n",
1493 | "8321 2020-01-30 ITUB4 ITAUUNIBANCO 32.65 33.45 \n",
1494 | "8322 2020-01-31 ITUB4 ITAUUNIBANCO 32.99 33.39 \n",
1495 | "8323 2020-02-03 ITUB4 ITAUUNIBANCO 33.00 33.31 \n",
1496 | "8324 2020-02-04 ITUB4 ITAUUNIBANCO 33.36 33.46 \n",
1497 | "8325 2020-02-05 ITUB4 ITAUUNIBANCO 33.71 34.12 \n",
1498 | "... ... ... ... ... ... \n",
1499 | "125333 2021-01-08 ITUB4 ITAUUNIBANCO 32.93 33.44 \n",
1500 | "128673 2021-01-11 ITUB4 ITAUUNIBANCO 32.47 32.70 \n",
1501 | "128674 2021-01-12 ITUB4 ITAUUNIBANCO 32.18 32.45 \n",
1502 | "128675 2021-01-13 ITUB4 ITAUUNIBANCO 32.18 32.53 \n",
1503 | "128676 2021-01-14 ITUB4 ITAUUNIBANCO 32.02 32.94 \n",
1504 | "\n",
1505 | " preco_minimo preco_fechamento qtd_negocios volume_negocios mm5d \\\n",
1506 | "8321 32.58 32.82 41789800.0 1.374939e+11 33.530 \n",
1507 | "8322 32.54 32.90 25459800.0 8.358342e+10 33.246 \n",
1508 | "8323 32.79 33.16 17586000.0 5.817445e+10 33.126 \n",
1509 | "8324 32.90 33.36 15730300.0 5.220127e+10 33.060 \n",
1510 | "8325 33.36 33.28 34791900.0 1.169225e+11 33.138 \n",
1511 | "... ... ... ... ... ... \n",
1512 | "125333 32.43 32.08 52532500.0 1.721607e+11 31.760 \n",
1513 | "128673 31.72 32.18 31564300.0 1.016568e+11 31.996 \n",
1514 | "128674 32.02 31.65 33089300.0 1.066563e+11 32.292 \n",
1515 | "128675 31.26 32.59 42722600.0 1.354698e+11 32.312 \n",
1516 | "128676 31.67 31.36 28222500.0 9.166648e+10 32.264 \n",
1517 | "\n",
1518 | " mm21d \n",
1519 | "8321 34.859048 \n",
1520 | "8322 34.610952 \n",
1521 | "8323 34.385714 \n",
1522 | "8324 34.199524 \n",
1523 | "8325 34.063810 \n",
1524 | "... ... \n",
1525 | "125333 31.444762 \n",
1526 | "128673 31.522857 \n",
1527 | "128674 31.609524 \n",
1528 | "128675 31.662381 \n",
1529 | "128676 31.711905 \n",
1530 | "\n",
1531 | "[238 rows x 11 columns]"
1532 | ]
1533 | },
1534 | "execution_count": 12,
1535 | "metadata": {},
1536 | "output_type": "execute_result"
1537 | }
1538 | ],
1539 | "source": [
1540 | "#retirando os dados nulos\n",
1541 | "df_itau.dropna(inplace=True)\n",
1542 | "df_itau"
1543 | ]
1544 | },
1545 | {
1546 | "cell_type": "code",
1547 | "execution_count": 13,
1548 | "metadata": {},
1549 | "outputs": [
1550 | {
1551 | "data": {
1552 | "text/html": [
1553 | "\n",
1554 | "\n",
1567 | "
\n",
1568 | " \n",
1569 | " \n",
1570 | " | \n",
1571 | " data_pregao | \n",
1572 | " sigla_acao | \n",
1573 | " nome_acao | \n",
1574 | " preco_abertura | \n",
1575 | " preco_max | \n",
1576 | " preco_minimo | \n",
1577 | " preco_fechamento | \n",
1578 | " qtd_negocios | \n",
1579 | " volume_negocios | \n",
1580 | " mm5d | \n",
1581 | " mm21d | \n",
1582 | "
\n",
1583 | " \n",
1584 | " \n",
1585 | " \n",
1586 | " | 0 | \n",
1587 | " 2020-01-30 | \n",
1588 | " ITUB4 | \n",
1589 | " ITAUUNIBANCO | \n",
1590 | " 32.65 | \n",
1591 | " 33.45 | \n",
1592 | " 32.58 | \n",
1593 | " 32.82 | \n",
1594 | " 41789800.0 | \n",
1595 | " 1.374939e+11 | \n",
1596 | " 33.530 | \n",
1597 | " 34.859048 | \n",
1598 | "
\n",
1599 | " \n",
1600 | " | 1 | \n",
1601 | " 2020-01-31 | \n",
1602 | " ITUB4 | \n",
1603 | " ITAUUNIBANCO | \n",
1604 | " 32.99 | \n",
1605 | " 33.39 | \n",
1606 | " 32.54 | \n",
1607 | " 32.90 | \n",
1608 | " 25459800.0 | \n",
1609 | " 8.358342e+10 | \n",
1610 | " 33.246 | \n",
1611 | " 34.610952 | \n",
1612 | "
\n",
1613 | " \n",
1614 | " | 2 | \n",
1615 | " 2020-02-03 | \n",
1616 | " ITUB4 | \n",
1617 | " ITAUUNIBANCO | \n",
1618 | " 33.00 | \n",
1619 | " 33.31 | \n",
1620 | " 32.79 | \n",
1621 | " 33.16 | \n",
1622 | " 17586000.0 | \n",
1623 | " 5.817445e+10 | \n",
1624 | " 33.126 | \n",
1625 | " 34.385714 | \n",
1626 | "
\n",
1627 | " \n",
1628 | " | 3 | \n",
1629 | " 2020-02-04 | \n",
1630 | " ITUB4 | \n",
1631 | " ITAUUNIBANCO | \n",
1632 | " 33.36 | \n",
1633 | " 33.46 | \n",
1634 | " 32.90 | \n",
1635 | " 33.36 | \n",
1636 | " 15730300.0 | \n",
1637 | " 5.220127e+10 | \n",
1638 | " 33.060 | \n",
1639 | " 34.199524 | \n",
1640 | "
\n",
1641 | " \n",
1642 | " | 4 | \n",
1643 | " 2020-02-05 | \n",
1644 | " ITUB4 | \n",
1645 | " ITAUUNIBANCO | \n",
1646 | " 33.71 | \n",
1647 | " 34.12 | \n",
1648 | " 33.36 | \n",
1649 | " 33.28 | \n",
1650 | " 34791900.0 | \n",
1651 | " 1.169225e+11 | \n",
1652 | " 33.138 | \n",
1653 | " 34.063810 | \n",
1654 | "
\n",
1655 | " \n",
1656 | " | ... | \n",
1657 | " ... | \n",
1658 | " ... | \n",
1659 | " ... | \n",
1660 | " ... | \n",
1661 | " ... | \n",
1662 | " ... | \n",
1663 | " ... | \n",
1664 | " ... | \n",
1665 | " ... | \n",
1666 | " ... | \n",
1667 | " ... | \n",
1668 | "
\n",
1669 | " \n",
1670 | " | 233 | \n",
1671 | " 2021-01-08 | \n",
1672 | " ITUB4 | \n",
1673 | " ITAUUNIBANCO | \n",
1674 | " 32.93 | \n",
1675 | " 33.44 | \n",
1676 | " 32.43 | \n",
1677 | " 32.08 | \n",
1678 | " 52532500.0 | \n",
1679 | " 1.721607e+11 | \n",
1680 | " 31.760 | \n",
1681 | " 31.444762 | \n",
1682 | "
\n",
1683 | " \n",
1684 | " | 234 | \n",
1685 | " 2021-01-11 | \n",
1686 | " ITUB4 | \n",
1687 | " ITAUUNIBANCO | \n",
1688 | " 32.47 | \n",
1689 | " 32.70 | \n",
1690 | " 31.72 | \n",
1691 | " 32.18 | \n",
1692 | " 31564300.0 | \n",
1693 | " 1.016568e+11 | \n",
1694 | " 31.996 | \n",
1695 | " 31.522857 | \n",
1696 | "
\n",
1697 | " \n",
1698 | " | 235 | \n",
1699 | " 2021-01-12 | \n",
1700 | " ITUB4 | \n",
1701 | " ITAUUNIBANCO | \n",
1702 | " 32.18 | \n",
1703 | " 32.45 | \n",
1704 | " 32.02 | \n",
1705 | " 31.65 | \n",
1706 | " 33089300.0 | \n",
1707 | " 1.066563e+11 | \n",
1708 | " 32.292 | \n",
1709 | " 31.609524 | \n",
1710 | "
\n",
1711 | " \n",
1712 | " | 236 | \n",
1713 | " 2021-01-13 | \n",
1714 | " ITUB4 | \n",
1715 | " ITAUUNIBANCO | \n",
1716 | " 32.18 | \n",
1717 | " 32.53 | \n",
1718 | " 31.26 | \n",
1719 | " 32.59 | \n",
1720 | " 42722600.0 | \n",
1721 | " 1.354698e+11 | \n",
1722 | " 32.312 | \n",
1723 | " 31.662381 | \n",
1724 | "
\n",
1725 | " \n",
1726 | " | 237 | \n",
1727 | " 2021-01-14 | \n",
1728 | " ITUB4 | \n",
1729 | " ITAUUNIBANCO | \n",
1730 | " 32.02 | \n",
1731 | " 32.94 | \n",
1732 | " 31.67 | \n",
1733 | " 31.36 | \n",
1734 | " 28222500.0 | \n",
1735 | " 9.166648e+10 | \n",
1736 | " 32.264 | \n",
1737 | " 31.711905 | \n",
1738 | "
\n",
1739 | " \n",
1740 | "
\n",
1741 | "
238 rows × 11 columns
\n",
1742 | "
"
1743 | ],
1744 | "text/plain": [
1745 | " data_pregao sigla_acao nome_acao preco_abertura preco_max \\\n",
1746 | "0 2020-01-30 ITUB4 ITAUUNIBANCO 32.65 33.45 \n",
1747 | "1 2020-01-31 ITUB4 ITAUUNIBANCO 32.99 33.39 \n",
1748 | "2 2020-02-03 ITUB4 ITAUUNIBANCO 33.00 33.31 \n",
1749 | "3 2020-02-04 ITUB4 ITAUUNIBANCO 33.36 33.46 \n",
1750 | "4 2020-02-05 ITUB4 ITAUUNIBANCO 33.71 34.12 \n",
1751 | ".. ... ... ... ... ... \n",
1752 | "233 2021-01-08 ITUB4 ITAUUNIBANCO 32.93 33.44 \n",
1753 | "234 2021-01-11 ITUB4 ITAUUNIBANCO 32.47 32.70 \n",
1754 | "235 2021-01-12 ITUB4 ITAUUNIBANCO 32.18 32.45 \n",
1755 | "236 2021-01-13 ITUB4 ITAUUNIBANCO 32.18 32.53 \n",
1756 | "237 2021-01-14 ITUB4 ITAUUNIBANCO 32.02 32.94 \n",
1757 | "\n",
1758 | " preco_minimo preco_fechamento qtd_negocios volume_negocios mm5d \\\n",
1759 | "0 32.58 32.82 41789800.0 1.374939e+11 33.530 \n",
1760 | "1 32.54 32.90 25459800.0 8.358342e+10 33.246 \n",
1761 | "2 32.79 33.16 17586000.0 5.817445e+10 33.126 \n",
1762 | "3 32.90 33.36 15730300.0 5.220127e+10 33.060 \n",
1763 | "4 33.36 33.28 34791900.0 1.169225e+11 33.138 \n",
1764 | ".. ... ... ... ... ... \n",
1765 | "233 32.43 32.08 52532500.0 1.721607e+11 31.760 \n",
1766 | "234 31.72 32.18 31564300.0 1.016568e+11 31.996 \n",
1767 | "235 32.02 31.65 33089300.0 1.066563e+11 32.292 \n",
1768 | "236 31.26 32.59 42722600.0 1.354698e+11 32.312 \n",
1769 | "237 31.67 31.36 28222500.0 9.166648e+10 32.264 \n",
1770 | "\n",
1771 | " mm21d \n",
1772 | "0 34.859048 \n",
1773 | "1 34.610952 \n",
1774 | "2 34.385714 \n",
1775 | "3 34.199524 \n",
1776 | "4 34.063810 \n",
1777 | ".. ... \n",
1778 | "233 31.444762 \n",
1779 | "234 31.522857 \n",
1780 | "235 31.609524 \n",
1781 | "236 31.662381 \n",
1782 | "237 31.711905 \n",
1783 | "\n",
1784 | "[238 rows x 11 columns]"
1785 | ]
1786 | },
1787 | "execution_count": 13,
1788 | "metadata": {},
1789 | "output_type": "execute_result"
1790 | }
1791 | ],
1792 | "source": [
1793 | "#reindexando o data frame\n",
1794 | "df_itau = df_itau.reset_index(drop=True)\n",
1795 | "df_itau"
1796 | ]
1797 | },
1798 | {
1799 | "cell_type": "code",
1800 | "execution_count": 14,
1801 | "metadata": {},
1802 | "outputs": [
1803 | {
1804 | "data": {
1805 | "text/plain": [
1806 | "'linhas treino= 0:167 linhas teste= 167:237 linhas validação= 237'"
1807 | ]
1808 | },
1809 | "execution_count": 14,
1810 | "metadata": {},
1811 | "output_type": "execute_result"
1812 | }
1813 | ],
1814 | "source": [
1815 | "# Work out the row counts for each split: 70% for training, the remainder for testing\n",
1816 | "qtd_linhas = df_itau.shape[0]\n",
1817 | "\n",
1818 | "qtd_linhas_treino = round(0.70 * qtd_linhas)\n",
1819 | "qtd_linhas_teste = qtd_linhas - qtd_linhas_treino\n",
1820 | "qtd_linhas_validacao = qtd_linhas - 1  # position of the final row, reported as the validation row\n",
1821 | "\n",
1822 | "info = \"\".join((\n",
1823 | "    f\"linhas treino= 0:{qtd_linhas_treino}\",\n",
1824 | "    f\" linhas teste= {qtd_linhas_treino}:{qtd_linhas_treino + qtd_linhas_teste -1}\",\n",
1825 | "    f\" linhas validação= {qtd_linhas_validacao}\",\n",
1826 | "))\n",
1827 | "\n",
1828 | "info\n"
1829 | ]
1830 | },
1831 | {
1832 | "cell_type": "code",
1833 | "execution_count": 15,
1834 | "metadata": {},
1835 | "outputs": [],
1836 | "source": [
1837 | "# Separate predictors from the target; drop by keyword because a positional axis argument is rejected by pandas 2.x\n",
1838 | "features = df_itau.drop(columns=['sigla_acao', 'nome_acao', 'data_pregao', 'preco_fechamento'])\n",
1839 | "labels = df_itau['preco_fechamento']"
1840 | ]
1841 | },
1842 | {
1843 | "cell_type": "code",
1844 | "execution_count": 16,
1845 | "metadata": {},
1846 | "outputs": [
1847 | {
1848 | "name": "stdout",
1849 | "output_type": "stream",
1850 | "text": [
1851 | "\n",
1852 | "Melhores features:\n",
1853 | "{'qtd_negocios': 18.799424892097544, 'preco_minimo': 15.959684908457135, 'preco_max': 10.97138027738372, 'mm21d': 7.809600323338402, 'mm5d': 2.2915178065342623, 'volume_negocios': 1.79780379509346}\n"
1854 | ]
1855 | }
1856 | ],
1857 | "source": [
1858 | "# Choosing the best features with SelectKBest (k='all' scores every column)\n",
1859 | "from sklearn.feature_selection import f_regression\n",
1860 | "features_list = tuple(features.columns)  # same order as scores_; the old features_list[1:] shifted every name by one\n",
1861 | "# preco_fechamento is a continuous target, so use the regression scorer instead of the f_classif default\n",
1862 | "k_best_features = SelectKBest(score_func=f_regression, k='all')\n",
1863 | "k_best_features.fit(features, labels)\n",
1864 | "k_best_features_scores = k_best_features.scores_\n",
1865 | "raw_pairs = zip(features_list, k_best_features_scores)\n",
1866 | "ordered_pairs = sorted(raw_pairs, key=lambda x: x[1], reverse=True)  # highest score first\n",
1867 | "\n",
1868 | "k_best_features_final = dict(ordered_pairs)\n",
1869 | "best_features = k_best_features_final.keys()\n",
1870 | "print('')\n",
1871 | "print(\"Melhores features:\")\n",
1872 | "print(k_best_features_final)"
1873 | ]
1874 | },
1875 | {
1876 | "cell_type": "code",
1877 | "execution_count": 17,
1878 | "metadata": {},
1879 | "outputs": [],
1880 | "source": [
1881 | "# Keep only the chosen predictor columns\n",
1882 | "features = df_itau[['preco_max', 'preco_minimo', 'volume_negocios', 'mm5d']]"
1883 | ]
1884 | },
1885 | {
1886 | "cell_type": "code",
1887 | "execution_count": 18,
1888 | "metadata": {},
1889 | "outputs": [
1890 | {
1891 | "data": {
1892 | "text/html": [
1893 | "\n",
1894 | "\n",
1907 | "
\n",
1908 | " \n",
1909 | " \n",
1910 | " | \n",
1911 | " preco_max | \n",
1912 | " preco_minimo | \n",
1913 | " volume_negocios | \n",
1914 | " mm5d | \n",
1915 | "
\n",
1916 | " \n",
1917 | " \n",
1918 | " \n",
1919 | " | 0 | \n",
1920 | " 33.45 | \n",
1921 | " 32.58 | \n",
1922 | " 1.374939e+11 | \n",
1923 | " 33.530 | \n",
1924 | "
\n",
1925 | " \n",
1926 | " | 1 | \n",
1927 | " 33.39 | \n",
1928 | " 32.54 | \n",
1929 | " 8.358342e+10 | \n",
1930 | " 33.246 | \n",
1931 | "
\n",
1932 | " \n",
1933 | " | 2 | \n",
1934 | " 33.31 | \n",
1935 | " 32.79 | \n",
1936 | " 5.817445e+10 | \n",
1937 | " 33.126 | \n",
1938 | "
\n",
1939 | " \n",
1940 | " | 3 | \n",
1941 | " 33.46 | \n",
1942 | " 32.90 | \n",
1943 | " 5.220127e+10 | \n",
1944 | " 33.060 | \n",
1945 | "
\n",
1946 | " \n",
1947 | " | 4 | \n",
1948 | " 34.12 | \n",
1949 | " 33.36 | \n",
1950 | " 1.169225e+11 | \n",
1951 | " 33.138 | \n",
1952 | "
\n",
1953 | " \n",
1954 | " | ... | \n",
1955 | " ... | \n",
1956 | " ... | \n",
1957 | " ... | \n",
1958 | " ... | \n",
1959 | "
\n",
1960 | " \n",
1961 | " | 233 | \n",
1962 | " 33.44 | \n",
1963 | " 32.43 | \n",
1964 | " 1.721607e+11 | \n",
1965 | " 31.760 | \n",
1966 | "
\n",
1967 | " \n",
1968 | " | 234 | \n",
1969 | " 32.70 | \n",
1970 | " 31.72 | \n",
1971 | " 1.016568e+11 | \n",
1972 | " 31.996 | \n",
1973 | "
\n",
1974 | " \n",
1975 | " | 235 | \n",
1976 | " 32.45 | \n",
1977 | " 32.02 | \n",
1978 | " 1.066563e+11 | \n",
1979 | " 32.292 | \n",
1980 | "
\n",
1981 | " \n",
1982 | " | 236 | \n",
1983 | " 32.53 | \n",
1984 | " 31.26 | \n",
1985 | " 1.354698e+11 | \n",
1986 | " 32.312 | \n",
1987 | "
\n",
1988 | " \n",
1989 | " | 237 | \n",
1990 | " 32.94 | \n",
1991 | " 31.67 | \n",
1992 | " 9.166648e+10 | \n",
1993 | " 32.264 | \n",
1994 | "
\n",
1995 | " \n",
1996 | "
\n",
1997 | "
238 rows × 4 columns
\n",
1998 | "
"
1999 | ],
2000 | "text/plain": [
2001 | " preco_max preco_minimo volume_negocios mm5d\n",
2002 | "0 33.45 32.58 1.374939e+11 33.530\n",
2003 | "1 33.39 32.54 8.358342e+10 33.246\n",
2004 | "2 33.31 32.79 5.817445e+10 33.126\n",
2005 | "3 33.46 32.90 5.220127e+10 33.060\n",
2006 | "4 34.12 33.36 1.169225e+11 33.138\n",
2007 | ".. ... ... ... ...\n",
2008 | "233 33.44 32.43 1.721607e+11 31.760\n",
2009 | "234 32.70 31.72 1.016568e+11 31.996\n",
2010 | "235 32.45 32.02 1.066563e+11 32.292\n",
2011 | "236 32.53 31.26 1.354698e+11 32.312\n",
2012 | "237 32.94 31.67 9.166648e+10 32.264\n",
2013 | "\n",
2014 | "[238 rows x 4 columns]"
2015 | ]
2016 | },
2017 | "execution_count": 18,
2018 | "metadata": {},
2019 | "output_type": "execute_result"
2020 | }
2021 | ],
2022 | "source": [
2023 | "features"
2024 | ]
2025 | },
2026 | {
2027 | "cell_type": "code",
2028 | "execution_count": 19,
2029 | "metadata": {},
2030 | "outputs": [
2031 | {
2032 | "name": "stdout",
2033 | "output_type": "stream",
2034 | "text": [
2035 | "167 167\n",
2036 | "70 70\n"
2037 | ]
2038 | }
2039 | ],
2040 | "source": [
2041 | "# Chronological split: the first rows train, the following rows test, and the final row stays out of both\n",
2042 | "fatia_treino = slice(None, qtd_linhas_treino)\n",
2043 | "fatia_teste = slice(qtd_linhas_treino, qtd_linhas_treino + qtd_linhas_teste - 1)\n",
2044 | "\n",
2045 | "X_train, y_train = features.iloc[fatia_treino], labels.iloc[fatia_treino]\n",
2046 | "X_test, y_test = features.iloc[fatia_teste], labels.iloc[fatia_teste]\n",
2047 | "\n",
2048 | "# Sanity check: features and labels must have the same length in each split\n",
2049 | "print(len(X_train), len(y_train))\n",
2050 | "print(len(X_test), len(y_test))"
2051 | ]
2052 | },
2053 | {
2054 | "cell_type": "code",
2055 | "execution_count": 20,
2056 | "metadata": {},
2057 | "outputs": [],
2058 | "source": [
2059 | "# Min-max scaling of the input features\n",
2060 | "\n",
2061 | "# Fit the scaler on the training rows only, then apply that same fit to both splits\n",
2062 | "scaler = MinMaxScaler().fit(X_train)\n",
2063 | "X_train_scale = scaler.transform(X_train)  # scaled training inputs\n",
2064 | "X_test_scale = scaler.transform(X_test)  # scaled test inputs\n",
2065 | "\n"
2066 | ]
2067 | },
2068 | {
2069 | "cell_type": "code",
2070 | "execution_count": 21,
2071 | "metadata": {},
2072 | "outputs": [
2073 | {
2074 | "data": {
2075 | "text/plain": [
2076 | "'Coeficiente de determinação:95.39'"
2077 | ]
2078 | },
2079 | "execution_count": 21,
2080 | "metadata": {},
2081 | "output_type": "execute_result"
2082 | }
2083 | ],
2084 | "source": [
2085 | "# Training with linear regression\n",
2086 | "lr = linear_model.LinearRegression().fit(X_train_scale, y_train)\n",
2087 | "pred = lr.predict(X_test_scale)\n",
2088 | "\n",
2089 | "# Coefficient of determination (R²) measured on the test rows\n",
2090 | "cd = r2_score(y_true=y_test, y_pred=pred)\n",
2091 | "f'Coeficiente de determinação:{cd * 100:.2f}'"
2092 | ]
2093 | },
2094 | {
2095 | "cell_type": "code",
2096 | "execution_count": 22,
2097 | "metadata": {},
2098 | "outputs": [
2099 | {
2100 | "name": "stderr",
2101 | "output_type": "stream",
2102 | "text": [
2103 | "C:\\Users\\fabri\\anaconda3\\lib\\site-packages\\sklearn\\neural_network\\_multilayer_perceptron.py:571: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (2000) reached and the optimization hasn't converged yet.\n",
2104 | " % self.max_iter, ConvergenceWarning)\n"
2105 | ]
2106 | },
2107 | {
2108 | "data": {
2109 | "text/plain": [
2110 | "'Coeficiente de determinação:93.77'"
2111 | ]
2112 | },
2113 | "execution_count": 22,
2114 | "metadata": {},
2115 | "output_type": "execute_result"
2116 | }
2117 | ],
2118 | "source": [
2119 | "# Neural network (MLPRegressor) with the iteration cap raised to 2000\n",
2120 | "rn = MLPRegressor(max_iter=2000).fit(X_train_scale, y_train)\n",
2121 | "\n",
2122 | "pred = rn.predict(X_test_scale)\n",
2123 | "\n",
2124 | "# R² on the test rows, computed from the predictions above (the same metric rn.score reports)\n",
2125 | "cd = r2_score(y_test, pred)\n",
2126 | "\n",
2127 | "\n",
2128 | "f'Coeficiente de determinação:{cd * 100:.2f}'"
2129 | ]
2130 | },
2131 | {
2132 | "cell_type": "code",
2133 | "execution_count": 23,
2134 | "metadata": {},
2135 | "outputs": [
2136 | {
2137 | "data": {
2138 | "text/plain": [
2139 | "'Coeficiente de determinação:94.38'"
2140 | ]
2141 | },
2142 | "execution_count": 23,
2143 | "metadata": {},
2144 | "output_type": "execute_result"
2145 | }
2146 | ],
2147 | "source": [
2148 | "# Neural network with hyper-parameter tuning through a grid search\n",
2149 | "\n",
2150 | "rn = MLPRegressor()\n",
2151 | "\n",
2152 | "parameter_space = dict(\n",
2153 | "    hidden_layer_sizes=[(n,) for n in range(1, 21)],\n",
2154 | "    activation=['tanh', 'relu'],\n",
2155 | "    solver=['sgd', 'adam', 'lbfgs'],\n",
2156 | "    alpha=[0.0001, 0.05],\n",
2157 | "    learning_rate=['constant', 'adaptive'],\n",
2158 | ")\n",
2159 | "\n",
2160 | "# Every combination above is cross-validated (cv=5) on the training rows only\n",
2161 | "search = GridSearchCV(estimator=rn, param_grid=parameter_space, n_jobs=-1, cv=5)\n",
2162 | "search.fit(X_train_scale, y_train)\n",
2163 | "\n",
2164 | "clf = search.best_estimator_\n",
2165 | "pred = search.predict(X_test_scale)\n",
2166 | "\n",
2167 | "cd = search.score(X_test_scale, y_test)\n",
2168 | "\n",
2169 | "f'Coeficiente de determinação:{cd * 100:.2f}'"
2170 | ]
2171 | },
2172 | {
2173 | "cell_type": "code",
2174 | "execution_count": 24,
2175 | "metadata": {},
2176 | "outputs": [
2177 | {
2178 | "data": {
2179 | "text/html": [
2180 | "\n",
2181 | "\n",
2194 | "
\n",
2195 | " \n",
2196 | " \n",
2197 | " | \n",
2198 | " preco_max | \n",
2199 | " preco_minimo | \n",
2200 | " volume_negocios | \n",
2201 | " mm5d | \n",
2202 | "
\n",
2203 | " \n",
2204 | " \n",
2205 | " \n",
2206 | " | 237 | \n",
2207 | " 32.94 | \n",
2208 | " 31.67 | \n",
2209 | " 9.166648e+10 | \n",
2210 | " 32.264 | \n",
2211 | "
\n",
2212 | " \n",
2213 | "
\n",
2214 | "
"
2215 | ],
2216 | "text/plain": [
2217 | " preco_max preco_minimo volume_negocios mm5d\n",
2218 | "237 32.94 31.67 9.166648e+10 32.264"
2219 | ]
2220 | },
2221 | "execution_count": 24,
2222 | "metadata": {},
2223 | "output_type": "execute_result"
2224 | }
2225 | ],
2226 | "source": [
2227 | "valor_novo = features.tail(1)\n",
2228 | "valor_novo"
2229 | ]
2230 | },
2231 | {
2232 | "cell_type": "code",
2233 | "execution_count": 25,
2234 | "metadata": {},
2235 | "outputs": [
2236 | {
2237 | "data": {
2238 | "text/plain": [
2239 | "array([31.84759339])"
2240 | ]
2241 | },
2242 | "execution_count": 25,
2243 | "metadata": {},
2244 | "output_type": "execute_result"
2245 | }
2246 | ],
2247 | "source": [
2248 | "#executando a previsão\n",
2249 | "\n",
2250 | "\n",
2251 | "previsao=scaler.transform(valor_novo)\n",
2252 | "\n",
2253 | "\n",
2254 | "pred=lr.predict(previsao)\n",
2255 | "\n",
2256 | "pred"
2257 | ]
2258 | },
2259 | {
2260 | "cell_type": "code",
2261 | "execution_count": 26,
2262 | "metadata": {},
2263 | "outputs": [],
2264 | "source": [
2265 | "df = df[df['sigla_acao'] == 'ITUB4' ]"
2266 | ]
2267 | },
2268 | {
2269 | "cell_type": "code",
2270 | "execution_count": 27,
2271 | "metadata": {},
2272 | "outputs": [
2273 | {
2274 | "name": "stdout",
2275 | "output_type": "stream",
2276 | "text": [
2277 | " real previsao\n",
2278 | "data_pregao \n",
2279 | "2021-01-15 31.36 31.847593\n"
2280 | ]
2281 | }
2282 | ],
2283 | "source": [
2284 | "# Compare the forecast with the actual closing price of the last row of df\n",
2285 | "data_pregao_full = df['data_pregao']\n",
2286 | "data_pregao = data_pregao_full.tail(1)\n",
2287 | "\n",
2288 | "res_full = df['preco_fechamento']\n",
2289 | "res = res_full.tail(1)\n",
2290 | "\n",
2291 | "# Keep the comparison under its own name: rebinding df here would discard the quote columns,\n",
2292 | "# so re-running this cell or the ITUB4 filter cell would fail with a KeyError\n",
2293 | "df_resultado = pd.DataFrame({'data_pregao': data_pregao, 'real': res, 'previsao': pred})\n",
2294 | "df_resultado.set_index('data_pregao', inplace=True)\n",
2295 | "\n",
2296 | "print(df_resultado)"
2297 | ]
2298 | },
2299 | {
2300 | "cell_type": "code",
2301 | "execution_count": null,
2302 | "metadata": {},
2303 | "outputs": [],
2304 | "source": [
2305 | "\n"
2306 | ]
2307 | }
2308 | ],
2309 | "metadata": {
2310 | "kernelspec": {
2311 | "display_name": "Python 3",
2312 | "language": "python",
2313 | "name": "python3"
2314 | },
2315 | "language_info": {
2316 | "codemirror_mode": {
2317 | "name": "ipython",
2318 | "version": 3
2319 | },
2320 | "file_extension": ".py",
2321 | "mimetype": "text/x-python",
2322 | "name": "python",
2323 | "nbconvert_exporter": "python",
2324 | "pygments_lexer": "ipython3",
2325 | "version": "3.7.6"
2326 | }
2327 | },
2328 | "nbformat": 4,
2329 | "nbformat_minor": 4
2330 | }
2331 |
--------------------------------------------------------------------------------