├── README.md ├── prophet.py ├── pycaret.py ├── churn_de_clientes.py ├── Previsão Resultado Campeonato Brasileiro.ipynb └── Previsão preço ações.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # machine_learnig 2 | Notebook contendo o código utilizado para prever os resultados dos jogos do Campeonato Brasileiro, previsão de ações e churn de clientes 3 | 4 | Foi utilizada a linguagem Python com as bibliotecas scikit-learn, pandas, numpy, seaborn e matplotlib. 5 | 6 | A base de dados foi alterada para refletir o resultado em formato numérico. 7 | 8 | A base original foi retirada do site: http://football-data.co.uk/ 9 | 10 | Base churn de clientes: https://www.kaggle.com/blastchar/telco-customer-churn 11 | 12 | -------------------------------------------------------------------------------- /prophet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | #instalar o yfinance 4 | pip install yfinance 5 | 6 | #import bibliotecas 7 | import pandas as pd 8 | import yfinance as yf 9 | from datetime import datetime 10 | from datetime import timedelta 11 | import plotly.graph_objects as go 12 | from fbprophet import Prophet 13 | from fbprophet.plot import plot_plotly, plot_components_plotly 14 | import warnings 15 | warnings.filterwarnings('ignore') 16 | pd.options.display.float_format = '${:,.2f}'.format 17 | 18 | hj = datetime.today().strftime('%Y-%m-%d') 19 | data_ini = '2016-01-01' 20 | df_eth = yf.download('ETH-USD', data_ini, hj) 21 | df_eth.tail() 22 | 23 | df_eth.reset_index(inplace=True) 24 | 25 | df_eth 26 | 27 | df = df_eth[["Date", "Adj Close"]] 28 | df.rename(columns = {'Date': 'ds', 'Adj Close': 'y' }, inplace=True) 29 | 30 | df 31 | 32 | # Grafico Preço de fechamento 33 | fig = go.Figure() 34 | fig.add_trace(go.Scatter(x=df['ds'], y = df['y'])) 35 | 36 | model = Prophet( seasonality_mode='multiplicative') 37 | model.fit(df) 38 | 39 | #criar df 
com datas no futuro 40 | df_futuro = model.make_future_dataframe(periods=60) 41 | df_futuro.tail (60) 42 | 43 | #previsao 44 | previsao = model.predict(df_futuro) 45 | previsao 46 | 47 | previsao[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(60) 48 | 49 | #grafico 50 | plot_plotly(model, previsao) 51 | 52 | plot_components_plotly(model, previsao) -------------------------------------------------------------------------------- /pycaret.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | #instalando as bibliotecas 4 | !pip install pycaret == 2.1.2 5 | !pip install yfinance 6 | 7 | from pycaret.utils import enable_colab 8 | enable_colab() 9 | 10 | #importar as bibliotecas 11 | import yfinance as yf 12 | import pandas as pd 13 | 14 | #escolher uma ação 15 | df = yf.Ticker('RADL3.SA') 16 | #escolher o intervalo de dados 17 | raia = df.history(period='2y') 18 | raia 19 | 20 | #retirando os campos 21 | raia = raia.drop(['Dividends','Stock Splits'], axis=1) 22 | raia 23 | 24 | #criando novos campos 25 | raia['MM7d'] = raia['Close'].rolling(window=7).mean().round(2) 26 | raia['MM30d'] = raia['Close'].rolling(window=30).mean().round(2) 27 | raia 28 | 29 | #5 dias para previsao 30 | raia_prever = raia.tail(5) 31 | raia_prever 32 | 33 | #retirar os ultimos 5 dias do df 34 | raia.drop(raia.tail(5).index, inplace=True) 35 | raia 36 | 37 | #empurra para frente os valores das ações 38 | raia['Close'] = raia['Close'].shift(-1) 39 | raia 40 | 41 | #Retirar os nulos 42 | raia.dropna(inplace=True) 43 | raia 44 | 45 | #drop index 46 | raia.reset_index(drop=True, inplace=True) 47 | raia_prever.reset_index(drop=True, inplace=True) 48 | 49 | raia 50 | 51 | #import regression lib pycaret 52 | from pycaret.regression import * 53 | setup(data= raia, target='Close', session_id=123) 54 | 55 | top3 = compare_models(n_select=3) 56 | 57 | print(top3) 58 | 59 | models() 60 | 61 | ridge = create_model('ridge', fold=10) 62 | 63 
| lar = create_model('lar', fold=10) 64 | 65 | br = create_model('br', fold=10) 66 | 67 | #Tunning 68 | ridge_params = { 'alpha':[0.02, 0.024, 0.025, 0.026, 0.03]} 69 | tunne_ridge = tune_model(ridge, n_iter=1000, optimize='RMSE', custom_grid=ridge_params) 70 | 71 | tunne_lar = tune_model(lar, n_iter=1000, optimize = 'RMSE') 72 | 73 | tunne_br = tune_model(br, n_iter=1000, optimize = 'RMSE') 74 | 75 | #Grafico erros 76 | plot_model(tunne_ridge, plot='error') 77 | 78 | plot_model(tunne_ridge, plot='feature') 79 | 80 | #Testando com dados de treinameto 81 | predict_model(tunne_ridge) 82 | 83 | #Finalizar o modelo 84 | final_ridge_model = finalize_model(tunne_ridge) 85 | 86 | #Previsao 87 | prev = predict_model(final_ridge_model, data=raia_prever) 88 | prev 89 | 90 | #Salvando o modelo para utilizar com dados novos 91 | save_model(final_ridge_model, 'Modelo Final Ridge Pycaret') 92 | 93 | #Dados novos 94 | novo_dado = yf.download('RADL3.SA', period='45d') 95 | novo_dado 96 | 97 | #retira campos 98 | novo_dado = novo_dado.drop('Adj Close',axis = 1) 99 | #retirar index 100 | novo_dado.reset_index(drop=True, inplace=True) 101 | #criar novos campos 102 | novo_dado['MM7d'] = novo_dado['Close'].rolling(window=7).mean().round(2) 103 | novo_dado['MM30d'] = novo_dado['Close'].rolling(window=30).mean().round(2) 104 | novo_dado 105 | 106 | novo_dado = novo_dado.tail(1) 107 | novo_dado 108 | 109 | #Reutilizando o modelo 110 | saved_final_ridge_model = load_model('Modelo Final Ridge Pycaret') 111 | 112 | #Prevendo novo dado 113 | nova_previsao = predict_model(saved_final_ridge_model, data=novo_dado) 114 | nova_previsao.head() -------------------------------------------------------------------------------- /churn_de_clientes.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Churn de clientes.ipynb 3 | 4 | Automatically generated by Colaboratory. 
5 | 6 | Original file is located at 7 | https://colab.research.google.com/drive/1cWTIyGJbuYPWP0iWl1QCrx2_2P3wtrsd 8 | 9 | ``` 10 | Prevendo churn de clientes com machine learning 11 | ``` 12 | """ 13 | 14 | import pandas as pd 15 | import numpy as np 16 | import matplotlib.pyplot as plt 17 | from sklearn.preprocessing import MinMaxScaler, LabelEncoder 18 | from sklearn.model_selection import train_test_split 19 | from sklearn.metrics import confusion_matrix, classification_report 20 | import tensorflow as tf 21 | from tensorflow import keras 22 | from tensorflow.keras.models import Sequential 23 | from tensorflow.keras.layers import Activation, Dense 24 | from tensorflow.keras.optimizers import Adam 25 | from tensorflow.keras.metrics import categorical_crossentropy 26 | 27 | from google.colab import files 28 | arquivo = files.upload() 29 | 30 | df =pd.read_csv('Telco-Customer-Churn.csv') 31 | 32 | df_val = df.iloc[:10] 33 | 34 | df_val 35 | 36 | #Podemos retirar a coluna de id 37 | df.drop('customerID', 1, inplace=True) 38 | 39 | #verifica os tipos 40 | df.dtypes 41 | 42 | #Parece que temos o campo Montlhy Charges como caracter, então teremos que ajustá-lo 43 | 44 | df.TotalCharges = pd.to_numeric(df.TotalCharges) 45 | 46 | #é possivel usar 'coerce' mas o valores que tem espço em branco vão ficar vazio, podemos retirá-los 47 | df.TotalCharges = pd.to_numeric(df.TotalCharges, errors='coerce') 48 | 49 | df.TotalCharges.isna() 50 | 51 | #Vamos ver existem campos que estão com campos vazios 52 | df[df.TotalCharges.isna()] 53 | 54 | #Podemos excluir os que estão vazios 55 | df.dropna(subset=['TotalCharges'], inplace=True) 56 | 57 | df 58 | 59 | #Graficos para explorar os dados 60 | #filtrar por churn no e yes e verificar os tenure 61 | dfno = df[df.Churn=='No'].tenure 62 | dfyes = df[df.Churn=='Yes'].tenure 63 | 64 | plt.hist([dfno,dfyes], color=['blue', 'red'], label=['Churn = no', 'Churn = yes']) 65 | plt.legend() 66 | 67 | #Graficos para explorar os dados 68 | 
# Compare the Dependents distribution of customers who stayed vs. churned.
dfno = df[df.Churn == 'No'].Dependents
dfyes = df[df.Churn == 'Yes'].Dependents

# Each chart gets its own figure; without this, running the file as a
# script draws every histogram onto the same axes.
plt.figure()
plt.hist([dfno, dfyes], color=['blue', 'red'], label=['Churn = no', 'Churn = yes'])
plt.legend()

# Exploratory charts
# Compare the MonthlyCharges distribution of customers who stayed vs. churned.
dfno = df[df.Churn == 'No'].MonthlyCharges
dfyes = df[df.Churn == 'Yes'].MonthlyCharges

plt.figure()
plt.hist([dfno, dfyes], color=['blue', 'red'], label=['Churn = no', 'Churn = yes'])
plt.legend()

# Check how balanced the dataset is.
dfno = df[df.Churn == 'No'].Churn
dfyes = df[df.Churn == 'Yes'].Churn

n_no = dfno.count()
n_yes = dfyes.count()
total = n_no + n_yes
# Share of churned ("Yes") customers, in percent.
percent = round((n_yes / total) * 100, 2)

# The labels now match the values: the original printed the "No" count
# under "YES", the "Yes" count under "No" and the Yes-share as "% No".
print(f'No: {n_no}', f'Yes: {n_yes}', f'% Yes: {percent}')

plt.figure()
plt.hist([dfno, dfyes], color=['blue', 'red'], label=['Churn = no', 'Churn = yes'])
plt.legend()

# List the distinct values of every text column, to spot the
# "No ... service" variants that are collapsed into plain "No" below.
for i in df:
    if df[i].dtypes == 'object':
        print(f'{i} : {df[i].unique()}')

df.replace('No internet service', 'No', inplace=True)
df.replace('No phone service', 'No', inplace=True)

df

# Next step: turn the categorical columns into numeric ones using pandas' one-hot encoding.
# The models are mathematical, so they work on numbers rather than on
# category labels; hence the one-hot encoding below.

# 'InternetService' was listed twice in the original column list, which
# made get_dummies emit its dummy columns twice; each column is now
# listed once.
# dtype=float keeps the dummies numeric; newer pandas versions default
# to bool dummies (see the pandas get_dummies documentation).
dfnum = pd.get_dummies(
    data=df,
    columns=[
        'gender',
        'InternetService',
        'Partner',
        'Dependents',
        'PhoneService',
        'MultipleLines',
        'OnlineSecurity',
        'OnlineBackup',
        'DeviceProtection',
        'TechSupport',
        'StreamingTV',
        'StreamingMovies',
        'Contract',
        'PaperlessBilling',
        'PaymentMethod',
    ],
    dtype=float,
)
dfnum.columns

# A sample of the encoded values.
dfnum.sample(5)

# Label-encode the Churn target column.
le = LabelEncoder()
dfnum['Churn'] = le.fit_transform(dfnum['Churn'])
dfnum.Churn.sample(15)

# Scale tenure, MonthlyCharges and TotalCharges with MinMaxScaler.
numeric_cols = ['tenure', 'MonthlyCharges', 'TotalCharges']
scaler = MinMaxScaler()
dfnum[numeric_cols] = scaler.fit_transform(dfnum[numeric_cols])
dfnum.sample(5)

# Split the data into features and label.
# `columns=` replaces the positional axis argument, which newer pandas
# releases no longer accept.
features = dfnum.drop(columns='Churn')
label = dfnum['Churn']
print(label)

# Hold out the first 10 rows for the final validation/prediction demo.
val = features.head(10)
val

# The remaining rows are used for training and testing.
features = features.iloc[10:]
features

label = label.iloc[10:]
label

# Train/test split.
X_Train, X_test, y_train, y_test = train_test_split(features, label, test_size=0.2, random_state=42)
print(X_Train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

# Build the network with Keras.
# The input width is taken from the training data instead of the
# hard-coded 41, which only matched the duplicated dummy columns.
model = Sequential([
    Dense(20, input_shape=(X_Train.shape[1],), activation='relu'),
    Dense(52, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Training configuration.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


#Um detalhe importante a ser mencionado,e que estamos passando as camadas da rede
neural, e na o primieiro comando na verdade já é a segunda camada 161 | # e neste caso estamos passando 20 neuronio(valor arbitrário) e o segundo parêmtro é a uqbntidade de entradas, que no nosso caso são 26 campos e a função 162 | #de ativação usada no neurônio que neste caso é a relu. 163 | 164 | #ReLU é a função de ativação mais amplamente utilizada ao projetar redes neurais atualmente. 165 | #Primeiramente, a função ReLU é não linear, o que significa que podemos facilmente copiar os erros para trás e ter várias camadas de 166 | #neurônios ativados pela função ReLU. 167 | 168 | #A principal vantagem de usar a função ReLU sobre outras funções de ativação é que ela não ativa todos os neurônios ao mesmo tempo. 169 | 170 | #Podemos ver como foi criado nosso modelo 171 | model.summary() 172 | 173 | model.fit(X_Train,y_train, epochs=100) 174 | 175 | model.evaluate(X_test,y_test) 176 | 177 | pred_val = model.predict(val) 178 | print(pred_val) 179 | 180 | y_pred = [] 181 | for i in pred_val: 182 | if i > 0.5: 183 | y_pred.append(1) 184 | else: 185 | y_pred.append(0) 186 | 187 | cliente = df_val['customerID'] 188 | real = df_val['Churn'] 189 | pred = list(pred_val.flatten()) 190 | 191 | df=pd.DataFrame({'cliente':cliente, 'previsao':pred, 'real':real, 'churn':y_pred}) 192 | 193 | print(df) 194 | 195 | import seaborn as sns 196 | 197 | df = pd.DataFrame(df, columns=['real','churn']) 198 | confusion_matrix = pd.crosstab(df['real'], df['churn'], rownames=['real'], colnames=['churn']) 199 | 200 | sns.heatmap(confusion_matrix, annot=True) -------------------------------------------------------------------------------- /Previsão Resultado Campeonato Brasileiro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#importar bibliotecas\n", 10 | "import pandas as pd\n", 11 | "import numpy as np\n", 12 | "from 
sklearn.linear_model import LogisticRegression\n", 13 | "from sklearn.tree import DecisionTreeClassifier\n", 14 | "from sklearn.naive_bayes import GaussianNB\n", 15 | "from sklearn.svm import SVC\n", 16 | "from sklearn.metrics import accuracy_score , f1_score, precision_score, recall_score\n", 17 | "from sklearn.feature_selection import SelectKBest\n", 18 | "from sklearn.model_selection import GridSearchCV\n", 19 | "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n", 20 | "from IPython.display import display\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "import seaborn as sns\n", 23 | "from sklearn.preprocessing import scale\n", 24 | "%matplotlib inline" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "data": { 34 | "text/html": [ 35 | "
\n", 36 | "\n", 49 | "\n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | "
CountryLeaguegame_idSeasonDateTimeHomehome_idAwayAway_id...ResPHPDPAMaxHMaxDMaxAAvgHAvgDAvgA
0BrazilSerie A1201219-05-1222:30Palmeiras21Portuguesa21...01.753.865.251.763.875.311.693.504.90
1BrazilSerie A2201219-05-1222:30Sport Recife27Flamengo RJ27...02.833.392.682.833.422.702.593.232.58
2BrazilSerie A3201220-05-121:00Figueirense13Nautico13...11.604.046.721.674.057.221.593.675.64
3BrazilSerie A4201220-05-1220:00Botafogo RJ7Sao Paulo7...12.493.353.152.493.393.152.353.262.84
4BrazilSerie A5201220-05-1220:00Corinthians9Fluminense9...21.963.534.411.963.534.411.893.333.89
\n", 199 | "

5 rows × 22 columns

\n", 200 | "
" 201 | ], 202 | "text/plain": [ 203 | " Country League game_id Season Date Time Home home_id \\\n", 204 | "0 Brazil Serie A 1 2012 19-05-12 22:30 Palmeiras 21 \n", 205 | "1 Brazil Serie A 2 2012 19-05-12 22:30 Sport Recife 27 \n", 206 | "2 Brazil Serie A 3 2012 20-05-12 1:00 Figueirense 13 \n", 207 | "3 Brazil Serie A 4 2012 20-05-12 20:00 Botafogo RJ 7 \n", 208 | "4 Brazil Serie A 5 2012 20-05-12 20:00 Corinthians 9 \n", 209 | "\n", 210 | " Away Away_id ... Res PH PD PA MaxH MaxD MaxA AvgH \\\n", 211 | "0 Portuguesa 21 ... 0 1.75 3.86 5.25 1.76 3.87 5.31 1.69 \n", 212 | "1 Flamengo RJ 27 ... 0 2.83 3.39 2.68 2.83 3.42 2.70 2.59 \n", 213 | "2 Nautico 13 ... 1 1.60 4.04 6.72 1.67 4.05 7.22 1.59 \n", 214 | "3 Sao Paulo 7 ... 1 2.49 3.35 3.15 2.49 3.39 3.15 2.35 \n", 215 | "4 Fluminense 9 ... 2 1.96 3.53 4.41 1.96 3.53 4.41 1.89 \n", 216 | "\n", 217 | " AvgD AvgA \n", 218 | "0 3.50 4.90 \n", 219 | "1 3.23 2.58 \n", 220 | "2 3.67 5.64 \n", 221 | "3 3.26 2.84 \n", 222 | "4 3.33 3.89 \n", 223 | "\n", 224 | "[5 rows x 22 columns]" 225 | ] 226 | }, 227 | "metadata": {}, 228 | "output_type": "display_data" 229 | } 230 | ], 231 | "source": [ 232 | "#lendo o arquivo BRAS.csv\n", 233 | "#O arquivo utilizado foi baixado do site www.football-data.co.uk, que agrega informações de diversos campeonatos\n", 234 | "#ao redor do mundo.\n", 235 | "\n", 236 | "data=pd.read_csv('BRA.csv',delimiter=',')\n", 237 | "\n", 238 | "#Verificando as 5 primeiras linhas do arquivo a ser utilizado\n", 239 | "display(data.head())\n", 240 | "\n", 241 | "#Notes for football data\n", 242 | "\n", 243 | "#Country = país do campeonato\n", 244 | "#League = nome da liga\n", 245 | "#game_id = id do jogo\n", 246 | "#Season = temporada\n", 247 | "#date = data do jogo\n", 248 | "#Time = hora do jogo\n", 249 | "#Home = Time da casa\n", 250 | "#home_id = id do time da casa\n", 251 | "#Away = time visitante\n", 252 | "#Away_id = id do time visitante\n", 253 | "#HG = Gols do time da casa\n", 254 | "#AG = Gols do 
time visitante\n", 255 | "#Res = Resultado do jogo (D=Draw, H=Home win, A=Away win)\n", 256 | "#PH = probabilidade vitória time da casa provida pela Pinnacle (casa de aposta inglesa)\n", 257 | "#PD = probabilidade empate provida pela Pinnacle (casa de aposta inglesa)\n", 258 | "#PA = probabilidade vitória time visitante provida pela Pinnacle (casa de aposta inglesa)\n", 259 | "#MaxH = probabilidade vitória time da casa provida pela OddsPortal (casa de aposta inglesa)\n", 260 | "#MaxD = probabilidade empate provida pela OddsPortal (casa de aposta inglesa)\n", 261 | "#MaxA = probabilidade vitória time visitante provida pela OddsPortal (casa de aposta inglesa)\n", 262 | "#AvgH = probabilidade média de vitória em casa provida pela OddsPortal (casa de aposta inglesa)\n", 263 | "#AvgD = probabilidade média de empate provida pela OddsPortal (casa de aposta inglesa)\n", 264 | "#AvgA = probabilidade média de vitória pelo time visitante provida pela OddsPortal (casa de aposta inglesa)\n" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 3, 270 | "metadata": {}, 271 | "outputs": [ 272 | { 273 | "name": "stdout", 274 | "output_type": "stream", 275 | "text": [ 276 | "Total de jogos: 2279\n", 277 | "Total de colunas: 21\n", 278 | "Total de jogos ganhos em casa: 1133\n", 279 | "Total de jogos ganhos pelo visitante: 553\n", 280 | "Total de jogos empatados: 593\n", 281 | "Percentual de jogos ganhos em casa: 49.71%\n" 282 | ] 283 | } 284 | ], 285 | "source": [ 286 | "#Explorando os dados\n", 287 | "#Base de dados dos campeonatos de 2012 a 2017\n", 288 | "\n", 289 | "matches = data.shape[0]\n", 290 | "\n", 291 | "features = data.shape[1] -1 #Retirando a coluna Resultado\n", 292 | "\n", 293 | "home_win = len(data[data.Res==1])\n", 294 | "away_win = len(data[data.Res==2])\n", 295 | "draw = len(data[data.Res==0])\n", 296 | "val=[home_win,away_win,draw]\n", 297 | "\n", 298 | "win_rate = (float(home_win)/(matches)) *100\n", 299 | "\n", 300 | "print ('Total de jogos: ', 
matches)\n", 301 | "print ('Total de colunas: ', features)\n", 302 | "print ('Total de jogos ganhos em casa: ', home_win)\n", 303 | "print ('Total de jogos ganhos pelo visitante: ', away_win)\n", 304 | "print ('Total de jogos empatados: ', draw)\n", 305 | "print ('Percentual de jogos ganhos em casa: {:.2f}%'.format( win_rate ))\n", 306 | "\n", 307 | "#Podemos notar que o \"fator casa\" é importante , sendo que em quase 50% do jogos são ganhos pelo time da casa." 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 4, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAPk0lEQVR4nO3cf6zddX3H8edrVERlUn5cCGs7i7ObU7cxvEEYc1PqnKCxmMgiM1pZs2YJisoW7cwyMl0WTDa7ETeWRtCyMSZDHfgjKisYUYF4QQSxKg06uIPRa0CmMn+g7/1xPh2H9vbXPbfntv08H8nN+Xw/n8/3fD+n397X+ZzP+X5vqgpJUh9+ZqEHIEkaH0Nfkjpi6EtSRwx9SeqIoS9JHVm00APYlWOOOaaWL1++0MOQpAPKrbfe+u2qmpitbb8O/eXLlzM1NbXQw5CkA0qS/9xZm8s7ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUkf36jtxRLV/38YUewkHrWxe9fKGHIGkOnOlLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjuw39JJcl2ZrkK0N1RyW5Lsnd7fHIVp8kFyfZkuSOJCcN7bO69b87yep983IkSbuyJzP9DwAv265uHbCpqlYAm9o2wBnAivazFrgEBm8SwIXAC4CTgQu3vVFIksZnt6FfVZ8FHtquehWwsZU3AmcN1V9eAzcDi5McD/wucF1VPVRVDwPXseMbiSRpH5vrmv5xVfUAQHs8ttUvAe4b6jfd6nZWv4Mka5NMJZmamZmZ4/AkSbOZ7y9yM0td7aJ+x8qqDVU1WVWTExMT8zo4SerdXEP/wbZsQ3vc2uqngWVD/ZYC9++iXpI0RnMN/WuBbVfgrAauGap/fbuK5xTgkbb88yngpUmObF/gvrTVSZLGaNHuOiS5EngRcEySaQZX4VwEXJVkDXAvcHbr/gngTGAL8ChwLkBVPZTkXcAXW793VtX2Xw5Lkvax3YZ+VZ2zk6aVs/Qt4LydPM9lwGV7NTpJ0rzyjlxJ6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSMjhX6Stya5K8lXklyZ5LAkJyS5JcndST6Y5NDW98lte0tr
Xz4fL0CStOfmHPpJlgDnA5NV9TzgEOA1wLuB9VW1AngYWNN2WQM8XFXPAta3fpKkMRp1eWcR8JQki4CnAg8ApwNXt/aNwFmtvKpt09pXJsmIx5ck7YU5h35V/Rfw18C9DML+EeBW4DtV9VjrNg0saeUlwH1t38da/6PnenxJ0t4bZXnnSAaz9xOAnwOeBpwxS9fatssu2oafd22SqSRTMzMzcx2eJGkWoyzvvAT4ZlXNVNWPgQ8DvwEsbss9AEuB+1t5GlgG0NqPAB7a/kmrakNVTVbV5MTExAjDkyRtb5TQvxc4JclT29r8SuCrwA3Aq1uf1cA1rXxt26a1X19VO8z0JUn7zihr+rcw+EL2NuDO9lwbgLcDFyTZwmDN/tK2y6XA0a3+AmDdCOOWJM3Bot132bmquhC4cLvqe4CTZ+n7A+DsUY4nSRqNd+RKUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpIyOFfpLFSa5O8rUkm5OcmuSoJNclubs9Htn6JsnFSbYkuSPJSfPzEiRJe2rUmf7fAZ+sqmcDvwZsBtYBm6pqBbCpbQOcAaxoP2uBS0Y8tiRpL8059JM8Hfgt4FKAqvpRVX0HWAVsbN02Ame18irg8hq4GVic5Pg5j1yStNdGmek/E5gB3p/kS0nel+RpwHFV9QBAezy29V8C3De0/3Sre4Ika5NMJZmamZkZYXiSpO0tGnHfk4A3VdUtSf6Ox5dyZpNZ6mqHiqoNwAaAycnJHdol7T+Wr/v4Qg/hoPWti16+T553lJn+NDBdVbe07asZvAk8uG3Zpj1uHeq/bGj/pcD9IxxfkrSX5hz6VfXfwH1JfqlVrQS+ClwLrG51q4FrWvla4PXtKp5TgEe2LQNJksZjlOUdgDcBVyQ5FLgHOJfBG8lVSdYA9wJnt76fAM4EtgCPtr6SpDEaKfSr6nZgcpamlbP0LeC8UY4nSRqNd+RKUkcMfUnqiKEvSR0Z9Ytcad54zfe+s6+u+daBx5m+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpIyOHfpJDknwpycfa9glJbklyd5IPJjm01T+5bW9p7ctHPbYkae/Mx0z/zcDmoe13A+uragXwMLCm1a8BHq6qZwHrWz9J0hiNFPpJlgIvB97XtgOcDlzdumwEzmrlVW2b1r6y9ZckjcmoM/2/Bd4G/LRtHw18p6oea9vTwJJWXgLcB9DaH2n9nyDJ2iRTSaZmZmZGHJ4kadicQz/JK4CtVXXrcPUsXWsP2h6vqNpQVZNVNTkxMTHX4UmSZrFohH1PA16Z5EzgMODpDGb+i5MsarP5pcD9rf80sAyYTrIIOAJ4aITjS5L20pxn+lX1p1W1tKqWA68Brq+q1wI3AK9u3VYD17TytW2b1n59Ve0w05ck7Tv74jr9twMXJNnCYM3+0lZ/KXB0q78AWLcPji1J2oVRlnf+X1V9BvhMK98DnDxLnx8AZ8/H8SRJc+MduZLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSR+Yc+kmWJbkhyeYkdyV5c6s/Ksl1Se5uj0e2+iS5OMmWJHckOWm+XoQkac+MMtN/DPjjqvpl4BTgvCTPAdYBm6pqBbCpbQOcAaxoP2uB
S0Y4tiRpDuYc+lX1QFXd1srfBTYDS4BVwMbWbSNwViuvAi6vgZuBxUmOn/PIJUl7bV7W9JMsB34duAU4rqoegMEbA3Bs67YEuG9ot+lWt/1zrU0ylWRqZmZmPoYnSWpGDv0khwMfAt5SVf+zq66z1NUOFVUbqmqyqiYnJiZGHZ4kachIoZ/kSQwC/4qq+nCrfnDbsk173Nrqp4FlQ7svBe4f5fiSpL0zytU7AS4FNlfVe4aargVWt/Jq4Jqh+te3q3hOAR7ZtgwkSRqPRSPsexrwOuDOJLe3uncAFwFXJVkD3Auc3do+AZwJbAEeBc4d4diSpDmYc+hX1eeYfZ0eYOUs/Qs4b67HkySNzjtyJakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1JGxh36SlyX5epItSdaN+/iS1LOxhn6SQ4C/B84AngOck+Q54xyDJPVs3DP9k4EtVXVPVf0I+Fdg1ZjHIEndWjTm4y0B7hvangZeMNwhyVpgbdv8XpKvj2lsC+0Y4NsLPYg9lXcv9Aj2CwfMOfN8AQfQ+YKRz9kzdtYw7tDPLHX1hI2qDcCG8Qxn/5FkqqomF3oc2nOeswOL52tg3Ms708Cyoe2lwP1jHoMkdWvcof9FYEWSE5IcCrwGuHbMY5Ckbo11eaeqHkvyRuBTwCHAZVV11zjHsB/rbknrIOA5O7B4voBU1e57SZIOCt6RK0kdMfQlqSOG/jxL8r3ttt+Q5L0LNR7tuSSvSlJJnr3QY9HuJflJktuT3JXky0kuSGKm7Yb/QNLjzgE+x+CqMu3//reqTqyq5wK/A5wJXLh9pyTjvh9pv2boj1GSZyTZlOSO9vjzrf4DSS5JckOSe5L8dpLLkmxO8oGh/V+a5KYktyX5tySHL9iLOci0f8vTgDW00E/yD0le2cofSXJZK69J8pet/O9Jbm2zzbVD7euHnvsPk7xnzC+pK1W1lcGd/G/MwBva78hHgU8nObz9zt2W5M4kqwCSvC3J+a28Psn1rbwyyT8v2Avahwz9+feU9pHz9iS3A+8cansvcHlV/SpwBXDxUNuRwOnAW4GPAuuB5wK/kuTEJMcAfwa8pKpOAqaAC/b9y+nGWcAnq+obwENJTgI+C7ywtS9h8EcCAX4TuLGV/6Cqng9MAucnOZrB35R6ZZIntT7nAu8fw2voWlXdwyDTjm1VpwKrq+p04AfAq9rvzouBv0kSnniOJ4HD23kbPscHFUN//m37yHliVZ0I/PlQ26nAv7TyPzH4j7XNR2tw/eydwINVdWdV/RS4C1gOnMIgdD7f3kxWs4u/r6G9dg6DsKY9nsPgl/6F7S/BfhV4MMnxDM7jF1rf85N8GbiZwd3mK6rq+8D1wCva9wNPqqo7x/dSujb8p16uq6qHhur/KskdwH8weBM/DrgVeH6SnwV+CNzEIPxfyEEa+q51LazhmyR+2B5/OlTetr0I+AmD/8TnjGls3Wiz89OB5yUpBjcOFvA2Bp/AXsZgRngU8HvA96rqu0leBLwEOLWqHk3yGeCw9rTvA94BfA1n+WOR5JkMfk+2tqrvDzW/FpgAnl9VP07yLeCwofK5DN7I72DwSeAXgM1jGvpYOdMfry/w+JeEr2XwpeGeuhk4LcmzAJI8NckvzvP4evVqBstuz6iq5VW1DPgmg09iNwFvYRD6NwJ/wuMzwCOAh1vgP5vBpzEAquoWBjP/3weuHNsr6VSSCeAfgffW7HecHgFsbSH/Yp74KfmzDM7rtnP8R8DtO3meA56hP17nA+e2j5ivA968pztW1QzwBuDKtv/NgJcWzo9zgI9sV/chBoF9I7CoqrYAtzGY7W8L/U8Ci9r5eBeDczLsKuDzVfXwvhp457Z9
f3YXgyWbTwN/sZO+VwCTSaYYTLi+NtR2I3A8cFNVPchg/f+gXNoB/wyDtM8k+Riwvqo2LfRYpG2c6UvzLMniJN9g8KW+ga/9ijN9SeqIM31J6oihL0kdMfQlqSOGviR1xNCXpI78HxsTuoenibfwAAAAAElFTkSuQmCC\n", 318 | "text/plain": [ 319 | "
" 320 | ] 321 | }, 322 | "metadata": { 323 | "needs_background": "light" 324 | }, 325 | "output_type": "display_data" 326 | } 327 | ], 328 | "source": [ 329 | "#Visualizando os dados\n", 330 | "\n", 331 | "x = np.arange(3)\n", 332 | "plt.bar(x, val)\n", 333 | "plt.xticks(x, ('Home', 'Away', 'Draw'))\n", 334 | "plt.show()\n" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 5, 340 | "metadata": {}, 341 | "outputs": [ 342 | { 343 | "data": { 344 | "text/html": [ 345 | "
\n", 346 | "\n", 359 | "\n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | "
game_idhome_idAway_idHGAGResPHPDPAMaxHMaxDMaxAAvgHAvgDAvgA
0121211101.753.865.251.763.875.311.693.504.90
1227271102.833.392.682.833.422.702.593.232.58
2313132111.604.046.721.674.057.221.593.675.64
34774212.493.353.152.493.393.152.353.262.84
45990121.963.534.411.963.534.411.893.333.89
\n", 473 | "
" 474 | ], 475 | "text/plain": [ 476 | " game_id home_id Away_id HG AG Res PH PD PA MaxH MaxD MaxA \\\n", 477 | "0 1 21 21 1 1 0 1.75 3.86 5.25 1.76 3.87 5.31 \n", 478 | "1 2 27 27 1 1 0 2.83 3.39 2.68 2.83 3.42 2.70 \n", 479 | "2 3 13 13 2 1 1 1.60 4.04 6.72 1.67 4.05 7.22 \n", 480 | "3 4 7 7 4 2 1 2.49 3.35 3.15 2.49 3.39 3.15 \n", 481 | "4 5 9 9 0 1 2 1.96 3.53 4.41 1.96 3.53 4.41 \n", 482 | "\n", 483 | " AvgH AvgD AvgA \n", 484 | "0 1.69 3.50 4.90 \n", 485 | "1 2.59 3.23 2.58 \n", 486 | "2 1.59 3.67 5.64 \n", 487 | "3 2.35 3.26 2.84 \n", 488 | "4 1.89 3.33 3.89 " 489 | ] 490 | }, 491 | "metadata": {}, 492 | "output_type": "display_data" 493 | } 494 | ], 495 | "source": [ 496 | "#Preparando os dados\n", 497 | "\n", 498 | "\n", 499 | "#Deixar somente as variáveis numericas \n", 500 | "num_data = data.drop(['Country','League','Season','Date','Time','Home','Away'],1)\n", 501 | "\n", 502 | "display(num_data.head())\n" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": 6, 508 | "metadata": {}, 509 | "outputs": [ 510 | { 511 | "name": "stdout", 512 | "output_type": "stream", 513 | "text": [ 514 | "Features\n", 515 | " game_id home_id Away_id HG AG PH PD PA MaxH MaxD MaxA \\\n", 516 | "0 1 21 21 1 1 1.75 3.86 5.25 1.76 3.87 5.31 \n", 517 | "1 2 27 27 1 1 2.83 3.39 2.68 2.83 3.42 2.70 \n", 518 | "2 3 13 13 2 1 1.60 4.04 6.72 1.67 4.05 7.22 \n", 519 | "3 4 7 7 4 2 2.49 3.35 3.15 2.49 3.39 3.15 \n", 520 | "4 5 9 9 0 1 1.96 3.53 4.41 1.96 3.53 4.41 \n", 521 | "\n", 522 | " AvgH AvgD AvgA \n", 523 | "0 1.69 3.50 4.90 \n", 524 | "1 2.59 3.23 2.58 \n", 525 | "2 1.59 3.67 5.64 \n", 526 | "3 2.35 3.26 2.84 \n", 527 | "4 1.89 3.33 3.89 \n", 528 | "=========\n", 529 | "Labels\n", 530 | "0 0\n", 531 | "1 0\n", 532 | "2 1\n", 533 | "3 1\n", 534 | "4 2\n", 535 | "Name: Res, dtype: int64\n" 536 | ] 537 | } 538 | ], 539 | "source": [ 540 | "#separa as features \n", 541 | "features = num_data.drop(['Res'],1)\n", 542 | "\n", 543 | "\n", 544 | "#separa as 
labels\n", 545 | "labels = num_data['Res']\n", 546 | "\n", 547 | "print('Features')\n", 548 | "print (features.head())\n", 549 | "\n", 550 | "print ('=========')\n", 551 | "\n", 552 | "print ('Labels')\n", 553 | "print (labels.head())" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": 7, 559 | "metadata": {}, 560 | "outputs": [ 561 | { 562 | "name": "stdout", 563 | "output_type": "stream", 564 | "text": [ 565 | "\n", 566 | "Melhores features:\n", 567 | "{'PA': 740.4065621193932, 'MaxH': 731.7074700471329, 'MaxD': 81.51748572743963, 'AvgD': 80.67054047107476, 'AvgH': 63.290872018071454, 'MaxA': 40.62018755299711, 'AvgA': 40.54491193487282, 'AG': 0.6533077251586757, 'PD': 0.027363118243961364, 'PH': 0.027363118243961364}\n" 568 | ] 569 | } 570 | ], 571 | "source": [ 572 | "#Escoolhendo as melhores features com Kbest\n", 573 | "\n", 574 | "features_list = ('HG','AG','PH','PD','PA','MaxH','MaxD','MaxA','AvgH','AvgD','AvgA')\n", 575 | "\n", 576 | "k_best_features = SelectKBest(k='all')\n", 577 | "k_best_features.fit_transform(features, labels)\n", 578 | "k_best_features_scores = k_best_features.scores_\n", 579 | "raw_pairs = zip(features_list[1:], k_best_features_scores)\n", 580 | "ordered_pairs = list(reversed(sorted(raw_pairs, key=lambda x: x[1])))\n", 581 | "\n", 582 | "k_best_features_final = dict(ordered_pairs[:15])\n", 583 | "best_features = k_best_features_final.keys()\n", 584 | "print ('')\n", 585 | "print (\"Melhores features:\")\n", 586 | "print (k_best_features_final)" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": 8, 592 | "metadata": {}, 593 | "outputs": [ 594 | { 595 | "name": "stdout", 596 | "output_type": "stream", 597 | "text": [ 598 | "Features\n", 599 | " HG PA MaxH MaxD MaxA AvgH AvgD AvgA\n", 600 | "0 1 5.25 1.76 3.87 5.31 1.69 3.50 4.90\n", 601 | "1 1 2.68 2.83 3.42 2.70 2.59 3.23 2.58\n", 602 | "2 2 6.72 1.67 4.05 7.22 1.59 3.67 5.64\n", 603 | "3 4 3.15 2.49 3.39 3.15 2.35 3.26 2.84\n", 604 
| "4 0 4.41 1.96 3.53 4.41 1.89 3.33 3.89\n", 605 | "=========\n", 606 | "Labels\n", 607 | "0 0\n", 608 | "1 0\n", 609 | "2 1\n", 610 | "3 1\n", 611 | "4 2\n", 612 | "Name: Res, dtype: int64\n" 613 | ] 614 | } 615 | ], 616 | "source": [ 617 | "#separa as features com base nas melhores features para treinamento\n", 618 | "features = num_data.drop(['Res','game_id','home_id','Away_id', 'AG','PD','PH'],1)\n", 619 | "\n", 620 | "\n", 621 | "#separa as labels para treinamento\n", 622 | "labels = num_data['Res']\n", 623 | "\n", 624 | "print('Features')\n", 625 | "print (features.head())\n", 626 | "\n", 627 | "print ('=========')\n", 628 | "\n", 629 | "print ('Labels')\n", 630 | "print (labels.head())\n" 631 | ] 632 | }, 633 | { 634 | "cell_type": "code", 635 | "execution_count": 9, 636 | "metadata": {}, 637 | "outputs": [ 638 | { 639 | "name": "stdout", 640 | "output_type": "stream", 641 | "text": [ 642 | "Features: (2279, 8)\n", 643 | "[[0.16666667 0.21853547 0.07060334 ... 0.08239095 0.15945946 0.26920093]\n", 644 | " [0.16666667 0.0715103 0.20795892 ... 0.22778675 0.08648649 0.08921645]\n", 645 | " [0.33333333 0.30263158 0.05905006 ... 0.06623586 0.20540541 0.32660978]\n", 646 | " ...\n", 647 | " [0.16666667 0.41647597 0.02695764 ... 0.03069467 0.50540541 0.48875097]\n", 648 | " [0.33333333 0.50171625 0.02439024 ... 0.02423263 0.51351351 0.60822343]\n", 649 | " [0.16666667 0.10526316 0.13863928 ... 
0.15831987 0.13783784 0.14041893]]\n" 650 | ] 651 | } 652 | ], 653 | "source": [ 654 | "# Normalizando os dados de entrada(features)\n", 655 | "\n", 656 | "# Gerando o novo padrão\n", 657 | "scaler = MinMaxScaler().fit(features)\n", 658 | "features_scale = scaler.transform(features)\n", 659 | "\n", 660 | "print ('Features: ',features_scale.shape)\n", 661 | "print (features_scale)" 662 | ] 663 | }, 664 | { 665 | "cell_type": "code", 666 | "execution_count": 10, 667 | "metadata": {}, 668 | "outputs": [ 669 | { 670 | "name": "stdout", 671 | "output_type": "stream", 672 | "text": [ 673 | "1932 1932\n", 674 | "223 223\n" 675 | ] 676 | } 677 | ], 678 | "source": [ 679 | "#Separa em treinamento e teste\n", 680 | "#Separação manual para manter a ordem cronológica, uma vez que temos informação temporal. \n", 681 | "#Treino linhas [:1932]\n", 682 | "#Teste linhas [1932:2155]\n", 683 | "#previsão linhas [2155:2280]\n", 684 | "\n", 685 | "\n", 686 | "X_train = features_scale[:1932]\n", 687 | "X_test = features_scale[1932:2155]\n", 688 | "y_train = labels[:1932]\n", 689 | "y_test = labels[1932:2155]\n", 690 | "\n", 691 | "print( len(X_train), len(y_train))\n", 692 | "\n", 693 | "print( len(X_test), len(y_test))\n" 694 | ] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": 11, 699 | "metadata": {}, 700 | "outputs": [ 701 | { 702 | "name": "stdout", 703 | "output_type": "stream", 704 | "text": [ 705 | "LogisticRegression\n", 706 | "Acurácia LogisticRegression:0.57847533632287\n", 707 | "F1 Score:0.57847533632287\n" 708 | ] 709 | } 710 | ], 711 | "source": [ 712 | "#Treinando e testando os modelos\n", 713 | "print ('LogisticRegression')\n", 714 | "\n", 715 | "\n", 716 | "clf_LR = LogisticRegression(multi_class='multinomial',max_iter=2000)\n", 717 | "clf_LR.fit(X_train, y_train)\n", 718 | "pred= clf_LR.predict(X_test)\n", 719 | "\n", 720 | "lg_acc = accuracy_score(y_test, pred)\n", 721 | "f1=f1_score(y_test,pred,average = 'micro')\n", 722 | "print ('Acurácia 
LogisticRegression:{}'.format(lg_acc))\n", 723 | "print ('F1 Score:{}'.format(f1) )" 724 | ] 725 | }, 726 | { 727 | "cell_type": "code", 728 | "execution_count": 12, 729 | "metadata": {}, 730 | "outputs": [ 731 | { 732 | "name": "stdout", 733 | "output_type": "stream", 734 | "text": [ 735 | "Acurácia LogisticRegression:0.57847533632287\n", 736 | "F1 Score:0.5282488231236308\n", 737 | "LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,\n", 738 | " intercept_scaling=1, l1_ratio=None, max_iter=1000,\n", 739 | " multi_class='auto', n_jobs=None, penalty='l2',\n", 740 | " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n", 741 | " warm_start=False)\n" 742 | ] 743 | } 744 | ], 745 | "source": [ 746 | "#Testando LogisticRegression hyper parameters\n", 747 | "\n", 748 | "param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000] }\n", 749 | "\n", 750 | "search = GridSearchCV(LogisticRegression(max_iter=1000), param_grid)\n", 751 | "\n", 752 | "search.fit(X_train,y_train)\n", 753 | "clf = search.best_estimator_\n", 754 | "pred= clf.predict(X_test)\n", 755 | "lg_acc = accuracy_score(y_test, pred)\n", 756 | "\n", 757 | "\n", 758 | "f1=f1_score(y_test,pred,average = 'macro')\n", 759 | "\n", 760 | "print ('Acurácia LogisticRegression:{}'.format(lg_acc))\n", 761 | "print ('F1 Score:{}'.format(f1) )\n", 762 | "\n", 763 | "print (clf)" 764 | ] 765 | }, 766 | { 767 | "cell_type": "code", 768 | "execution_count": 13, 769 | "metadata": {}, 770 | "outputs": [ 771 | { 772 | "name": "stdout", 773 | "output_type": "stream", 774 | "text": [ 775 | "SVC\n", 776 | "Acurácia SVC:0.57847533632287\n", 777 | "F1 Score:0.57847533632287\n" 778 | ] 779 | } 780 | ], 781 | "source": [ 782 | "#Treinando e testando os modelos\n", 783 | "print ('SVC')\n", 784 | "\n", 785 | "\n", 786 | "clf = SVC()\n", 787 | "clf.fit(X_train, y_train)\n", 788 | "pred= clf.predict(X_test)\n", 789 | "\n", 790 | "svc_acc = accuracy_score(y_test, pred)\n", 791 | "f1=f1_score(y_test,pred, 
average='micro')\n", 792 | "print ('Acurácia SVC:{}'.format(svc_acc))\n", 793 | "print ('F1 Score:{}'.format(f1) )" 794 | ] 795 | }, 796 | { 797 | "cell_type": "code", 798 | "execution_count": 14, 799 | "metadata": {}, 800 | "outputs": [ 801 | { 802 | "name": "stdout", 803 | "output_type": "stream", 804 | "text": [ 805 | "F1 Score:0.5919282511210763\n", 806 | "Acurácia LogisticRegression:0.5919282511210763\n", 807 | "SVC(C=100, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,\n", 808 | " decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',\n", 809 | " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", 810 | " tol=0.001, verbose=False)\n" 811 | ] 812 | } 813 | ], 814 | "source": [ 815 | "#Testando SVC hyper parameters\n", 816 | "\n", 817 | "param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000] }\n", 818 | "\n", 819 | "search = GridSearchCV(SVC(), param_grid)\n", 820 | "\n", 821 | "search.fit(X_train,y_train)\n", 822 | "clf_SVC = search.best_estimator_\n", 823 | "pred= clf_SVC.predict(X_test)\n", 824 | "acc = accuracy_score(y_test, pred)\n", 825 | "\n", 826 | "\n", 827 | "f1=f1_score(y_test,pred,average = 'micro')\n", 828 | "\n", 829 | "print ('F1 Score:{}'.format(f1))\n", 830 | "\n", 831 | "print ('Acurácia LogisticRegression:{}'.format(acc))\n", 832 | "\n", 833 | "print(clf_SVC)" 834 | ] 835 | }, 836 | { 837 | "cell_type": "code", 838 | "execution_count": 15, 839 | "metadata": {}, 840 | "outputs": [ 841 | { 842 | "name": "stdout", 843 | "output_type": "stream", 844 | "text": [ 845 | "Decision Tree\n", 846 | "Acurácia Tree:0.45739910313901344\n", 847 | "F1 Score:0.42806053301191915\n" 848 | ] 849 | } 850 | ], 851 | "source": [ 852 | "#Treinando e testando os modelos\n", 853 | "print ('Decision Tree')\n", 854 | "\n", 855 | "\n", 856 | "clf = DecisionTreeClassifier()\n", 857 | "clf.fit(X_train, y_train)\n", 858 | "pred= clf.predict(X_test)\n", 859 | "\n", 860 | "dt_acc = accuracy_score(y_test, pred)\n", 861 | 
"f1=f1_score(y_test,pred, average='macro')\n", 862 | "print ('Acurácia Tree:{}'.format(dt_acc))\n", 863 | "print ('F1 Score:{}'.format(f1) )\n", 864 | "\n", 865 | "\n", 866 | "n_estimators = [10, 50, 100, 200]\n", 867 | "max_depth = [3, 10, 20, 40]" 868 | ] 869 | }, 870 | { 871 | "cell_type": "code", 872 | "execution_count": 16, 873 | "metadata": {}, 874 | "outputs": [ 875 | { 876 | "name": "stdout", 877 | "output_type": "stream", 878 | "text": [ 879 | "Decision Tree\n", 880 | "Acurácia Decision Tree:0.47533632286995514\n", 881 | "F1 Score:0.47533632286995514\n", 882 | "DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',\n", 883 | " max_depth=80, max_features=3, max_leaf_nodes=None,\n", 884 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 885 | " min_samples_leaf=5, min_samples_split=12,\n", 886 | " min_weight_fraction_leaf=0.0, presort='deprecated',\n", 887 | " random_state=None, splitter='best')\n" 888 | ] 889 | } 890 | ], 891 | "source": [ 892 | "#Testando Decision tree hyper parameters\n", 893 | "print ('Decision Tree')\n", 894 | "\n", 895 | "param_grid = {\n", 896 | " 'max_depth': [80, 90, 100, 110],\n", 897 | " 'max_features': [2, 3],\n", 898 | " 'min_samples_leaf': [3, 4, 5],\n", 899 | " 'min_samples_split': [8, 10, 12]\n", 900 | " \n", 901 | " \n", 902 | "}\n", 903 | "\n", 904 | "search = GridSearchCV(DecisionTreeClassifier(), param_grid)\n", 905 | "\n", 906 | "search.fit(X_train,y_train)\n", 907 | "clf = search.best_estimator_\n", 908 | "pred= clf.predict(X_test)\n", 909 | "dt_acc = accuracy_score(y_test, pred)\n", 910 | "\n", 911 | "\n", 912 | "f1=f1_score(y_test,pred,average = 'micro')\n", 913 | "\n", 914 | "print ('Acurácia Decision Tree:{}'.format(dt_acc))\n", 915 | "print ('F1 Score:{}'.format(f1) )\n", 916 | "\n", 917 | "print (clf)" 918 | ] 919 | }, 920 | { 921 | "cell_type": "code", 922 | "execution_count": 17, 923 | "metadata": {}, 924 | "outputs": [ 925 | { 926 | "name": "stdout", 927 | "output_type": "stream", 
928 | "text": [ 929 | "Naive baeys\n", 930 | "Acurácia Naive baeys:0.5201793721973094\n", 931 | "F1 Score:0.5201793721973094\n" 932 | ] 933 | } 934 | ], 935 | "source": [ 936 | "#Treinando e testando os modelos\n", 937 | "print ('Naive baeys')\n", 938 | "\n", 939 | "\n", 940 | "clf = GaussianNB()\n", 941 | "clf.fit(X_train, y_train)\n", 942 | "pred= clf.predict(X_test)\n", 943 | "\n", 944 | "nb_acc = accuracy_score(y_test, pred)\n", 945 | "f1=f1_score(y_test,pred, average='micro')\n", 946 | "print ('Acurácia Naive baeys:{}'.format(nb_acc))\n", 947 | "print ('F1 Score:{}'.format(f1) )" 948 | ] 949 | }, 950 | { 951 | "cell_type": "code", 952 | "execution_count": 23, 953 | "metadata": {}, 954 | "outputs": [ 955 | { 956 | "name": "stdout", 957 | "output_type": "stream", 958 | "text": [ 959 | " real previsao game_id\n", 960 | "2155 1 1 2156\n", 961 | "2156 1 1 2157\n", 962 | "2157 2 2 2158\n", 963 | "2158 1 1 2159\n", 964 | "2159 1 1 2160\n", 965 | "... ... ... ...\n", 966 | "2274 0 0 2275\n", 967 | "2275 0 0 2276\n", 968 | "2276 1 1 2277\n", 969 | "2277 1 1 2278\n", 970 | "2278 2 1 2279\n", 971 | "\n", 972 | "[124 rows x 3 columns]\n" 973 | ] 974 | } 975 | ], 976 | "source": [ 977 | "#Executando a previsao\n", 978 | "\n", 979 | "previsao=features_scale[2155:]\n", 980 | "\n", 981 | "game_id_full=data['game_id']\n", 982 | "game_id=game_id_full[2155:]\n", 983 | "\n", 984 | "res_full=data['Res']\n", 985 | "res=res_full[2155:]\n", 986 | "\n", 987 | "\n", 988 | "pred=clf_SVC.predict(previsao)\n", 989 | "\n", 990 | "df=pd.DataFrame({'real': res, 'previsao':pred, 'game_id':game_id})\n", 991 | "\n", 992 | "print(df)" 993 | ] 994 | }, 995 | { 996 | "cell_type": "code", 997 | "execution_count": 24, 998 | "metadata": {}, 999 | "outputs": [ 1000 | { 1001 | "data": { 1002 | "text/plain": [ 1003 | "" 1004 | ] 1005 | }, 1006 | "execution_count": 24, 1007 | "metadata": {}, 1008 | "output_type": "execute_result" 1009 | }, 1010 | { 1011 | "data": { 1012 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAWgAAAEGCAYAAABIGw//AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAYbUlEQVR4nO3deZhU5Zn+8e/dTTNIQASVFmWRKCouqHGPjls0P00kopFofk6CEe04CYlLFrcrjppEcU80cQhxw2gUNXFUdIgOSiBGEXDBhUTcJSK4oSwi0DzzRx2dlkBXNV3V5+3q++N1rqo6VXXqqbram7eec95TigjMzCw9NXkXYGZma+aANjNLlAPazCxRDmgzs0Q5oM3MEtUp7wLW5vV3P/LhJRU2ZtqreZdQ9U7eZ2DeJXQIvbvXqbXbWG/nUSVnzodP/KrVr1cKj6DNzBKV7AjazKxNKb3xqgPazAygpjbvCv6JA9rMDEBt0lZuEQe0mRm4xWFmliyPoM3MEuURtJlZojyCNjNLlI/iMDNLlFscZmaJcovDzCxRHkGbmSXKAW1mlqha7yQ0M0uTe9BmZolyi8PMLFEeQZuZJcojaDOzRHkEbWaWqDJO9Zb0CrAIaARWRsSuknoB44HNgVeAr0XEe82WVLaKzMzaM9WUvpTmgIjYKSJ2zW6fAUyKiEHApOx2sxzQZmZQaHGUuqybw4Fx2fVxwLBiT3BAm5lBuUfQAdwvaaakhmxdfUTMA8guexfbiHvQZmbQoqM4stBtaLJqbESMbXJ774h4Q1Jv4AFJf1uXkhzQZmbQop2EWRiPbeb+N7LLBZLuBHYH5kvqExHzJPUBFhQtqeSKzMyqWZl60JI+I6n7x9eBLwLPAHcDI7KHjQDuKlaSR9BmZlDOiSr1wJ0qBHkn4PcRMVHSdOA2SSOB14DhxTbkgDYzg7JNVImIl4Ad17D+HeALLdmWA9rMDJBnEpqZpckBbWaWKNU4oKvS4kUfcNmF5/LKiy8giR+efT7b7vBPLShrgaXvvcX0m69g2QfvoRoxcK9DGLTfVwB4Yco9vDD1Xmpqa9hk290Y8pVv5VxtdWlsbOTEbxzNRr17c/Evrs67nDbjEXSV+vUVF7HbnnvzHxdczooVK/ho2Yd5l9TuqaaWIYcfT89+W7Ji2VImXXYq9VvvxLJFC3njmWkcfPpV1HaqY9mihXmXWnVuv+UmBgz8LEuWLM67lDaVYkD7OOhWWrJkMU8/OZNDhx4JQF1dHd26r59zVe3fej160bPflgDUdelK9/p+fPj+O7z08H1s/YWjqO1UB0CX7hvkWWbVWTD/TR55eAqHDftq3qW0OUklL22lYiNoSdtQODnIZhTmpb8B3B0Rsyv1mnmY94+59NigF5f87Ce8OOd5ttpmMN859XTWW69r3qVVjSXvzGfh3BfpNWBrZt11PW+/9CzP3Ps7auvqGHL48fTqv1XeJVaNKy+7iO98/zSWLlmSdyltL70BdGVG0JJOB26l8JYfA6Zn12+RVPQUe+1JY2Mjc56fzdAjv8ZvbryNLuutx603Xpd3WVVj5Ucf8sj1F7LTESdS16UrsaqRFUsXc+CplzLkK8fz6A0XERF5l1kVHp46mZ69erH14O3yLiUXKY6gK9XiGAnsFhGjI+KmbBlNYT76yLU9SVKDpBmSZtw87poKlVZeG/euZ+ON6xm83RAA9j3gYOY8X1VfEnKzqnElj1x3If132Z/Ndvw8AOttsBGbDvk8kug1YCukGpYv+SDnSqvD0089wcNTJjN86Bc59+wf8fj0xzj/J6fnXVabqampKXlpK5VqcawCNgVeXW19n+y+NWp6ApLX3/2oXQyLem24ERvX1/P6qy/Tb8BAHp8xjQGbfzbvstq9iGDGLVfSvb4fWx3wf6fN3XSHPXlrzlP0HrQDixb8g1WNK+n8Gff8y+GkUady0qhTAXhixmPcctMNnPPTi3Kuqu2kuJOwUgF
9CjBJ0hzg9Wxdf2BLYFSFXjM3o047kwvPPZMVK1bQZ7O+/Ojsn+ZdUrv3zsvP8dqMh+jRZ3MeuPj7AGx/2DcZuMdBzLjlSu4f/V1qOnVit/9/SpL/Y1k7lOCfkSrVv5NUQ6GlsRmFtz4XmB4RjaU8v72MoNuzMdNW/4Jj5XbyPgPzLqFD6N29rtXxutFxt5acOW/fcEybxHnFjuKIiFXAo5XavplZOaX4TcwTVczM8FRvM7NkeQRtZpYoB7SZWaIc0GZmiXJAm5mlKr18dkCbmQFtOoW7VA5oMzPc4jAzS1d6+eyANjMDj6DNzJLlgDYzS5QD2swsUT4Xh5lZojyCNjNLlAPazCxRCeazA9rMDNIcQac3t9HMLAc1NSp5KYWkWklPSJqQ3R4oaZqkOZLGS+pctKZWviczs6oglb6U6GRgdpPbFwFXRMQg4D1gZLENOKDNzCjvCFpSX+DLwDXZbQEHAndkDxkHDCta0zq/GzOzKtKSEbSkBkkzmiwNq23uF8CPgVXZ7Q2BhRGxMrs9F9isWE3eSWhmRst2EkbEWGDsWrZzGLAgImZK2v/j1WvaTLHXcUCbmVHWw+z2Br4i6UtAF2B9CiPqDSR1ykbRfYE3im3ILQ4zMwon7C91aU5EnBkRfSNic+AY4MGIOBZ4CDgqe9gI4K6iNbXuLZmZVYcKHMWxutOB0yS9QKEnfW2xJ7jFYWZGZSaqRMRkYHJ2/SVg95Y83wFtZoanepuZJSvFqd4OaDMzPII2M0tWqefYaEvJBnSvbkXPI2KtdPlZV+ZdQtU7a9pVeZdgJXKLw8wsUQnmswPazAw8gjYzS1aC+eyANjMD7yQ0M0uWWxxmZolyQJuZJSrBfHZAm5mBR9BmZslKMJ8d0GZm4KM4zMySVZPgENoBbWaGWxxmZsnyTkIzs0Ql2IJ2QJuZgXcSmpklSzigzcySlOAA2gFtZgbeSWhmlqwE89kBbWYGnqhiZpYsH8VhZpaoBAfQDmgzM3CLw8wsWenFswPazAxI8zC7mrwLMDNLQY1KX5ojqYukxyQ9JelZSedl6wdKmiZpjqTxkjoXrak8b83MrH2rqVHJSxEfAQdGxI7ATsAhkvYELgKuiIhBwHvAyKI1tfI9mZlVBUklL82JgsXZzbpsCeBA4I5s/ThgWLGaHNBmZrSsxSGpQdKMJktD021JqpX0JLAAeAB4EVgYESuzh8wFNitWk3cSmpnRsp2EETEWGNvM/Y3ATpI2AO4EBq/pYcVep9mAlnRkkSL/WOwFzMzag0ocwxERCyVNBvYENpDUKRtF9wXeKPb8YiPooc29NuCANrOqUFumqd6SNgZWZOG8HnAQhR2EDwFHAbcCI4C7im2r2YCOiG+1vtzqd+5PzmLqlMn06rUht995T97lVJW/3Xsei5Z8ROOqVaxsXMU+x14MwL8fsx8nHb0vKxtXMXHqM5z9y6J/61ZER/87LuNx0H2AcZJqKeznuy0iJkh6DrhV0s+AJ4Bri22o5B60pC8D2wFdPl4XEee3tPJqNPTwIzj668dyztln5F1KVTqk4Ze8s3DJJ7f33XUQh+2/A7t97UKWr1jJxj275Vhd9ejof8flyueImAXsvIb1LwG7t2RbJR3FIWkMcDTwPQqtmuHAgJa8UDXbZdfd6NGjR95ldBgNw/+VS69/gOUrCjvE33pvcZFnWCk6+t9xjVTy0mY1lfi4z0fEN4H3IuI8YC+g37q8oCS3TaxkEcE9V4/i4Zt/zPFH7g3AlgN6s/fOWzDlxh9y/zUns8u2/XOu0qqBVPrSVkptcXyYXS6VtCnwDjBwHV/zPOD6Nd2RHUvYAHDlr8dw/AkNa3qYdSAHfusK5r31Phv37MaEMaP4+ytv0qm2hp7rd2Xfb17KrtsN4KaLj2fwYefmXaq1cymei6PUgJ6QHc93CfA4hSM4rlnbgyXNWttdQP3antf02MIly6PoMYJW/ea99T5QaGPc/eAsdttuc/4xfyH/NekpAGY
8+yqrVgUb9ezG2251WCvUtteAjoifZlf/IGkC0CUi3m/mKfXA/6Mw37wpAX9tcZXWIXXt0pmaGrF46Ud07dKZg/bahgvG/jeLP/yI/Xffiqkz57Bl/950ruvkcLZWS/AHVUoLaEldgR8A/SPiREn9Jf1rRExYy1MmAN0i4sk1bGvyOlebqDN/fBozp09n4cL3OOQL+3HSd7/HsCOPyrusdq/3ht0Zf/mJAHSqrWX8f8/ggb/Opq5TLb8591hm3H4Wy1c0csI5v8u50urQ0f+OUwxoRQmdBEnjgZnANyNi++zg60ciYqdKFeYWR+VttMf38i6h6r097aq8S+gQPtO59f2JH9zz95Iz57KhW7dJnJd6FMcWEXExsAIgIj4kzR8gMDNbJ+U6H3Q5lbqTcHk2ag4ASVtQOOepmVlVSHAfYfGAVuHYkzHARKCfpJuBvYHjKluamVnb6ZRgQhcN6IgISScDX6RwRiYBJ0fE25UuzsysrSSYzyW3OB4FPhsR91ayGDOzvLTlFO5SlRrQBwDflvQqsITCKDoiYkjFKjMza0MJ5nPJAX1oRaswM8tZisdBlzqT8NVKF2JmlqdynbC/nPybhGZmtOMRtJlZtVOCc+8c0GZmeARtZpYsB7SZWaLa8wn7zcyqWm2pp45rQw5oMzPa90xCM7Oq5h60mVmiEhxAO6DNzABqfBy0mVmaPII2M0tUpwSb0A5oMzM8gjYzS5YPszMzS1SC+UyCc2fMzNpeTQuW5kjqJ+khSbMlPZv9piuSekl6QNKc7LJnKTWZmXV4NVLJSxErgR9ExGAKP7T9XUnbAmcAkyJiEDApu918Ta18T2ZmVaFcAR0R8yLi8ez6ImA2sBlwODAue9g4YFjRmlr1jszMqoRaskgNkmY0WRrWuE1pc2BnYBpQHxHzoBDiQO9iNXknoZkZLdtJGBFjgbHNb0/dgD8Ap0TEB+tyOlMHtJkZ5T0ftKQ6CuF8c0T8MVs9X1KfiJgnqQ+woNh23OIwM6OsR3EIuBaYHRGXN7nrbmBEdn0EcFexmjyCNjOjrBNV9ga+ATwt6cls3VnAaOA2SSOB14DhxTakiChXUWW1bCVpFlZFJj73Zt4lVL3+PbrmXUKH8LkB67c6Xe94al7JmXPUjn3aZFqLR9BmZqTZ73VAm5nhH401M0tWevHsgDYzA6DWI2gzszQlmM8OaDMzACXY5HBAm5nhEbSZWbL8q95mZonyCNrMLFH+TUIzs0TVpJfPDmgzM/BRHGZmyUqww+GANjMDj6DNzJLlHrSZWaJ8FIeZWaLSi2cHtJkZ4BG0mVmy0otnB7SZWUGCCe2ANjPDLQ4zs2SlF88OaDOzggQT2gFtZoZnEpqZJSvBFrQD2swMkuxwOKDNzACU4BDaAW1mhlscZmbJSjCfHdBmZkCSCV2TdwFmZilQC/4rui3pOkkLJD3TZF0vSQ9ImpNd9iy2HY+gy+DhqVO4aPTPWdW4iiO+OpyRJzbkXVJVuP3q0cye+QjdevTktMtvAOCNl+fwx99ezsrly6mpreWIE06l36DB+Rbajo257HyeePQvrL9BTy757XgAbh77Sx5/dCq1dXXU9+nLST88h890655zpZVX5h70DcCvgBubrDsDmBQRoyWdkd0+vbmNeATdSo2NjVzw8/O5esw13Hn3vUy8bwIvvvBC3mVVhV32P5SRZ1/yqXX33TSGg4aP4JRLr+WLRx/PfTeNyam66rDfwYdxxgVXfmrdDp/bg4t/eysX/+YW+vTtz1233pBPcW1MKn0pJiKmAO+utvpwYFx2fRwwrNh2HNCt9MzTs+jXbwB9+/WjrnNnDvnSl5n80KS8y6oKn912R9ZbfeQm8dHSpQAsW7qY9XtumENl1WPwkM/Rrfv6n1o3ZNc9qa0tfLketM32vPvW/DxKa3MtaXFIapA0o8lSytfm+oiYB5Bd9i72hIq1OCRtA2wGTIuIxU3WHxIREyv1um1twfz5bNJnk09u966v5+l
Zs3KsqLoNPW4U1/7sR9z7u6uJVcF3fv7rvEuqapP/dDd77ndw3mW0iZa0OCJiLDC2YsVkKjKClvR94C7ge8Azkg5vcvcFlXjNvATxT+tSPOC9Wjx6/10MPW4UZ425g8OO+y53/OfFeZdUte78/XXU1HZiny8cmncpbUItWNbRfEl9ALLLBcWeUKkWx4nALhExDNgf+Imkk7P71vr+mn5tuPa3Ff/HqSzq6zfhzXlvfnJ7wfz59O5d9JuLraOZk//E9nvsC8CQvQ7g9Rdm51xRdfrz/RN4YtpfGHXGTzvOgKPyCX03MCK7PoLCILZZlWpx1H7c1oiIVyTtD9whaQDNvL2mXxuWrVzD0DRB222/A6+99gpz575Ofe96Jt53LxdeclneZVWt9XttyEvPPckW2+3Mi888zkab9M27pKrz5PS/cs9tN3LOpb/hX7p0ybucNlPOE/ZLuoXC4HQjSXOB/wBGA7dJGgm8Bgwvup2I8uegpAeB0yLiySbrOgHXAcdGRG2xbbSXgAaYOuXPXDz6AlatamTYEV/lxG//e94llWTic28Wf1COfv+L83jp2SdZsuh9uvXoxcFf+xYbb9qPe66/ilWrGulU15lhJ5xK3y22zrvUterfo2veJTTrygvOZvasmSx6fyE9em7IUd9o4K7xN7Bi+XK6r98DgC0H78AJJ5+Zc6XN+9yA9Vudrs+/ubTkzNlqk65t8rWiUgHdF1gZEf+UAJL2joiHi22jPQV0e5V6QFeD1AO6WpQloOe3IKDr2yagK9LiiIi5zdxXNJzNzNqaT9hvZpaoFPeFOqDNzEjyXEkOaDMzSHP+ggPazAy3OMzMkpVgPjugzcyAJBPaAW1mhg+zMzNLlnvQZmaJqnFAm5mlKr2EdkCbmeEWh5lZshLMZwe0mRl4BG1mlixP9TYzS1R68eyANjMD3OIwM0uWZxKamaUqvXx2QJuZQZL57IA2MwOoSbAJ7YA2MyPNnYQ1eRdgZmZr5hG0mRlpjqAd0GZm+DA7M7NkeQRtZpYoB7SZWaLc4jAzS1SKI2gfZmdmRmEmYalL0W1Jh0j6u6QXJJ2xrjU5oM3MoGwJLakW+DVwKLAt8HVJ265LSW5xmJlR1qneuwMvRMRLAJJuBQ4HnmvphpIN6C6dEuzYFyGpISLG5l1HqYYN2STvElqsvX3G7VFH/YxbkjmSGoCGJqvGNvnMNgNeb3LfXGCPdanJLY7yaij+EGslf8aV58+4iIgYGxG7Nlma/oO2pqCPdXkdB7SZWXnNBfo1ud0XeGNdNuSANjMrr+nAIEkDJXUGjgHuXpcNJduDbqc6XN8uB/6MK8+fcStExEpJo4A/AbXAdRHx7LpsSxHr1BoxM7MKc4vDzCxRDmgzs0Q5oMugXNM6be0kXSdpgaRn8q6lWknqJ+khSbMlPSvp5Lxr6ujcg26lbFrn88DBFA6vmQ58PSJaPGvI1k7SvsBi4MaI2D7veqqRpD5An4h4XFJ3YCYwzH/L+fEIuvU+mdYZEcuBj6d1WhlFxBTg3bzrqGYRMS8iHs+uLwJmU5gVZzlxQLfemqZ1+o/a2jVJmwM7A9PyraRjc0C3XtmmdZqlQFI34A/AKRHxQd71dGQO6NYr27ROs7xJqqMQzjdHxB/zrqejc0C3XtmmdZrlSZKAa4HZEXF53vWYA7rVImIl8PG0ztnAbes6rdPWTtItwCPA1pLmShqZd01VaG/gG8CBkp7Mli/lXVRH5sPszMwS5RG0mVmiHNBmZolyQJuZJcoBbWaWKAe0mVmiHNDWLkk6X9JBeddhVkk+zM5yJ6k2IhrzrsMsNR5BW0VJ2lzS3ySNkzRL0h2Sukp6RdI5kv4CDJe0haSJkmZKmippG0k9ssfVZNvqKul1SXWSbpB0VLZ+tKTnsu1fmq0bKmmapCck/Y+k+mx9L0n/lT32UUlDcvtwzIrwj8ZaW9gaGBkRD0u6DvhOtn5ZROwDIGkScFJEzJG0B3B1RBwo6SlgP+A
hYCjwp4hYUZiVXAhc4Ahgm4gISRtk2/4LsGe27gTgx8APgPOAJyJimKQDgRuBnSr/EZi1nAPa2sLrEfFwdv0m4PvZ9fHwydnTPg/c/nHwAv/S5DFHUwjoY4CrV9v2B8Ay4BpJ9wITsvV9gfHZSeg7Ay9n6/cBvgoQEQ9K2lBSj4h4vxxv1Kyc3OKwtrD6jo6Pby/JLmuAhRGxU5NlcHbf3cCh2Uh5F+DBT22ocC6U3SmcgW0YMDG76yrgVxGxA/BtoEu23qeHtXbDAW1tob+kvbLrX6fQfvhEds7hlyUNh8JZ1STtmN23GHgM+CUwYfWdidnou0dE3Aecwv+1K3oA/8iuj2jylCnAsdlz9wfe9jmPLVUOaGsLs4ERkmYBvYD/XMNjjgVGZj3nZ/n0z4aNB/4tu1xdd2BCtu0/A6dm68+l0DKZCrzd5PHnArtmjx/Np8PbLCk+zM4qKvvppAn+oVezlvMI2swsUR5Bm5klyiNoM7NEOaDNzBLlgDYzS5QD2swsUQ5oM7NE/S9c90U0gg6p+gAAAABJRU5ErkJggg==\n", 1013 | "text/plain": [ 1014 | "
" 1015 | ] 1016 | }, 1017 | "metadata": { 1018 | "needs_background": "light" 1019 | }, 1020 | "output_type": "display_data" 1021 | } 1022 | ], 1023 | "source": [ 1024 | "#confusion Matrix\n", 1025 | "\n", 1026 | "df=pd.DataFrame(df,columns=['real','previsao' ])\n", 1027 | "\n", 1028 | "cf_matrix=pd.crosstab(df['real'], df['previsao'], rownames=['real'] , colnames=['previsao'])\n", 1029 | "\n", 1030 | "sns.heatmap(cf_matrix, annot=True, cmap='Blues')" 1031 | ] 1032 | }, 1033 | { 1034 | "cell_type": "code", 1035 | "execution_count": null, 1036 | "metadata": {}, 1037 | "outputs": [], 1038 | "source": [] 1039 | } 1040 | ], 1041 | "metadata": { 1042 | "kernelspec": { 1043 | "display_name": "Python 3", 1044 | "language": "python", 1045 | "name": "python3" 1046 | }, 1047 | "language_info": { 1048 | "codemirror_mode": { 1049 | "name": "ipython", 1050 | "version": 3 1051 | }, 1052 | "file_extension": ".py", 1053 | "mimetype": "text/x-python", 1054 | "name": "python", 1055 | "nbconvert_exporter": "python", 1056 | "pygments_lexer": "ipython3", 1057 | "version": "3.7.6" 1058 | } 1059 | }, 1060 | "nbformat": 4, 1061 | "nbformat_minor": 4 1062 | } 1063 | -------------------------------------------------------------------------------- /Previsão preço ações.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#importando as bibliotecas\n", 10 | "import pandas as pd\n", 11 | "from sklearn.feature_selection import SelectKBest\n", 12 | "from sklearn.model_selection import GridSearchCV\n", 13 | "from sklearn.neural_network import MLPRegressor\n", 14 | "from sklearn.preprocessing import MinMaxScaler\n", 15 | "from sklearn import datasets, linear_model\n", 16 | "from sklearn.metrics import mean_squared_error, r2_score\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "%matplotlib inline" 19 | ] 20 | }, 21 | { 22 | 
"cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | "
data_pregaosigla_acaonome_acaopreco_aberturapreco_maxpreco_minimopreco_fechamentoqtd_negociosvolume_negocios
02020-01-02AALR3ALLIAR18.2919.0018.2819.00585800.01.094620e+09
12020-01-02AAPL34APPLE120.00121.34120.00121.3412700.01.533380e+08
22020-01-02ABCB4ABC BRASIL20.0020.3019.8220.30870400.01.745788e+09
32020-01-02ABEV3AMBEV S/A18.8619.2518.7819.2016011300.03.062348e+10
42020-01-02ADHM3ADVANCED-DH2.552.552.352.48193400.04.719270e+07
..............................
1304512021-01-13Z1TO34ZTO EXPRESS39.7439.7439.7439.7479.03.139460e+05
1304522021-01-14Z1TO34ZTO EXPRESS38.9638.9638.8838.88919.03.574432e+06
1304532021-01-15Z1TO34ZTO EXPRESS39.0339.0339.0339.03930.03.629790e+06
1304542021-01-11Z1TS34ZOETIS INC233.41233.41233.41233.41400.09.336400e+06
1304552021-01-12Z1TS34ZOETIS INC234.00234.00231.19231.1934.07.867520e+05
\n", 193 | "

130456 rows × 9 columns

\n", 194 | "
" 195 | ], 196 | "text/plain": [ 197 | " data_pregao sigla_acao nome_acao preco_abertura preco_max \\\n", 198 | "0 2020-01-02 AALR3 ALLIAR 18.29 19.00 \n", 199 | "1 2020-01-02 AAPL34 APPLE 120.00 121.34 \n", 200 | "2 2020-01-02 ABCB4 ABC BRASIL 20.00 20.30 \n", 201 | "3 2020-01-02 ABEV3 AMBEV S/A 18.86 19.25 \n", 202 | "4 2020-01-02 ADHM3 ADVANCED-DH 2.55 2.55 \n", 203 | "... ... ... ... ... ... \n", 204 | "130451 2021-01-13 Z1TO34 ZTO EXPRESS 39.74 39.74 \n", 205 | "130452 2021-01-14 Z1TO34 ZTO EXPRESS 38.96 38.96 \n", 206 | "130453 2021-01-15 Z1TO34 ZTO EXPRESS 39.03 39.03 \n", 207 | "130454 2021-01-11 Z1TS34 ZOETIS INC 233.41 233.41 \n", 208 | "130455 2021-01-12 Z1TS34 ZOETIS INC 234.00 234.00 \n", 209 | "\n", 210 | " preco_minimo preco_fechamento qtd_negocios volume_negocios \n", 211 | "0 18.28 19.00 585800.0 1.094620e+09 \n", 212 | "1 120.00 121.34 12700.0 1.533380e+08 \n", 213 | "2 19.82 20.30 870400.0 1.745788e+09 \n", 214 | "3 18.78 19.20 16011300.0 3.062348e+10 \n", 215 | "4 2.35 2.48 193400.0 4.719270e+07 \n", 216 | "... ... ... ... ... 
\n", 217 | "130451 39.74 39.74 79.0 3.139460e+05 \n", 218 | "130452 38.88 38.88 919.0 3.574432e+06 \n", 219 | "130453 39.03 39.03 930.0 3.629790e+06 \n", 220 | "130454 233.41 233.41 400.0 9.336400e+06 \n", 221 | "130455 231.19 231.19 34.0 7.867520e+05 \n", 222 | "\n", 223 | "[130456 rows x 9 columns]" 224 | ] 225 | }, 226 | "execution_count": 2, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "#lendo o arquivo de ações\n", 233 | "#Lendo csv\n", 234 | "\n", 235 | "df = pd.read_csv(\"D:\\\\acoes\\\\all_bovespa.csv\", delimiter=';')\n", 236 | "df" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 3, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "#Itau\n", 246 | "df_itau = df[df['sigla_acao'] == 'ITUB4' ]" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 4, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "data": { 256 | "text/html": [ 257 | "
\n", 258 | "\n", 271 | "\n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | "
data_pregaosigla_acaonome_acaopreco_aberturapreco_maxpreco_minimopreco_fechamentoqtd_negociosvolume_negocios
1227382021-01-04ITUB4ITAUUNIBANCO31.9831.9930.8430.9031347800.09.744949e+10
1238162021-01-05ITUB4ITAUUNIBANCO30.7331.0630.1730.7028249800.08.648705e+10
1253312021-01-06ITUB4ITAUUNIBANCO30.9832.0630.7931.5543061900.01.365655e+11
1253322021-01-07ITUB4ITAUUNIBANCO31.6533.0631.5032.8346129800.01.502078e+11
1253332021-01-08ITUB4ITAUUNIBANCO32.9333.4432.4332.8252532500.01.721607e+11
1286732021-01-11ITUB4ITAUUNIBANCO32.4732.7031.7232.0831564300.01.016568e+11
1286742021-01-12ITUB4ITAUUNIBANCO32.1832.4532.0232.1833089300.01.066563e+11
1286752021-01-13ITUB4ITAUUNIBANCO32.1832.5331.2631.6542722600.01.354698e+11
1286762021-01-14ITUB4ITAUUNIBANCO32.0232.9431.6732.5928222500.09.166648e+10
1286772021-01-15ITUB4ITAUUNIBANCO32.0532.3931.3531.3637757100.01.193353e+11
\n", 409 | "
" 410 | ], 411 | "text/plain": [ 412 | " data_pregao sigla_acao nome_acao preco_abertura preco_max \\\n", 413 | "122738 2021-01-04 ITUB4 ITAUUNIBANCO 31.98 31.99 \n", 414 | "123816 2021-01-05 ITUB4 ITAUUNIBANCO 30.73 31.06 \n", 415 | "125331 2021-01-06 ITUB4 ITAUUNIBANCO 30.98 32.06 \n", 416 | "125332 2021-01-07 ITUB4 ITAUUNIBANCO 31.65 33.06 \n", 417 | "125333 2021-01-08 ITUB4 ITAUUNIBANCO 32.93 33.44 \n", 418 | "128673 2021-01-11 ITUB4 ITAUUNIBANCO 32.47 32.70 \n", 419 | "128674 2021-01-12 ITUB4 ITAUUNIBANCO 32.18 32.45 \n", 420 | "128675 2021-01-13 ITUB4 ITAUUNIBANCO 32.18 32.53 \n", 421 | "128676 2021-01-14 ITUB4 ITAUUNIBANCO 32.02 32.94 \n", 422 | "128677 2021-01-15 ITUB4 ITAUUNIBANCO 32.05 32.39 \n", 423 | "\n", 424 | " preco_minimo preco_fechamento qtd_negocios volume_negocios \n", 425 | "122738 30.84 30.90 31347800.0 9.744949e+10 \n", 426 | "123816 30.17 30.70 28249800.0 8.648705e+10 \n", 427 | "125331 30.79 31.55 43061900.0 1.365655e+11 \n", 428 | "125332 31.50 32.83 46129800.0 1.502078e+11 \n", 429 | "125333 32.43 32.82 52532500.0 1.721607e+11 \n", 430 | "128673 31.72 32.08 31564300.0 1.016568e+11 \n", 431 | "128674 32.02 32.18 33089300.0 1.066563e+11 \n", 432 | "128675 31.26 31.65 42722600.0 1.354698e+11 \n", 433 | "128676 31.67 32.59 28222500.0 9.166648e+10 \n", 434 | "128677 31.35 31.36 37757100.0 1.193353e+11 " 435 | ] 436 | }, 437 | "execution_count": 4, 438 | "metadata": {}, 439 | "output_type": "execute_result" 440 | } 441 | ], 442 | "source": [ 443 | "df_itau.tail(10)" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": 5, 449 | "metadata": {}, 450 | "outputs": [ 451 | { 452 | "data": { 453 | "text/plain": [ 454 | "data_pregao object\n", 455 | "sigla_acao object\n", 456 | "nome_acao object\n", 457 | "preco_abertura float64\n", 458 | "preco_max float64\n", 459 | "preco_minimo float64\n", 460 | "preco_fechamento float64\n", 461 | "qtd_negocios float64\n", 462 | "volume_negocios float64\n", 463 | "dtype: object" 464 | ] 465 | 
}, 466 | "execution_count": 5, 467 | "metadata": {}, 468 | "output_type": "execute_result" 469 | } 470 | ], 471 | "source": [ 472 | "#verificar o tipo do arquivo\n", 473 | "df_itau.dtypes" 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": 6, 479 | "metadata": {}, 480 | "outputs": [ 481 | { 482 | "name": "stderr", 483 | "output_type": "stream", 484 | "text": [ 485 | "C:\\Users\\fabri\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n", 486 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 487 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 488 | "\n", 489 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 490 | " \n" 491 | ] 492 | } 493 | ], 494 | "source": [ 495 | "#Mudar o tipo data\n", 496 | "df_itau['data_pregao'] = pd.to_datetime(df_itau['data_pregao'], format='%Y-%m-%d')\n" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 7, 502 | "metadata": {}, 503 | "outputs": [ 504 | { 505 | "data": { 506 | "text/plain": [ 507 | "data_pregao datetime64[ns]\n", 508 | "sigla_acao object\n", 509 | "nome_acao object\n", 510 | "preco_abertura float64\n", 511 | "preco_max float64\n", 512 | "preco_minimo float64\n", 513 | "preco_fechamento float64\n", 514 | "qtd_negocios float64\n", 515 | "volume_negocios float64\n", 516 | "dtype: object" 517 | ] 518 | }, 519 | "execution_count": 7, 520 | "metadata": {}, 521 | "output_type": "execute_result" 522 | } 523 | ], 524 | "source": [ 525 | "df_itau.dtypes" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": 8, 531 | "metadata": {}, 532 | "outputs": [ 533 | { 534 | "data": { 535 | "text/html": [ 536 | "
\n", 537 | "\n", 550 | "\n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | "
data_pregaosigla_acaonome_acaopreco_aberturapreco_maxpreco_minimopreco_fechamentoqtd_negociosvolume_negocios
1286732021-01-11ITUB4ITAUUNIBANCO32.4732.7031.7232.0831564300.01.016568e+11
1286742021-01-12ITUB4ITAUUNIBANCO32.1832.4532.0232.1833089300.01.066563e+11
1286752021-01-13ITUB4ITAUUNIBANCO32.1832.5331.2631.6542722600.01.354698e+11
1286762021-01-14ITUB4ITAUUNIBANCO32.0232.9431.6732.5928222500.09.166648e+10
1286772021-01-15ITUB4ITAUUNIBANCO32.0532.3931.3531.3637757100.01.193353e+11
\n", 628 | "
" 629 | ], 630 | "text/plain": [ 631 | " data_pregao sigla_acao nome_acao preco_abertura preco_max \\\n", 632 | "128673 2021-01-11 ITUB4 ITAUUNIBANCO 32.47 32.70 \n", 633 | "128674 2021-01-12 ITUB4 ITAUUNIBANCO 32.18 32.45 \n", 634 | "128675 2021-01-13 ITUB4 ITAUUNIBANCO 32.18 32.53 \n", 635 | "128676 2021-01-14 ITUB4 ITAUUNIBANCO 32.02 32.94 \n", 636 | "128677 2021-01-15 ITUB4 ITAUUNIBANCO 32.05 32.39 \n", 637 | "\n", 638 | " preco_minimo preco_fechamento qtd_negocios volume_negocios \n", 639 | "128673 31.72 32.08 31564300.0 1.016568e+11 \n", 640 | "128674 32.02 32.18 33089300.0 1.066563e+11 \n", 641 | "128675 31.26 31.65 42722600.0 1.354698e+11 \n", 642 | "128676 31.67 32.59 28222500.0 9.166648e+10 \n", 643 | "128677 31.35 31.36 37757100.0 1.193353e+11 " 644 | ] 645 | }, 646 | "execution_count": 8, 647 | "metadata": {}, 648 | "output_type": "execute_result" 649 | } 650 | ], 651 | "source": [ 652 | "df_itau.tail()" 653 | ] 654 | }, 655 | { 656 | "cell_type": "code", 657 | "execution_count": 9, 658 | "metadata": {}, 659 | "outputs": [ 660 | { 661 | "name": "stderr", 662 | "output_type": "stream", 663 | "text": [ 664 | "C:\\Users\\fabri\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n", 665 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 666 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 667 | "\n", 668 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 669 | " \n", 670 | "C:\\Users\\fabri\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:3: SettingWithCopyWarning: \n", 671 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 672 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 673 | "\n", 674 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 675 | " This is 
separate from the ipykernel package so we can avoid doing imports until\n" 676 | ] 677 | } 678 | ], 679 | "source": [ 680 | "#criando novos campos de medias móveis\n", 681 | "df_itau['mm5d'] = df_itau['preco_fechamento'].rolling(5).mean()\n", 682 | "df_itau['mm21d'] = df_itau['preco_fechamento'].rolling(21).mean()" 683 | ] 684 | }, 685 | { 686 | "cell_type": "code", 687 | "execution_count": 10, 688 | "metadata": {}, 689 | "outputs": [ 690 | { 691 | "data": { 692 | "text/html": [ 693 | "
\n", 694 | "\n", 707 | "\n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " 
\n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | 
" \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | "
data_pregaosigla_acaonome_acaopreco_aberturapreco_maxpreco_minimopreco_fechamentoqtd_negociosvolume_negociosmm5dmm21d
2022020-01-02ITUB4ITAUUNIBANCO37.2838.0336.9938.0320666100.07.812151e+10NaNNaN
8442020-01-03ITUB4ITAUUNIBANCO37.5038.2437.4537.6324891400.09.400384e+10NaNNaN
8452020-01-06ITUB4ITAUUNIBANCO37.5537.5836.9137.0722294700.08.294725e+10NaNNaN
22732020-01-07ITUB4ITAUUNIBANCO37.0737.2436.2136.2120000900.07.294927e+10NaNNaN
22742020-01-08ITUB4ITAUUNIBANCO36.4536.8135.6235.6225980900.09.345243e+1036.912NaN
22752020-01-09ITUB4ITAUUNIBANCO35.7935.9334.5234.9148423800.01.694589e+1136.288NaN
22762020-01-10ITUB4ITAUUNIBANCO35.0835.4534.5734.6025521600.08.900229e+1035.682NaN
48212020-01-13ITUB4ITAUUNIBANCO35.0235.4034.7435.0629200700.01.025651e+1135.280NaN
48222020-01-14ITUB4ITAUUNIBANCO34.9235.0634.6335.0618433300.06.424604e+1035.050NaN
48232020-01-15ITUB4ITAUUNIBANCO34.8934.9434.4034.6332757500.01.134815e+1134.852NaN
48242020-01-16ITUB4ITAUUNIBANCO34.9035.0734.4234.7022857500.07.940644e+1034.810NaN
48252020-01-17ITUB4ITAUUNIBANCO34.9035.3034.7334.9419722900.06.900594e+1034.878NaN
48262020-01-20ITUB4ITAUUNIBANCO34.8035.0034.1634.2318322200.06.311801e+1034.712NaN
48272020-01-21ITUB4ITAUUNIBANCO34.0134.4833.4433.5026878100.09.077382e+1034.400NaN
48282020-01-22ITUB4ITAUUNIBANCO33.8534.0733.5033.7017443500.05.874532e+1034.214NaN
48292020-01-23ITUB4ITAUUNIBANCO33.5034.8133.4034.5033077600.01.135770e+1134.174NaN
83172020-01-24ITUB4ITAUUNIBANCO34.6134.6233.9534.2414756100.05.052616e+1034.034NaN
83182020-01-27ITUB4ITAUUNIBANCO33.7434.0933.4133.5014387100.04.849840e+1033.888NaN
83192020-01-28ITUB4ITAUUNIBANCO33.7733.9733.3033.4917185400.05.758163e+1033.886NaN
83202020-01-29ITUB4ITAUUNIBANCO33.6933.7932.9232.9720071400.06.680223e+1033.740NaN
83212020-01-30ITUB4ITAUUNIBANCO32.6533.4532.5833.4541789800.01.374939e+1133.53034.859048
83222020-01-31ITUB4ITAUUNIBANCO32.9933.3932.5432.8225459800.08.358342e+1033.24634.610952
\n", 1035 | "
" 1036 | ], 1037 | "text/plain": [ 1038 | " data_pregao sigla_acao nome_acao preco_abertura preco_max \\\n", 1039 | "202 2020-01-02 ITUB4 ITAUUNIBANCO 37.28 38.03 \n", 1040 | "844 2020-01-03 ITUB4 ITAUUNIBANCO 37.50 38.24 \n", 1041 | "845 2020-01-06 ITUB4 ITAUUNIBANCO 37.55 37.58 \n", 1042 | "2273 2020-01-07 ITUB4 ITAUUNIBANCO 37.07 37.24 \n", 1043 | "2274 2020-01-08 ITUB4 ITAUUNIBANCO 36.45 36.81 \n", 1044 | "2275 2020-01-09 ITUB4 ITAUUNIBANCO 35.79 35.93 \n", 1045 | "2276 2020-01-10 ITUB4 ITAUUNIBANCO 35.08 35.45 \n", 1046 | "4821 2020-01-13 ITUB4 ITAUUNIBANCO 35.02 35.40 \n", 1047 | "4822 2020-01-14 ITUB4 ITAUUNIBANCO 34.92 35.06 \n", 1048 | "4823 2020-01-15 ITUB4 ITAUUNIBANCO 34.89 34.94 \n", 1049 | "4824 2020-01-16 ITUB4 ITAUUNIBANCO 34.90 35.07 \n", 1050 | "4825 2020-01-17 ITUB4 ITAUUNIBANCO 34.90 35.30 \n", 1051 | "4826 2020-01-20 ITUB4 ITAUUNIBANCO 34.80 35.00 \n", 1052 | "4827 2020-01-21 ITUB4 ITAUUNIBANCO 34.01 34.48 \n", 1053 | "4828 2020-01-22 ITUB4 ITAUUNIBANCO 33.85 34.07 \n", 1054 | "4829 2020-01-23 ITUB4 ITAUUNIBANCO 33.50 34.81 \n", 1055 | "8317 2020-01-24 ITUB4 ITAUUNIBANCO 34.61 34.62 \n", 1056 | "8318 2020-01-27 ITUB4 ITAUUNIBANCO 33.74 34.09 \n", 1057 | "8319 2020-01-28 ITUB4 ITAUUNIBANCO 33.77 33.97 \n", 1058 | "8320 2020-01-29 ITUB4 ITAUUNIBANCO 33.69 33.79 \n", 1059 | "8321 2020-01-30 ITUB4 ITAUUNIBANCO 32.65 33.45 \n", 1060 | "8322 2020-01-31 ITUB4 ITAUUNIBANCO 32.99 33.39 \n", 1061 | "\n", 1062 | " preco_minimo preco_fechamento qtd_negocios volume_negocios mm5d \\\n", 1063 | "202 36.99 38.03 20666100.0 7.812151e+10 NaN \n", 1064 | "844 37.45 37.63 24891400.0 9.400384e+10 NaN \n", 1065 | "845 36.91 37.07 22294700.0 8.294725e+10 NaN \n", 1066 | "2273 36.21 36.21 20000900.0 7.294927e+10 NaN \n", 1067 | "2274 35.62 35.62 25980900.0 9.345243e+10 36.912 \n", 1068 | "2275 34.52 34.91 48423800.0 1.694589e+11 36.288 \n", 1069 | "2276 34.57 34.60 25521600.0 8.900229e+10 35.682 \n", 1070 | "4821 34.74 35.06 29200700.0 1.025651e+11 35.280 \n", 1071 | 
"4822 34.63 35.06 18433300.0 6.424604e+10 35.050 \n", 1072 | "4823 34.40 34.63 32757500.0 1.134815e+11 34.852 \n", 1073 | "4824 34.42 34.70 22857500.0 7.940644e+10 34.810 \n", 1074 | "4825 34.73 34.94 19722900.0 6.900594e+10 34.878 \n", 1075 | "4826 34.16 34.23 18322200.0 6.311801e+10 34.712 \n", 1076 | "4827 33.44 33.50 26878100.0 9.077382e+10 34.400 \n", 1077 | "4828 33.50 33.70 17443500.0 5.874532e+10 34.214 \n", 1078 | "4829 33.40 34.50 33077600.0 1.135770e+11 34.174 \n", 1079 | "8317 33.95 34.24 14756100.0 5.052616e+10 34.034 \n", 1080 | "8318 33.41 33.50 14387100.0 4.849840e+10 33.888 \n", 1081 | "8319 33.30 33.49 17185400.0 5.758163e+10 33.886 \n", 1082 | "8320 32.92 32.97 20071400.0 6.680223e+10 33.740 \n", 1083 | "8321 32.58 33.45 41789800.0 1.374939e+11 33.530 \n", 1084 | "8322 32.54 32.82 25459800.0 8.358342e+10 33.246 \n", 1085 | "\n", 1086 | " mm21d \n", 1087 | "202 NaN \n", 1088 | "844 NaN \n", 1089 | "845 NaN \n", 1090 | "2273 NaN \n", 1091 | "2274 NaN \n", 1092 | "2275 NaN \n", 1093 | "2276 NaN \n", 1094 | "4821 NaN \n", 1095 | "4822 NaN \n", 1096 | "4823 NaN \n", 1097 | "4824 NaN \n", 1098 | "4825 NaN \n", 1099 | "4826 NaN \n", 1100 | "4827 NaN \n", 1101 | "4828 NaN \n", 1102 | "4829 NaN \n", 1103 | "8317 NaN \n", 1104 | "8318 NaN \n", 1105 | "8319 NaN \n", 1106 | "8320 NaN \n", 1107 | "8321 34.859048 \n", 1108 | "8322 34.610952 " 1109 | ] 1110 | }, 1111 | "execution_count": 10, 1112 | "metadata": {}, 1113 | "output_type": "execute_result" 1114 | } 1115 | ], 1116 | "source": [ 1117 | "df_itau.head(22)" 1118 | ] 1119 | }, 1120 | { 1121 | "cell_type": "code", 1122 | "execution_count": 11, 1123 | "metadata": {}, 1124 | "outputs": [ 1125 | { 1126 | "name": "stderr", 1127 | "output_type": "stream", 1128 | "text": [ 1129 | "C:\\Users\\fabri\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n", 1130 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 1131 | "Try using .loc[row_indexer,col_indexer] = 
value instead\n", 1132 | "\n", 1133 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 1134 | " \n" 1135 | ] 1136 | }, 1137 | { 1138 | "data": { 1139 | "text/html": [ 1140 | "
\n", 1141 | "\n", 1154 | "\n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | "
data_pregaosigla_acaonome_acaopreco_aberturapreco_maxpreco_minimopreco_fechamentoqtd_negociosvolume_negociosmm5dmm21d
2022020-01-02ITUB4ITAUUNIBANCO37.2838.0336.9937.6320666100.07.812151e+10NaNNaN
8442020-01-03ITUB4ITAUUNIBANCO37.5038.2437.4537.0724891400.09.400384e+10NaNNaN
8452020-01-06ITUB4ITAUUNIBANCO37.5537.5836.9136.2122294700.08.294725e+10NaNNaN
22732020-01-07ITUB4ITAUUNIBANCO37.0737.2436.2135.6220000900.07.294927e+10NaNNaN
22742020-01-08ITUB4ITAUUNIBANCO36.4536.8135.6234.9125980900.09.345243e+1036.912NaN
\n", 1244 | "
" 1245 | ], 1246 | "text/plain": [ 1247 | " data_pregao sigla_acao nome_acao preco_abertura preco_max \\\n", 1248 | "202 2020-01-02 ITUB4 ITAUUNIBANCO 37.28 38.03 \n", 1249 | "844 2020-01-03 ITUB4 ITAUUNIBANCO 37.50 38.24 \n", 1250 | "845 2020-01-06 ITUB4 ITAUUNIBANCO 37.55 37.58 \n", 1251 | "2273 2020-01-07 ITUB4 ITAUUNIBANCO 37.07 37.24 \n", 1252 | "2274 2020-01-08 ITUB4 ITAUUNIBANCO 36.45 36.81 \n", 1253 | "\n", 1254 | " preco_minimo preco_fechamento qtd_negocios volume_negocios mm5d \\\n", 1255 | "202 36.99 37.63 20666100.0 7.812151e+10 NaN \n", 1256 | "844 37.45 37.07 24891400.0 9.400384e+10 NaN \n", 1257 | "845 36.91 36.21 22294700.0 8.294725e+10 NaN \n", 1258 | "2273 36.21 35.62 20000900.0 7.294927e+10 NaN \n", 1259 | "2274 35.62 34.91 25980900.0 9.345243e+10 36.912 \n", 1260 | "\n", 1261 | " mm21d \n", 1262 | "202 NaN \n", 1263 | "844 NaN \n", 1264 | "845 NaN \n", 1265 | "2273 NaN \n", 1266 | "2274 NaN " 1267 | ] 1268 | }, 1269 | "execution_count": 11, 1270 | "metadata": {}, 1271 | "output_type": "execute_result" 1272 | } 1273 | ], 1274 | "source": [ 1275 | "#Empurrando para frente os valores das ações\n", 1276 | "df_itau['preco_fechamento'] = df_itau['preco_fechamento'].shift(-1)\n", 1277 | "\n", 1278 | "df_itau.head()" 1279 | ] 1280 | }, 1281 | { 1282 | "cell_type": "code", 1283 | "execution_count": 12, 1284 | "metadata": {}, 1285 | "outputs": [ 1286 | { 1287 | "name": "stderr", 1288 | "output_type": "stream", 1289 | "text": [ 1290 | "C:\\Users\\fabri\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n", 1291 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 1292 | "\n", 1293 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 1294 | " \n" 1295 | ] 1296 | }, 1297 | { 1298 | "data": { 1299 | "text/html": [ 1300 | "
\n", 1301 | "\n", 1314 | "\n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " 
\n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | "
data_pregaosigla_acaonome_acaopreco_aberturapreco_maxpreco_minimopreco_fechamentoqtd_negociosvolume_negociosmm5dmm21d
83212020-01-30ITUB4ITAUUNIBANCO32.6533.4532.5832.8241789800.01.374939e+1133.53034.859048
83222020-01-31ITUB4ITAUUNIBANCO32.9933.3932.5432.9025459800.08.358342e+1033.24634.610952
83232020-02-03ITUB4ITAUUNIBANCO33.0033.3132.7933.1617586000.05.817445e+1033.12634.385714
83242020-02-04ITUB4ITAUUNIBANCO33.3633.4632.9033.3615730300.05.220127e+1033.06034.199524
83252020-02-05ITUB4ITAUUNIBANCO33.7134.1233.3633.2834791900.01.169225e+1133.13834.063810
....................................
1253332021-01-08ITUB4ITAUUNIBANCO32.9333.4432.4332.0852532500.01.721607e+1131.76031.444762
1286732021-01-11ITUB4ITAUUNIBANCO32.4732.7031.7232.1831564300.01.016568e+1131.99631.522857
1286742021-01-12ITUB4ITAUUNIBANCO32.1832.4532.0231.6533089300.01.066563e+1132.29231.609524
1286752021-01-13ITUB4ITAUUNIBANCO32.1832.5331.2632.5942722600.01.354698e+1132.31231.662381
1286762021-01-14ITUB4ITAUUNIBANCO32.0232.9431.6731.3628222500.09.166648e+1032.26431.711905
\n", 1488 | "

238 rows × 11 columns

\n", 1489 | "
" 1490 | ], 1491 | "text/plain": [ 1492 | " data_pregao sigla_acao nome_acao preco_abertura preco_max \\\n", 1493 | "8321 2020-01-30 ITUB4 ITAUUNIBANCO 32.65 33.45 \n", 1494 | "8322 2020-01-31 ITUB4 ITAUUNIBANCO 32.99 33.39 \n", 1495 | "8323 2020-02-03 ITUB4 ITAUUNIBANCO 33.00 33.31 \n", 1496 | "8324 2020-02-04 ITUB4 ITAUUNIBANCO 33.36 33.46 \n", 1497 | "8325 2020-02-05 ITUB4 ITAUUNIBANCO 33.71 34.12 \n", 1498 | "... ... ... ... ... ... \n", 1499 | "125333 2021-01-08 ITUB4 ITAUUNIBANCO 32.93 33.44 \n", 1500 | "128673 2021-01-11 ITUB4 ITAUUNIBANCO 32.47 32.70 \n", 1501 | "128674 2021-01-12 ITUB4 ITAUUNIBANCO 32.18 32.45 \n", 1502 | "128675 2021-01-13 ITUB4 ITAUUNIBANCO 32.18 32.53 \n", 1503 | "128676 2021-01-14 ITUB4 ITAUUNIBANCO 32.02 32.94 \n", 1504 | "\n", 1505 | " preco_minimo preco_fechamento qtd_negocios volume_negocios mm5d \\\n", 1506 | "8321 32.58 32.82 41789800.0 1.374939e+11 33.530 \n", 1507 | "8322 32.54 32.90 25459800.0 8.358342e+10 33.246 \n", 1508 | "8323 32.79 33.16 17586000.0 5.817445e+10 33.126 \n", 1509 | "8324 32.90 33.36 15730300.0 5.220127e+10 33.060 \n", 1510 | "8325 33.36 33.28 34791900.0 1.169225e+11 33.138 \n", 1511 | "... ... ... ... ... ... \n", 1512 | "125333 32.43 32.08 52532500.0 1.721607e+11 31.760 \n", 1513 | "128673 31.72 32.18 31564300.0 1.016568e+11 31.996 \n", 1514 | "128674 32.02 31.65 33089300.0 1.066563e+11 32.292 \n", 1515 | "128675 31.26 32.59 42722600.0 1.354698e+11 32.312 \n", 1516 | "128676 31.67 31.36 28222500.0 9.166648e+10 32.264 \n", 1517 | "\n", 1518 | " mm21d \n", 1519 | "8321 34.859048 \n", 1520 | "8322 34.610952 \n", 1521 | "8323 34.385714 \n", 1522 | "8324 34.199524 \n", 1523 | "8325 34.063810 \n", 1524 | "... ... 
\n", 1525 | "125333 31.444762 \n", 1526 | "128673 31.522857 \n", 1527 | "128674 31.609524 \n", 1528 | "128675 31.662381 \n", 1529 | "128676 31.711905 \n", 1530 | "\n", 1531 | "[238 rows x 11 columns]" 1532 | ] 1533 | }, 1534 | "execution_count": 12, 1535 | "metadata": {}, 1536 | "output_type": "execute_result" 1537 | } 1538 | ], 1539 | "source": [ 1540 | "#retirando os dados nulos\n", 1541 | "df_itau.dropna(inplace=True)\n", 1542 | "df_itau" 1543 | ] 1544 | }, 1545 | { 1546 | "cell_type": "code", 1547 | "execution_count": 13, 1548 | "metadata": {}, 1549 | "outputs": [ 1550 | { 1551 | "data": { 1552 | "text/html": [ 1553 | "
\n", 1554 | "\n", 1567 | "\n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1665 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | " \n", 1670 | " \n", 1671 | " \n", 1672 | " \n", 1673 | " \n", 1674 | " \n", 1675 | " \n", 1676 | " \n", 1677 | " \n", 1678 | " \n", 1679 | " \n", 1680 | " \n", 1681 | " \n", 1682 | " \n", 1683 | " \n", 1684 | " \n", 1685 | " \n", 1686 | " \n", 1687 | " \n", 1688 | " \n", 1689 | " \n", 1690 | " \n", 1691 | " \n", 1692 | " \n", 1693 | " \n", 1694 | " \n", 1695 | " \n", 1696 | " \n", 1697 | " \n", 1698 | " \n", 1699 | " \n", 1700 | " \n", 1701 | " \n", 1702 | " \n", 1703 | " \n", 1704 | " \n", 1705 | " \n", 1706 | " \n", 1707 | " \n", 1708 | " 
\n", 1709 | " \n", 1710 | " \n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | "
data_pregaosigla_acaonome_acaopreco_aberturapreco_maxpreco_minimopreco_fechamentoqtd_negociosvolume_negociosmm5dmm21d
02020-01-30ITUB4ITAUUNIBANCO32.6533.4532.5832.8241789800.01.374939e+1133.53034.859048
12020-01-31ITUB4ITAUUNIBANCO32.9933.3932.5432.9025459800.08.358342e+1033.24634.610952
22020-02-03ITUB4ITAUUNIBANCO33.0033.3132.7933.1617586000.05.817445e+1033.12634.385714
32020-02-04ITUB4ITAUUNIBANCO33.3633.4632.9033.3615730300.05.220127e+1033.06034.199524
42020-02-05ITUB4ITAUUNIBANCO33.7134.1233.3633.2834791900.01.169225e+1133.13834.063810
....................................
2332021-01-08ITUB4ITAUUNIBANCO32.9333.4432.4332.0852532500.01.721607e+1131.76031.444762
2342021-01-11ITUB4ITAUUNIBANCO32.4732.7031.7232.1831564300.01.016568e+1131.99631.522857
2352021-01-12ITUB4ITAUUNIBANCO32.1832.4532.0231.6533089300.01.066563e+1132.29231.609524
2362021-01-13ITUB4ITAUUNIBANCO32.1832.5331.2632.5942722600.01.354698e+1132.31231.662381
2372021-01-14ITUB4ITAUUNIBANCO32.0232.9431.6731.3628222500.09.166648e+1032.26431.711905
\n", 1741 | "

238 rows × 11 columns

\n", 1742 | "
" 1743 | ], 1744 | "text/plain": [ 1745 | " data_pregao sigla_acao nome_acao preco_abertura preco_max \\\n", 1746 | "0 2020-01-30 ITUB4 ITAUUNIBANCO 32.65 33.45 \n", 1747 | "1 2020-01-31 ITUB4 ITAUUNIBANCO 32.99 33.39 \n", 1748 | "2 2020-02-03 ITUB4 ITAUUNIBANCO 33.00 33.31 \n", 1749 | "3 2020-02-04 ITUB4 ITAUUNIBANCO 33.36 33.46 \n", 1750 | "4 2020-02-05 ITUB4 ITAUUNIBANCO 33.71 34.12 \n", 1751 | ".. ... ... ... ... ... \n", 1752 | "233 2021-01-08 ITUB4 ITAUUNIBANCO 32.93 33.44 \n", 1753 | "234 2021-01-11 ITUB4 ITAUUNIBANCO 32.47 32.70 \n", 1754 | "235 2021-01-12 ITUB4 ITAUUNIBANCO 32.18 32.45 \n", 1755 | "236 2021-01-13 ITUB4 ITAUUNIBANCO 32.18 32.53 \n", 1756 | "237 2021-01-14 ITUB4 ITAUUNIBANCO 32.02 32.94 \n", 1757 | "\n", 1758 | " preco_minimo preco_fechamento qtd_negocios volume_negocios mm5d \\\n", 1759 | "0 32.58 32.82 41789800.0 1.374939e+11 33.530 \n", 1760 | "1 32.54 32.90 25459800.0 8.358342e+10 33.246 \n", 1761 | "2 32.79 33.16 17586000.0 5.817445e+10 33.126 \n", 1762 | "3 32.90 33.36 15730300.0 5.220127e+10 33.060 \n", 1763 | "4 33.36 33.28 34791900.0 1.169225e+11 33.138 \n", 1764 | ".. ... ... ... ... ... \n", 1765 | "233 32.43 32.08 52532500.0 1.721607e+11 31.760 \n", 1766 | "234 31.72 32.18 31564300.0 1.016568e+11 31.996 \n", 1767 | "235 32.02 31.65 33089300.0 1.066563e+11 32.292 \n", 1768 | "236 31.26 32.59 42722600.0 1.354698e+11 32.312 \n", 1769 | "237 31.67 31.36 28222500.0 9.166648e+10 32.264 \n", 1770 | "\n", 1771 | " mm21d \n", 1772 | "0 34.859048 \n", 1773 | "1 34.610952 \n", 1774 | "2 34.385714 \n", 1775 | "3 34.199524 \n", 1776 | "4 34.063810 \n", 1777 | ".. ... 
\n", 1778 | "233 31.444762 \n", 1779 | "234 31.522857 \n", 1780 | "235 31.609524 \n", 1781 | "236 31.662381 \n", 1782 | "237 31.711905 \n", 1783 | "\n", 1784 | "[238 rows x 11 columns]" 1785 | ] 1786 | }, 1787 | "execution_count": 13, 1788 | "metadata": {}, 1789 | "output_type": "execute_result" 1790 | } 1791 | ], 1792 | "source": [ 1793 | "#re-indexing the data frame so the row labels run 0..n-1 again after dropna\n", 1794 | "df_itau = df_itau.reset_index(drop=True)\n", 1795 | "df_itau" 1796 | ] 1797 | }, 1798 | { 1799 | "cell_type": "code", 1800 | "execution_count": 14, 1801 | "metadata": {}, 1802 | "outputs": [ 1803 | { 1804 | "data": { 1805 | "text/plain": [ 1806 | "'linhas treino= 0:167 linhas teste= 167:237 linhas validação= 237'" 1807 | ] 1808 | }, 1809 | "execution_count": 14, 1810 | "metadata": {}, 1811 | "output_type": "execute_result" 1812 | } 1813 | ], 1814 | "source": [ 1815 | "#computing the row counts: 70% of the rows for training, the rest for testing, last row index kept apart for validation\n", 1816 | "qtd_linhas = len(df_itau)\n", 1817 | "\n", 1818 | "qtd_linhas_treino= round(.70 * qtd_linhas)\n", 1819 | "qtd_linhas_teste= qtd_linhas - qtd_linhas_treino \n", 1820 | "qtd_linhas_validacao = qtd_linhas -1\n", 1821 | "\n", 1822 | "info = (\n", 1823 | " f\"linhas treino= 0:{qtd_linhas_treino}\"\n", 1824 | " f\" linhas teste= {qtd_linhas_treino}:{qtd_linhas_treino + qtd_linhas_teste -1}\"\n", 1825 | " f\" linhas validação= {qtd_linhas_validacao}\"\n", 1826 | ")\n", 1827 | "\n", 1828 | "info\n" 1829 | ] 1830 | }, 1831 | { 1832 | "cell_type": "code", 1833 | "execution_count": 15, 1834 | "metadata": {}, 1835 | "outputs": [], 1836 | "source": [ 1837 | "#separating the features and labels (axis is passed by keyword: pandas 2.0 no longer accepts it positionally)\n", 1838 | "features = df_itau.drop(['sigla_acao', 'nome_acao', 'data_pregao', 'preco_fechamento'], axis=1)\n", 1839 | "labels = df_itau['preco_fechamento']" 1840 | ] 1841 | }, 1842 | { 1843 | "cell_type": "code", 1844 | "execution_count": 16, 1845 | "metadata": {}, 1846 | "outputs": [ 1847 | { 1848 | "name": "stdout", 1849 | "output_type": "stream", 1850 | "text": [ 1851 | "\n", 1852 | "Melhores features:\n", 1853 
| "{'qtd_negocios': 18.799424892097544, 'preco_minimo': 15.959684908457135, 'preco_max': 10.97138027738372, 'mm21d': 7.809600323338402, 'mm5d': 2.2915178065342623, 'volume_negocios': 1.79780379509346}\n" 1854 | ] 1855 | } 1856 | ], 1857 | "source": [ 1858 | "#Ranking the features with SelectKBest (NOTE(review): the default score_func is f_classif; f_regression may suit this continuous target better - confirm)\n", 1859 | "\n", 1860 | "features_list = ('preco_abertura','preco_max','preco_minimo','qtd_negocios','volume_negocios','mm5d','mm21d')\n", 1861 | "\n", 1862 | "k_best_features = SelectKBest(k='all')\n", 1863 | "k_best_features.fit_transform(features, labels)\n", 1864 | "k_best_features_scores = k_best_features.scores_\n", 1865 | "raw_pairs = zip(features_list, k_best_features_scores) # one score per column of features, in column order; slicing the names would mislabel every score\n", 1866 | "ordered_pairs = list(reversed(sorted(raw_pairs, key=lambda x: x[1])))\n", 1867 | "\n", 1868 | "k_best_features_final = dict(ordered_pairs[:15])\n", 1869 | "best_features = k_best_features_final.keys()\n", 1870 | "print ('')\n", 1871 | "print (\"Melhores features:\")\n", 1872 | "print (k_best_features_final)" 1873 | ] 1874 | }, 1875 | { 1876 | "cell_type": "code", 1877 | "execution_count": 17, 1878 | "metadata": {}, 1879 | "outputs": [], 1880 | "source": [ 1881 | "#keeping only the hand-picked features (re-check this list whenever the ranking cell is re-run)\n", 1882 | "features = df_itau.loc[:,['preco_max','preco_minimo','volume_negocios','mm5d']]" 1883 | ] 1884 | }, 1885 | { 1886 | "cell_type": "code", 1887 | "execution_count": 18, 1888 | "metadata": {}, 1889 | "outputs": [ 1890 | { 1891 | "data": { 1892 | "text/html": [ 1893 | "
\n", 1894 | "\n", 1907 | "\n", 1908 | " \n", 1909 | " \n", 1910 | " \n", 1911 | " \n", 1912 | " \n", 1913 | " \n", 1914 | " \n", 1915 | " \n", 1916 | " \n", 1917 | " \n", 1918 | " \n", 1919 | " \n", 1920 | " \n", 1921 | " \n", 1922 | " \n", 1923 | " \n", 1924 | " \n", 1925 | " \n", 1926 | " \n", 1927 | " \n", 1928 | " \n", 1929 | " \n", 1930 | " \n", 1931 | " \n", 1932 | " \n", 1933 | " \n", 1934 | " \n", 1935 | " \n", 1936 | " \n", 1937 | " \n", 1938 | " \n", 1939 | " \n", 1940 | " \n", 1941 | " \n", 1942 | " \n", 1943 | " \n", 1944 | " \n", 1945 | " \n", 1946 | " \n", 1947 | " \n", 1948 | " \n", 1949 | " \n", 1950 | " \n", 1951 | " \n", 1952 | " \n", 1953 | " \n", 1954 | " \n", 1955 | " \n", 1956 | " \n", 1957 | " \n", 1958 | " \n", 1959 | " \n", 1960 | " \n", 1961 | " \n", 1962 | " \n", 1963 | " \n", 1964 | " \n", 1965 | " \n", 1966 | " \n", 1967 | " \n", 1968 | " \n", 1969 | " \n", 1970 | " \n", 1971 | " \n", 1972 | " \n", 1973 | " \n", 1974 | " \n", 1975 | " \n", 1976 | " \n", 1977 | " \n", 1978 | " \n", 1979 | " \n", 1980 | " \n", 1981 | " \n", 1982 | " \n", 1983 | " \n", 1984 | " \n", 1985 | " \n", 1986 | " \n", 1987 | " \n", 1988 | " \n", 1989 | " \n", 1990 | " \n", 1991 | " \n", 1992 | " \n", 1993 | " \n", 1994 | " \n", 1995 | " \n", 1996 | "
preco_maxpreco_minimovolume_negociosmm5d
033.4532.581.374939e+1133.530
133.3932.548.358342e+1033.246
233.3132.795.817445e+1033.126
333.4632.905.220127e+1033.060
434.1233.361.169225e+1133.138
...............
23333.4432.431.721607e+1131.760
23432.7031.721.016568e+1131.996
23532.4532.021.066563e+1132.292
23632.5331.261.354698e+1132.312
23732.9431.679.166648e+1032.264
\n", 1997 | "

238 rows × 4 columns

\n", 1998 | "
" 1999 | ], 2000 | "text/plain": [ 2001 | " preco_max preco_minimo volume_negocios mm5d\n", 2002 | "0 33.45 32.58 1.374939e+11 33.530\n", 2003 | "1 33.39 32.54 8.358342e+10 33.246\n", 2004 | "2 33.31 32.79 5.817445e+10 33.126\n", 2005 | "3 33.46 32.90 5.220127e+10 33.060\n", 2006 | "4 34.12 33.36 1.169225e+11 33.138\n", 2007 | ".. ... ... ... ...\n", 2008 | "233 33.44 32.43 1.721607e+11 31.760\n", 2009 | "234 32.70 31.72 1.016568e+11 31.996\n", 2010 | "235 32.45 32.02 1.066563e+11 32.292\n", 2011 | "236 32.53 31.26 1.354698e+11 32.312\n", 2012 | "237 32.94 31.67 9.166648e+10 32.264\n", 2013 | "\n", 2014 | "[238 rows x 4 columns]" 2015 | ] 2016 | }, 2017 | "execution_count": 18, 2018 | "metadata": {}, 2019 | "output_type": "execute_result" 2020 | } 2021 | ], 2022 | "source": [ 2023 | "features" 2024 | ] 2025 | }, 2026 | { 2027 | "cell_type": "code", 2028 | "execution_count": 19, 2029 | "metadata": {}, 2030 | "outputs": [ 2031 | { 2032 | "name": "stdout", 2033 | "output_type": "stream", 2034 | "text": [ 2035 | "167 167\n", 2036 | "70 70\n" 2037 | ] 2038 | } 2039 | ], 2040 | "source": [ 2041 | "# Slice features and labels by position into train and test sets; the test slice stops one row before the end, leaving the last row out.\n", 2042 | "X_train = features[:qtd_linhas_treino]\n", 2043 | "X_test = features[qtd_linhas_treino:qtd_linhas_treino + qtd_linhas_teste -1]\n", 2044 | "\n", 2045 | "y_train = labels[:qtd_linhas_treino]\n", 2046 | "y_test = labels[qtd_linhas_treino:qtd_linhas_treino + qtd_linhas_teste -1]\n", 2047 | "\n", 2048 | "print( len(X_train), len(y_train))\n", 2049 | "\n", 2050 | "print( len(X_test), len(y_test))" 2051 | ] 2052 | }, 2053 | { 2054 | "cell_type": "code", 2055 | "execution_count": 20, 2056 | "metadata": {}, 2057 | "outputs": [], 2058 | "source": [ 2059 | "# Scale the input features with MinMaxScaler.\n", 2060 | "\n", 2061 | "# The scaler is fitted on the training rows only and then reused unchanged for the test rows.\n", 2062 | "scaler = MinMaxScaler()\n", 2063 | "X_train_scale = scaler.fit_transform(X_train) # fit on the training inputs and scale them\n", 2064 | "X_test_scale =
scaler.transform(X_test) # scale the test inputs with the already-fitted scaler\n", 2065 | "\n" 2066 | ] 2067 | }, 2068 | { 2069 | "cell_type": "code", 2070 | "execution_count": 21, 2071 | "metadata": {}, 2072 | "outputs": [ 2073 | { 2074 | "data": { 2075 | "text/plain": [ 2076 | "'Coeficiente de determinação:95.39'" 2077 | ] 2078 | }, 2079 | "execution_count": 21, 2080 | "metadata": {}, 2081 | "output_type": "execute_result" 2082 | } 2083 | ], 2084 | "source": [ 2085 | "# Fit a linear regression on the scaled training data and show r2_score on the test slice as a percentage.\n", 2086 | "lr = linear_model.LinearRegression()\n", 2087 | "lr.fit(X_train_scale, y_train)\n", 2088 | "pred= lr.predict(X_test_scale)\n", 2089 | "cd =r2_score(y_test, pred)\n", 2090 | "\n", 2091 | "f'Coeficiente de determinação:{cd * 100:.2f}'" 2092 | ] 2093 | }, 2094 | { 2095 | "cell_type": "code", 2096 | "execution_count": 22, 2097 | "metadata": {}, 2098 | "outputs": [ 2099 | { 2100 | "name": "stderr", 2101 | "output_type": "stream", 2102 | "text": [ 2103 | "C:\\Users\\fabri\\anaconda3\\lib\\site-packages\\sklearn\\neural_network\\_multilayer_perceptron.py:571: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (2000) reached and the optimization hasn't converged yet.\n", 2104 | " % self.max_iter, ConvergenceWarning)\n" 2105 | ] 2106 | }, 2107 | { 2108 | "data": { 2109 | "text/plain": [ 2110 | "'Coeficiente de determinação:93.77'" 2111 | ] 2112 | }, 2113 | "execution_count": 22, 2114 | "metadata": {}, 2115 | "output_type": "execute_result" 2116 | } 2117 | ], 2118 | "source": [ 2119 | "# Neural network (MLPRegressor) capped at 2000 iterations; its score() on the test slice is shown as a percentage. Rebinds pred and cd.\n", 2120 | "rn = MLPRegressor(max_iter=2000)\n", 2121 | "\n", 2122 | "rn.fit(X_train_scale, y_train)\n", 2123 | "pred= rn.predict(X_test_scale)\n", 2124 | "\n", 2125 | "cd = rn.score(X_test_scale, y_test)\n", 2126 | "\n", 2127 | "\n", 2128 | "f'Coeficiente de determinação:{cd * 100:.2f}'" 2129 | ] 2130 | }, 2131 | { 2132 | "cell_type": "code", 2133 | "execution_count": 23, 2134 | "metadata": {}, 2135 | "outputs": [ 2136 | { 2137 | "data": { 2138 | "text/plain": [ 2139
| "'Coeficiente de determinação:94.38'" 2140 | ] 2141 | }, 2142 | "execution_count": 23, 2143 | "metadata": {}, 2144 | "output_type": "execute_result" 2145 | } 2146 | ], 2147 | "source": [ 2148 | "#rede neural com ajuste hyper parameters\n", 2149 | "\n", 2150 | "rn = MLPRegressor()\n", 2151 | "\n", 2152 | "parameter_space = {\n", 2153 | " 'hidden_layer_sizes': [(i,) for i in list(range(1, 21))],\n", 2154 | " 'activation': ['tanh', 'relu'],\n", 2155 | " 'solver': ['sgd', 'adam', 'lbfgs'], \n", 2156 | " 'alpha': [0.0001, 0.05],\n", 2157 | " 'learning_rate': ['constant', 'adaptive'],\n", 2158 | " }\n", 2159 | "\n", 2160 | "search = GridSearchCV(rn, parameter_space, n_jobs=-1, cv=5)\n", 2161 | "\n", 2162 | "\n", 2163 | "search.fit(X_train_scale,y_train)\n", 2164 | "clf = search.best_estimator_\n", 2165 | "pred= search.predict(X_test_scale)\n", 2166 | "\n", 2167 | "cd = search.score(X_test_scale, y_test)\n", 2168 | "\n", 2169 | "f'Coeficiente de determinação:{cd * 100:.2f}'" 2170 | ] 2171 | }, 2172 | { 2173 | "cell_type": "code", 2174 | "execution_count": 24, 2175 | "metadata": {}, 2176 | "outputs": [ 2177 | { 2178 | "data": { 2179 | "text/html": [ 2180 | "
\n", 2181 | "\n", 2194 | "\n", 2195 | " \n", 2196 | " \n", 2197 | " \n", 2198 | " \n", 2199 | " \n", 2200 | " \n", 2201 | " \n", 2202 | " \n", 2203 | " \n", 2204 | " \n", 2205 | " \n", 2206 | " \n", 2207 | " \n", 2208 | " \n", 2209 | " \n", 2210 | " \n", 2211 | " \n", 2212 | " \n", 2213 | "
preco_maxpreco_minimovolume_negociosmm5d
23732.9431.679.166648e+1032.264
\n", 2214 | "
" 2215 | ], 2216 | "text/plain": [ 2217 | " preco_max preco_minimo volume_negocios mm5d\n", 2218 | "237 32.94 31.67 9.166648e+10 32.264" 2219 | ] 2220 | }, 2221 | "execution_count": 24, 2222 | "metadata": {}, 2223 | "output_type": "execute_result" 2224 | } 2225 | ], 2226 | "source": [ 2227 | "valor_novo = features.tail(1)  # last row of the feature table\n", 2228 | "valor_novo" 2229 | ] 2230 | }, 2231 | { 2232 | "cell_type": "code", 2233 | "execution_count": 25, 2234 | "metadata": {}, 2235 | "outputs": [ 2236 | { 2237 | "data": { 2238 | "text/plain": [ 2239 | "array([31.84759339])" 2240 | ] 2241 | }, 2242 | "execution_count": 25, 2243 | "metadata": {}, 2244 | "output_type": "execute_result" 2245 | } 2246 | ], 2247 | "source": [ 2248 | "# Scale the last row with the fitted scaler and predict it with the linear regression model (lr).\n", 2249 | "\n", 2250 | "\n", 2251 | "previsao=scaler.transform(valor_novo)\n", 2252 | "\n", 2253 | "\n", 2254 | "pred=lr.predict(previsao)\n", 2255 | "\n", 2256 | "pred" 2257 | ] 2258 | }, 2259 | { 2260 | "cell_type": "code", 2261 | "execution_count": 26, 2262 | "metadata": {}, 2263 | "outputs": [], 2264 | "source": [ 2265 | "df = df[df['sigla_acao'] == 'ITUB4' ]  # keep only the ITUB4 rows" 2266 | ] 2267 | }, 2268 | { 2269 | "cell_type": "code", 2270 | "execution_count": 27, 2271 | "metadata": {}, 2272 | "outputs": [ 2273 | { 2274 | "name": "stdout", 2275 | "output_type": "stream", 2276 | "text": [ 2277 | " real previsao\n", 2278 | "data_pregao \n", 2279 | "2021-01-15 31.36 31.847593\n" 2280 | ] 2281 | } 2282 | ], 2283 | "source": [ 2284 | "# Build a one-row table with the last data_pregao, its real preco_fechamento and the prediction, indexed by data_pregao.\n", 2285 | "data_pregao_full=df['data_pregao']\n", 2286 | "data_pregao=data_pregao_full.tail(1)\n", 2287 | "\n", 2288 | "res_full=df['preco_fechamento']\n", 2289 | "res=res_full.tail(1)\n", 2290 | "\n", 2291 | "df=pd.DataFrame({'data_pregao':data_pregao, 'real':res, 'previsao':pred})  # NOTE(review): rebinds df to the result table, so the cells that read the original df columns cannot be re-run afterwards\n", 2292 | "\n", 2293 | "\n", 2294 | "df.set_index('data_pregao', inplace=True)\n", 2295 | "\n", 2296 | "print(df)" 2297 | ] 2298 | }, 2299 | { 2300 | "cell_type": "code", 2301 | "execution_count": null, 2302 | "metadata": {}, 2303 | "outputs": [],
2304 | "source": [ 2305 | "\n" 2306 | ] 2307 | } 2308 | ], 2309 | "metadata": { 2310 | "kernelspec": { 2311 | "display_name": "Python 3", 2312 | "language": "python", 2313 | "name": "python3" 2314 | }, 2315 | "language_info": { 2316 | "codemirror_mode": { 2317 | "name": "ipython", 2318 | "version": 3 2319 | }, 2320 | "file_extension": ".py", 2321 | "mimetype": "text/x-python", 2322 | "name": "python", 2323 | "nbconvert_exporter": "python", 2324 | "pygments_lexer": "ipython3", 2325 | "version": "3.7.6" 2326 | } 2327 | }, 2328 | "nbformat": 4, 2329 | "nbformat_minor": 4 2330 | } 2331 | --------------------------------------------------------------------------------