├── aula1.3
    ├── Introdução_a_Machine_Learning_e_Classificação_1.ipynb
    └── introdução_a_machine_learning_e_classificação_1.py
├── aula1.4
    ├── Introdução_a_Machine_Learning_e_Classificação_1.ipynb
    └── introdução_a_machine_learning_e_classificação_1.py
├── aula2.1
    ├── Introdução_a_Machine_Learning_Classificação_2.ipynb
    └── introdução_a_machine_learning_classificação_2.py
├── aula2.2
    ├── Introdução_a_Machine_Learning_Classificação_2.ipynb
    └── introdução_a_machine_learning_classificação_2.py
├── aula3.1
    ├── Introdução_a_Machine_Learning_3.ipynb
    └── introdução_a_machine_learning_3 (1).py
├── aula4.1
    ├── Introdução_a_Machine_Learning_3.ipynb
    └── introdução_a_machine_learning_3.py
├── aula5.1
    ├── Introdução_a_Machine_Learning_4.ipynb
    └── introdução_a_machine_learning_4.py
├── aula5.2
    ├── Introdução_a_Machine_Learning_4.ipynb
    └── introdução_a_machine_learning_4.py
└── aula5.4
    ├── Introdução_a_Machine_Learning_4.ipynb
    └── introdução_a_machine_learning_4.py


/aula1.3/Introdução_a_Machine_Learning_e_Classificação_1.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Introdução a Machine Learning e Classificação - 1.ipynb",
  7 |       "version": "0.3.2",
  8 |       "provenance": []
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     }
 14 |   },
 15 |   "cells": [
 16 |     {
 17 |       "metadata": {
 18 |         "id": "iN35zFmNyYIc",
 19 |         "colab_type": "code",
 20 |         "colab": {}
 21 |       },
 22 |       "cell_type": "code",
 23 |       "source": [
 24 |         "# features (1 sim, 0 não)\n",
 25 |         "# pelo longo?\n",
 26 |         "# perna curta?\n",
 27 |         "# faz auau?\n",
 28 |         "porco1 = [0, 1, 0]\n",
 29 |         "porco2 = [0, 1, 1]\n",
 30 |         "porco3 = [1, 1, 0]\n",
 31 |         "\n",
 32 |         "cachorro1 = [0, 1, 1]\n",
 33 |         "cachorro2 = [1, 0, 1]\n",
 34 |         "cachorro3 = [1, 1, 1]\n",
 35 |         "\n",
 36 |         "# 1 => porco, 0 => cachorro\n",
 37 |         "dados = [porco1, porco2, porco3, cachorro1, cachorro2, cachorro3]\n",
 38 |         "classes = [1,1,1,0,0,0]"
 39 |       ],
 40 |       "execution_count": 0,
 41 |       "outputs": []
 42 |     },
 43 |     {
 44 |       "metadata": {
 45 |         "id": "tcWrSPHkzaby",
 46 |         "colab_type": "code",
 47 |         "colab": {
 48 |           "base_uri": "https://localhost:8080/",
 49 |           "height": 86
 50 |         },
 51 |         "outputId": "17cd7227-3924-457c-b41a-2498a22c141e"
 52 |       },
 53 |       "cell_type": "code",
 54 |       "source": [
 55 |         "from sklearn.svm import LinearSVC\n",
 56 |         "\n",
 57 |         "model = LinearSVC()\n",
 58 |         "model.fit(dados, classes)"
 59 |       ],
 60 |       "execution_count": 7,
 61 |       "outputs": [
 62 |         {
 63 |           "output_type": "execute_result",
 64 |           "data": {
 65 |             "text/plain": [
 66 |               "LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n",
 67 |               "     intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n",
 68 |               "     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n",
 69 |               "     verbose=0)"
 70 |             ]
 71 |           },
 72 |           "metadata": {
 73 |             "tags": []
 74 |           },
 75 |           "execution_count": 7
 76 |         }
 77 |       ]
 78 |     },
 79 |     {
 80 |       "metadata": {
 81 |         "id": "YaUuFWOx0YZF",
 82 |         "colab_type": "code",
 83 |         "colab": {
 84 |           "base_uri": "https://localhost:8080/",
 85 |           "height": 34
 86 |         },
 87 |         "outputId": "43e963d4-9b76-40a4-9896-e513dd23fd8f"
 88 |       },
 89 |       "cell_type": "code",
 90 |       "source": [
 91 |         "animal_misterioso = [1,1,1]\n",
 92 |         "model.predict([animal_misterioso])"
 93 |       ],
 94 |       "execution_count": 9,
 95 |       "outputs": [
 96 |         {
 97 |           "output_type": "execute_result",
 98 |           "data": {
 99 |             "text/plain": [
100 |               "array([0])"
101 |             ]
102 |           },
103 |           "metadata": {
104 |             "tags": []
105 |           },
106 |           "execution_count": 9
107 |         }
108 |       ]
109 |     },
110 |     {
111 |       "metadata": {
112 |         "id": "yJEFM8mx0jtR",
113 |         "colab_type": "code",
114 |         "colab": {}
115 |       },
116 |       "cell_type": "code",
117 |       "source": [
118 |         "misterio1 = [1,1,1]\n",
119 |         "misterio2 = [1,1,0]\n",
120 |         "misterio3 = [0,1,1]\n",
121 |         "\n",
122 |         "testes = [misterio1, misterio2, misterio3]\n",
123 |         "previsoes = model.predict(testes)"
124 |       ],
125 |       "execution_count": 0,
126 |       "outputs": []
127 |     },
128 |     {
129 |       "metadata": {
130 |         "id": "3xOA4L4e03GO",
131 |         "colab_type": "code",
132 |         "colab": {}
133 |       },
134 |       "cell_type": "code",
135 |       "source": [
136 |         "testes_classes = [0, 1, 1]"
137 |       ],
138 |       "execution_count": 0,
139 |       "outputs": []
140 |     },
141 |     {
142 |       "metadata": {
143 |         "id": "cUaaDQol1b_D",
144 |         "colab_type": "code",
145 |         "colab": {
146 |           "base_uri": "https://localhost:8080/",
147 |           "height": 34
148 |         },
149 |         "outputId": "6b335fa6-9f1b-4e60-de6c-5dd417ff3b08"
150 |       },
151 |       "cell_type": "code",
152 |       "source": [
153 |         "corretos = (previsoes == testes_classes).sum()\n",
154 |         "total = len(testes)\n",
155 |         "taxa_de_acerto = corretos/total\n",
156 |         "print(\"Taxa de acerto: \", taxa_de_acerto * 100)"
157 |       ],
158 |       "execution_count": 24,
159 |       "outputs": [
160 |         {
161 |           "output_type": "stream",
162 |           "text": [
163 |             "Taxa de acerto:  66.66666666666666\n"
164 |           ],
165 |           "name": "stdout"
166 |         }
167 |       ]
168 |     },
169 |     {
170 |       "metadata": {
171 |         "id": "T6zhFhQa1c28",
172 |         "colab_type": "code",
173 |         "colab": {
174 |           "base_uri": "https://localhost:8080/",
175 |           "height": 34
176 |         },
177 |         "outputId": "3f8b5c7b-2d17-4233-c8de-e9cc9a3b7476"
178 |       },
179 |       "cell_type": "code",
180 |       "source": [
181 |         "from sklearn.metrics import accuracy_score\n",
182 |         "\n",
183 |         "taxa_de_acerto = accuracy_score(testes_classes, previsoes)\n",
184 |         "print(\"Taxa de acerto\", taxa_de_acerto * 100)"
185 |       ],
186 |       "execution_count": 26,
187 |       "outputs": [
188 |         {
189 |           "output_type": "stream",
190 |           "text": [
191 |             "Taxa de acerto 66.66666666666666\n"
192 |           ],
193 |           "name": "stdout"
194 |         }
195 |       ]
196 |     },
197 |     {
198 |       "metadata": {
199 |         "id": "Z9wOaemi2fzE",
200 |         "colab_type": "code",
201 |         "colab": {}
202 |       },
203 |       "cell_type": "code",
204 |       "source": [
205 |         ""
206 |       ],
207 |       "execution_count": 0,
208 |       "outputs": []
209 |     }
210 |   ]
211 | }


--------------------------------------------------------------------------------
/aula1.3/introdução_a_machine_learning_e_classificação_1.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """Introdução a Machine Learning e Classificação - 1.ipynb
 3 | 
 4 | Automatically generated by Colaboratory.
 5 | 
 6 | Original file is located at
 7 |     https://colab.research.google.com/drive/1SNvuZmre0mDEJgTBBXBzvptPtth7q_IX
 8 | """
 9 | 
10 | # Toy pig-vs-dog classifier. Each animal is a list of three binary features (1 = yes, 0 = no):
11 | #   long fur?
12 | #   short legs?
13 | #   barks?
14 | porco1 = [0, 1, 0]
15 | porco2 = [0, 1, 1]
16 | porco3 = [1, 1, 0]
17 | 
18 | cachorro1 = [0, 1, 1]  # NOTE(review): same features as porco2 but labelled as the other class
19 | cachorro2 = [1, 0, 1]
20 | cachorro3 = [1, 1, 1]
21 | 
22 | # Label encoding: 1 => pig (porco), 0 => dog (cachorro)
23 | dados = [porco1, porco2, porco3, cachorro1, cachorro2, cachorro3]  # training samples
24 | classes = [1,1,1,0,0,0]  # one label per sample, in the same order as `dados`
25 | 
26 | from sklearn.svm import LinearSVC
27 | 
28 | model = LinearSVC()  # constructed with no arguments, i.e. library defaults
29 | model.fit(dados, classes)  # train on the six labelled animals
30 | 
31 | animal_misterioso = [1,1,1]
32 | model.predict([animal_misterioso])  # single sample wrapped in a list; result is not stored (notebook cell output)
33 | 
34 | misterio1 = [1,1,1]
35 | misterio2 = [1,1,0]
36 | misterio3 = [0,1,1]
37 | 
38 | testes = [misterio1, misterio2, misterio3]  # test samples
39 | previsoes = model.predict(testes)  # predicted label for each test sample
40 | 
41 | testes_classes = [0, 1, 1]  # expected labels for misterio1..misterio3
42 | 
43 | corretos = (previsoes == testes_classes).sum()  # element-wise comparison, then count of matches; relies on `previsoes` being an array
44 | total = len(testes)
45 | taxa_de_acerto = corretos/total  # fraction of test samples predicted correctly
46 | print("Taxa de acerto: ", taxa_de_acerto * 100)  # printed as a percentage
47 | 
48 | from sklearn.metrics import accuracy_score
49 | 
50 | taxa_de_acerto = accuracy_score(testes_classes, previsoes)  # accuracy again, this time via scikit-learn's helper
51 | print("Taxa de acerto", taxa_de_acerto * 100)  # printed as a percentage
52 | 
53 | 


--------------------------------------------------------------------------------
/aula1.4/Introdução_a_Machine_Learning_e_Classificação_1.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Introdução a Machine Learning e Classificação - 1.ipynb",
  7 |       "version": "0.3.2",
  8 |       "provenance": [],
  9 |       "collapsed_sections": []
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     }
 15 |   },
 16 |   "cells": [
 17 |     {
 18 |       "metadata": {
 19 |         "id": "iN35zFmNyYIc",
 20 |         "colab_type": "code",
 21 |         "colab": {}
 22 |       },
 23 |       "cell_type": "code",
 24 |       "source": [
 25 |         "# features (1 sim, 0 não)\n",
 26 |         "# pelo longo?\n",
 27 |         "# perna curta?\n",
 28 |         "# faz auau?\n",
 29 |         "porco1 = [0, 1, 0]\n",
 30 |         "porco2 = [0, 1, 1]\n",
 31 |         "porco3 = [1, 1, 0]\n",
 32 |         "\n",
 33 |         "cachorro1 = [0, 1, 1]\n",
 34 |         "cachorro2 = [1, 0, 1]\n",
 35 |         "cachorro3 = [1, 1, 1]\n",
 36 |         "\n",
 37 |         "# 1 => porco, 0 => cachorro\n",
 38 |         "treino_x = [porco1, porco2, porco3, cachorro1, cachorro2, cachorro3]\n",
 39 |         "treino_y = [1,1,1,0,0,0] # labels / etiqueta"
 40 |       ],
 41 |       "execution_count": 0,
 42 |       "outputs": []
 43 |     },
 44 |     {
 45 |       "metadata": {
 46 |         "id": "tcWrSPHkzaby",
 47 |         "colab_type": "code",
 48 |         "colab": {
 49 |           "base_uri": "https://localhost:8080/",
 50 |           "height": 87
 51 |         },
 52 |         "outputId": "0675eaa5-68bd-4df2-cafe-a3d94a9fcec9"
 53 |       },
 54 |       "cell_type": "code",
 55 |       "source": [
 56 |         "from sklearn.svm import LinearSVC\n",
 57 |         "\n",
 58 |         "model = LinearSVC()\n",
 59 |         "model.fit(treino_x, treino_y)"
 60 |       ],
 61 |       "execution_count": 2,
 62 |       "outputs": [
 63 |         {
 64 |           "output_type": "execute_result",
 65 |           "data": {
 66 |             "text/plain": [
 67 |               "LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n",
 68 |               "     intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n",
 69 |               "     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n",
 70 |               "     verbose=0)"
 71 |             ]
 72 |           },
 73 |           "metadata": {
 74 |             "tags": []
 75 |           },
 76 |           "execution_count": 2
 77 |         }
 78 |       ]
 79 |     },
 80 |     {
 81 |       "metadata": {
 82 |         "id": "YaUuFWOx0YZF",
 83 |         "colab_type": "code",
 84 |         "colab": {
 85 |           "base_uri": "https://localhost:8080/",
 86 |           "height": 35
 87 |         },
 88 |         "outputId": "defc66aa-ce1b-460c-895c-e422ffe287c5"
 89 |       },
 90 |       "cell_type": "code",
 91 |       "source": [
 92 |         "animal_misterioso = [1,1,1]\n",
 93 |         "model.predict([animal_misterioso])"
 94 |       ],
 95 |       "execution_count": 3,
 96 |       "outputs": [
 97 |         {
 98 |           "output_type": "execute_result",
 99 |           "data": {
100 |             "text/plain": [
101 |               "array([0])"
102 |             ]
103 |           },
104 |           "metadata": {
105 |             "tags": []
106 |           },
107 |           "execution_count": 3
108 |         }
109 |       ]
110 |     },
111 |     {
112 |       "metadata": {
113 |         "id": "yJEFM8mx0jtR",
114 |         "colab_type": "code",
115 |         "colab": {}
116 |       },
117 |       "cell_type": "code",
118 |       "source": [
119 |         "misterio1 = [1,1,1]\n",
120 |         "misterio2 = [1,1,0]\n",
121 |         "misterio3 = [0,1,1]\n",
122 |         "\n",
123 |         "teste_x = [misterio1, misterio2, misterio3]\n",
124 |         "teste_y = [0, 1, 1]"
125 |       ],
126 |       "execution_count": 0,
127 |       "outputs": []
128 |     },
129 |     {
130 |       "metadata": {
131 |         "id": "3xOA4L4e03GO",
132 |         "colab_type": "code",
133 |         "colab": {}
134 |       },
135 |       "cell_type": "code",
136 |       "source": [
137 |         "previsoes = model.predict(teste_x)"
138 |       ],
139 |       "execution_count": 0,
140 |       "outputs": []
141 |     },
142 |     {
143 |       "metadata": {
144 |         "id": "cUaaDQol1b_D",
145 |         "colab_type": "code",
146 |         "colab": {
147 |           "base_uri": "https://localhost:8080/",
148 |           "height": 35
149 |         },
150 |         "outputId": "4cd12076-c2e0-433c-b023-b0c3936b5b36"
151 |       },
152 |       "cell_type": "code",
153 |       "source": [
154 |         "corretos = (previsoes == teste_y).sum()\n",
155 |         "total = len(teste_x)\n",
156 |         "taxa_de_acerto = corretos/total\n",
157 |         "print(\"Taxa de acerto %.2f\" % (taxa_de_acerto * 100))"
158 |       ],
159 |       "execution_count": 8,
160 |       "outputs": [
161 |         {
162 |           "output_type": "stream",
163 |           "text": [
164 |             "Taxa de acerto 66.67\n"
165 |           ],
166 |           "name": "stdout"
167 |         }
168 |       ]
169 |     },
170 |     {
171 |       "metadata": {
172 |         "id": "T6zhFhQa1c28",
173 |         "colab_type": "code",
174 |         "colab": {
175 |           "base_uri": "https://localhost:8080/",
176 |           "height": 34
177 |         },
178 |         "outputId": "29ed71f9-5a9d-4de3-84f5-825121a92a0b"
179 |       },
180 |       "cell_type": "code",
181 |       "source": [
182 |         "from sklearn.metrics import accuracy_score\n",
183 |         "\n",
184 |         "taxa_de_acerto = accuracy_score(teste_y, previsoes)\n",
185 |         "print(\"Taxa de acerto %.2f\" % (taxa_de_acerto * 100))"
186 |       ],
187 |       "execution_count": 9,
188 |       "outputs": [
189 |         {
190 |           "output_type": "stream",
191 |           "text": [
192 |             "Taxa de acerto 66.67\n"
193 |           ],
194 |           "name": "stdout"
195 |         }
196 |       ]
197 |     },
198 |     {
199 |       "metadata": {
200 |         "id": "Z9wOaemi2fzE",
201 |         "colab_type": "code",
202 |         "colab": {}
203 |       },
204 |       "cell_type": "code",
205 |       "source": [
206 |         ""
207 |       ],
208 |       "execution_count": 0,
209 |       "outputs": []
210 |     }
211 |   ]
212 | }


--------------------------------------------------------------------------------
/aula1.4/introdução_a_machine_learning_e_classificação_1.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """Introdução a Machine Learning e Classificação - 1.ipynb
 3 | 
 4 | Automatically generated by Colaboratory.
 5 | 
 6 | Original file is located at
 7 |     https://colab.research.google.com/drive/1SNvuZmre0mDEJgTBBXBzvptPtth7q_IX
 8 | """
 9 | 
10 | # Toy pig-vs-dog classifier. Each animal is a list of three binary features (1 = yes, 0 = no):
11 | #   long fur?
12 | #   short legs?
13 | #   barks?
14 | porco1 = [0, 1, 0]
15 | porco2 = [0, 1, 1]
16 | porco3 = [1, 1, 0]
17 | 
18 | cachorro1 = [0, 1, 1]  # NOTE(review): same features as porco2 but labelled as the other class
19 | cachorro2 = [1, 0, 1]
20 | cachorro3 = [1, 1, 1]
21 | 
22 | # Label encoding: 1 => pig (porco), 0 => dog (cachorro)
23 | treino_x = [porco1, porco2, porco3, cachorro1, cachorro2, cachorro3]  # training features
24 | treino_y = [1,1,1,0,0,0] # training labels, in the same order as `treino_x`
25 | 
26 | from sklearn.svm import LinearSVC
27 | 
28 | model = LinearSVC()  # constructed with no arguments, i.e. library defaults
29 | model.fit(treino_x, treino_y)  # train on the six labelled animals
30 | 
31 | animal_misterioso = [1,1,1]
32 | model.predict([animal_misterioso])  # single sample wrapped in a list; result is not stored (notebook cell output)
33 | 
34 | misterio1 = [1,1,1]
35 | misterio2 = [1,1,0]
36 | misterio3 = [0,1,1]
37 | 
38 | teste_x = [misterio1, misterio2, misterio3]  # test features
39 | teste_y = [0, 1, 1]  # expected labels for misterio1..misterio3
40 | 
41 | previsoes = model.predict(teste_x)  # predicted label for each test sample
42 | 
43 | corretos = (previsoes == teste_y).sum()  # element-wise comparison, then count of matches; relies on `previsoes` being an array
44 | total = len(teste_x)
45 | taxa_de_acerto = corretos/total  # fraction of test samples predicted correctly
46 | print("Taxa de acerto %.2f" % (taxa_de_acerto * 100))  # percentage with two decimals
47 | 
48 | from sklearn.metrics import accuracy_score
49 | 
50 | taxa_de_acerto = accuracy_score(teste_y, previsoes)  # accuracy again, this time via scikit-learn's helper
51 | print("Taxa de acerto %.2f" % (taxa_de_acerto * 100))  # percentage with two decimals
52 | 
53 | 


--------------------------------------------------------------------------------
/aula2.1/Introdução_a_Machine_Learning_Classificação_2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Introdução a Machine Learning Classificação - 2.ipynb",
  7 |       "version": "0.3.2",
  8 |       "provenance": [],
  9 |       "collapsed_sections": []
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     }
 15 |   },
 16 |   "cells": [
 17 |     {
 18 |       "metadata": {
 19 |         "id": "mEIOC06i7QEJ",
 20 |         "colab_type": "code",
 21 |         "colab": {
 22 |           "base_uri": "https://localhost:8080/",
 23 |           "height": 202
 24 |         },
 25 |         "outputId": "f6dbd4b4-8372-44b6-c3d1-92a4e08d83ff"
 26 |       },
 27 |       "cell_type": "code",
 28 |       "source": [
 29 |         "import pandas as pd\n",
 30 |         "\n",
 31 |         "uri = \"https://gist.githubusercontent.com/guilhermesilveira/2d2efa37d66b6c84a722ea627a897ced/raw/10968b997d885cbded1c92938c7a9912ba41c615/tracking.csv\"\n",
 32 |         "dados = pd.read_csv(uri)\n",
 33 |         "dados.head()"
 34 |       ],
 35 |       "execution_count": 2,
 36 |       "outputs": [
 37 |         {
 38 |           "output_type": "execute_result",
 39 |           "data": {
 40 |             "text/html": [
 41 |               "<div>\n",
 42 |               "<style scoped>\n",
 43 |               "    .dataframe tbody tr th:only-of-type {\n",
 44 |               "        vertical-align: middle;\n",
 45 |               "    }\n",
 46 |               "\n",
 47 |               "    .dataframe tbody tr th {\n",
 48 |               "        vertical-align: top;\n",
 49 |               "    }\n",
 50 |               "\n",
 51 |               "    .dataframe thead th {\n",
 52 |               "        text-align: right;\n",
 53 |               "    }\n",
 54 |               "</style>\n",
 55 |               "<table border=\"1\" class=\"dataframe\">\n",
 56 |               "  <thead>\n",
 57 |               "    <tr style=\"text-align: right;\">\n",
 58 |               "      <th></th>\n",
 59 |               "      <th>home</th>\n",
 60 |               "      <th>how_it_works</th>\n",
 61 |               "      <th>contact</th>\n",
 62 |               "      <th>bought</th>\n",
 63 |               "    </tr>\n",
 64 |               "  </thead>\n",
 65 |               "  <tbody>\n",
 66 |               "    <tr>\n",
 67 |               "      <th>0</th>\n",
 68 |               "      <td>1</td>\n",
 69 |               "      <td>1</td>\n",
 70 |               "      <td>0</td>\n",
 71 |               "      <td>0</td>\n",
 72 |               "    </tr>\n",
 73 |               "    <tr>\n",
 74 |               "      <th>1</th>\n",
 75 |               "      <td>1</td>\n",
 76 |               "      <td>1</td>\n",
 77 |               "      <td>0</td>\n",
 78 |               "      <td>0</td>\n",
 79 |               "    </tr>\n",
 80 |               "    <tr>\n",
 81 |               "      <th>2</th>\n",
 82 |               "      <td>1</td>\n",
 83 |               "      <td>1</td>\n",
 84 |               "      <td>0</td>\n",
 85 |               "      <td>0</td>\n",
 86 |               "    </tr>\n",
 87 |               "    <tr>\n",
 88 |               "      <th>3</th>\n",
 89 |               "      <td>1</td>\n",
 90 |               "      <td>1</td>\n",
 91 |               "      <td>0</td>\n",
 92 |               "      <td>0</td>\n",
 93 |               "    </tr>\n",
 94 |               "    <tr>\n",
 95 |               "      <th>4</th>\n",
 96 |               "      <td>1</td>\n",
 97 |               "      <td>1</td>\n",
 98 |               "      <td>0</td>\n",
 99 |               "      <td>0</td>\n",
100 |               "    </tr>\n",
101 |               "  </tbody>\n",
102 |               "</table>\n",
103 |               "</div>"
104 |             ],
105 |             "text/plain": [
106 |               "   home  how_it_works  contact  bought\n",
107 |               "0     1             1        0       0\n",
108 |               "1     1             1        0       0\n",
109 |               "2     1             1        0       0\n",
110 |               "3     1             1        0       0\n",
111 |               "4     1             1        0       0"
112 |             ]
113 |           },
114 |           "metadata": {
115 |             "tags": []
116 |           },
117 |           "execution_count": 2
118 |         }
119 |       ]
120 |     },
121 |     {
122 |       "metadata": {
123 |         "id": "uDu0eTJn7x0D",
124 |         "colab_type": "code",
125 |         "colab": {}
126 |       },
127 |       "cell_type": "code",
128 |       "source": [
129 |         "mapa = {\n",
130 |         "    \"home\" : \"principal\",\n",
131 |         "    \"how_it_works\" : \"como_funciona\",\n",
132 |         "    \"contact\" : \"contato\",\n",
133 |         "    \"bought\" : \"comprou\"\n",
134 |         "}\n",
135 |         "dados = dados.rename(columns = mapa)"
136 |       ],
137 |       "execution_count": 0,
138 |       "outputs": []
139 |     },
140 |     {
141 |       "metadata": {
142 |         "id": "9En1V0PM7e8V",
143 |         "colab_type": "code",
144 |         "colab": {
145 |           "base_uri": "https://localhost:8080/",
146 |           "height": 202
147 |         },
148 |         "outputId": "0f54cba9-4f0c-433e-b7a0-b0fa15d39f5a"
149 |       },
150 |       "cell_type": "code",
151 |       "source": [
152 |         "x = dados[[\"principal\",\"como_funciona\",\"contato\"]]\n",
153 |         "x.head()"
154 |       ],
155 |       "execution_count": 10,
156 |       "outputs": [
157 |         {
158 |           "output_type": "execute_result",
159 |           "data": {
160 |             "text/html": [
161 |               "<div>\n",
162 |               "<style scoped>\n",
163 |               "    .dataframe tbody tr th:only-of-type {\n",
164 |               "        vertical-align: middle;\n",
165 |               "    }\n",
166 |               "\n",
167 |               "    .dataframe tbody tr th {\n",
168 |               "        vertical-align: top;\n",
169 |               "    }\n",
170 |               "\n",
171 |               "    .dataframe thead th {\n",
172 |               "        text-align: right;\n",
173 |               "    }\n",
174 |               "</style>\n",
175 |               "<table border=\"1\" class=\"dataframe\">\n",
176 |               "  <thead>\n",
177 |               "    <tr style=\"text-align: right;\">\n",
178 |               "      <th></th>\n",
179 |               "      <th>principal</th>\n",
180 |               "      <th>como_funciona</th>\n",
181 |               "      <th>contato</th>\n",
182 |               "    </tr>\n",
183 |               "  </thead>\n",
184 |               "  <tbody>\n",
185 |               "    <tr>\n",
186 |               "      <th>0</th>\n",
187 |               "      <td>1</td>\n",
188 |               "      <td>1</td>\n",
189 |               "      <td>0</td>\n",
190 |               "    </tr>\n",
191 |               "    <tr>\n",
192 |               "      <th>1</th>\n",
193 |               "      <td>1</td>\n",
194 |               "      <td>1</td>\n",
195 |               "      <td>0</td>\n",
196 |               "    </tr>\n",
197 |               "    <tr>\n",
198 |               "      <th>2</th>\n",
199 |               "      <td>1</td>\n",
200 |               "      <td>1</td>\n",
201 |               "      <td>0</td>\n",
202 |               "    </tr>\n",
203 |               "    <tr>\n",
204 |               "      <th>3</th>\n",
205 |               "      <td>1</td>\n",
206 |               "      <td>1</td>\n",
207 |               "      <td>0</td>\n",
208 |               "    </tr>\n",
209 |               "    <tr>\n",
210 |               "      <th>4</th>\n",
211 |               "      <td>1</td>\n",
212 |               "      <td>1</td>\n",
213 |               "      <td>0</td>\n",
214 |               "    </tr>\n",
215 |               "  </tbody>\n",
216 |               "</table>\n",
217 |               "</div>"
218 |             ],
219 |             "text/plain": [
220 |               "   principal  como_funciona  contato\n",
221 |               "0          1              1        0\n",
222 |               "1          1              1        0\n",
223 |               "2          1              1        0\n",
224 |               "3          1              1        0\n",
225 |               "4          1              1        0"
226 |             ]
227 |           },
228 |           "metadata": {
229 |             "tags": []
230 |           },
231 |           "execution_count": 10
232 |         }
233 |       ]
234 |     },
235 |     {
236 |       "metadata": {
237 |         "id": "qDr2YoWu8O3O",
238 |         "colab_type": "code",
239 |         "colab": {
240 |           "base_uri": "https://localhost:8080/",
241 |           "height": 121
242 |         },
243 |         "outputId": "29495494-aff4-4b5f-b8ae-95ea280f3bce"
244 |       },
245 |       "cell_type": "code",
246 |       "source": [
247 |         "y = dados[\"comprou\"]\n",
248 |         "y.head()"
249 |       ],
250 |       "execution_count": 11,
251 |       "outputs": [
252 |         {
253 |           "output_type": "execute_result",
254 |           "data": {
255 |             "text/plain": [
256 |               "0    0\n",
257 |               "1    0\n",
258 |               "2    0\n",
259 |               "3    0\n",
260 |               "4    0\n",
261 |               "Name: comprou, dtype: int64"
262 |             ]
263 |           },
264 |           "metadata": {
265 |             "tags": []
266 |           },
267 |           "execution_count": 11
268 |         }
269 |       ]
270 |     },
271 |     {
272 |       "metadata": {
273 |         "id": "X5pZ6xcZ8fYq",
274 |         "colab_type": "code",
275 |         "colab": {
276 |           "base_uri": "https://localhost:8080/",
277 |           "height": 35
278 |         },
279 |         "outputId": "a31766ff-52df-4b74-97a7-605916419c87"
280 |       },
281 |       "cell_type": "code",
282 |       "source": [
283 |         "dados.shape"
284 |       ],
285 |       "execution_count": 12,
286 |       "outputs": [
287 |         {
288 |           "output_type": "execute_result",
289 |           "data": {
290 |             "text/plain": [
291 |               "(99, 4)"
292 |             ]
293 |           },
294 |           "metadata": {
295 |             "tags": []
296 |           },
297 |           "execution_count": 12
298 |         }
299 |       ]
300 |     },
301 |     {
302 |       "metadata": {
303 |         "id": "TLZ9eTvP9U9q",
304 |         "colab_type": "code",
305 |         "colab": {
306 |           "base_uri": "https://localhost:8080/",
307 |           "height": 35
308 |         },
309 |         "outputId": "5c392acb-77ff-496c-d8f9-573a8c6414d2"
310 |       },
311 |       "cell_type": "code",
312 |       "source": [
313 |         "treino_x = x[:75]\n",
314 |         "treino_y = y[:75]\n",
315 |         "teste_x = x[75:]\n",
316 |         "teste_y = y[75:]\n",
317 |         "\n",
318 |         "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))"
319 |       ],
320 |       "execution_count": 16,
321 |       "outputs": [
322 |         {
323 |           "output_type": "stream",
324 |           "text": [
325 |             "Treinaremos com 75 elementos e testaremos com 24 elementos\n"
326 |           ],
327 |           "name": "stdout"
328 |         }
329 |       ]
330 |     },
331 |     {
332 |       "metadata": {
333 |         "id": "pZZjbQxh9jn8",
334 |         "colab_type": "code",
335 |         "colab": {
336 |           "base_uri": "https://localhost:8080/",
337 |           "height": 34
338 |         },
339 |         "outputId": "b7feb2a2-2694-4e6a-aa6c-9fb33e25917f"
340 |       },
341 |       "cell_type": "code",
342 |       "source": [
343 |         "from sklearn.svm import LinearSVC\n",
344 |         "from sklearn.metrics import accuracy_score\n",
345 |         "\n",
346 |         "modelo = LinearSVC()\n",
347 |         "modelo.fit(treino_x, treino_y)\n",
348 |         "previsoes = modelo.predict(teste_x)\n",
349 |         "\n",
350 |         "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
351 |         "print(\"A acurácia foi %.2f%%\" % acuracia)"
352 |       ],
353 |       "execution_count": 20,
354 |       "outputs": [
355 |         {
356 |           "output_type": "stream",
357 |           "text": [
358 |             "A acurácia foi 95.83%\n"
359 |           ],
360 |           "name": "stdout"
361 |         }
362 |       ]
363 |     },
364 |     {
365 |       "metadata": {
366 |         "id": "rA-z0_a6-CM1",
367 |         "colab_type": "code",
368 |         "colab": {}
369 |       },
370 |       "cell_type": "code",
371 |       "source": [
372 |         ""
373 |       ],
374 |       "execution_count": 0,
375 |       "outputs": []
376 |     }
377 |   ]
378 | }


--------------------------------------------------------------------------------
/aula2.1/introdução_a_machine_learning_classificação_2.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """Introdução a Machine Learning Classificação - 2.ipynb
 3 | 
 4 | Automatically generated by Colaboratory.
 5 | 
 6 | Original file is located at
 7 |     https://colab.research.google.com/drive/1nIhP3F_nGiAQayvsPziHuEOZva-HvzLn
 8 | """
 9 | 
10 | import pandas as pd
11 | 
12 | uri = "https://gist.githubusercontent.com/guilhermesilveira/2d2efa37d66b6c84a722ea627a897ced/raw/10968b997d885cbded1c92938c7a9912ba41c615/tracking.csv"
13 | dados = pd.read_csv(uri)  # load the CSV straight from the URL (requires network access)
14 | dados.head()  # notebook preview of the first rows; the result is discarded when run as a script
15 | 
16 | mapa = {  # original (English) column name -> Portuguese column name
17 |     "home" : "principal",
18 |     "how_it_works" : "como_funciona",
19 |     "contact" : "contato",
20 |     "bought" : "comprou"
21 | }
22 | dados = dados.rename(columns = mapa)  # rebind `dados` to the frame with renamed columns
23 | 
24 | x = dados[["principal","como_funciona","contato"]]  # feature columns
25 | x.head()  # notebook preview; result discarded
26 | 
27 | y = dados["comprou"]  # target column
28 | y.head()  # notebook preview; result discarded
29 | 
30 | dados.shape  # notebook preview; the paired notebook recorded (99, 4)
31 | 
32 | treino_x = x[:75]  # first 75 rows for training -- NOTE(review): positional split, no shuffling
33 | treino_y = y[:75]
34 | teste_x = x[75:]  # remaining rows for testing
35 | teste_y = y[75:]
36 | 
37 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))  # report split sizes
38 | 
39 | from sklearn.svm import LinearSVC
40 | from sklearn.metrics import accuracy_score
41 | 
42 | modelo = LinearSVC()  # constructed with no arguments, i.e. library defaults
43 | modelo.fit(treino_x, treino_y)  # train on the training slice only
44 | previsoes = modelo.predict(teste_x)  # predicted label for each test row
45 | 
46 | acuracia = accuracy_score(teste_y, previsoes) * 100  # accuracy on the test slice, as a percentage
47 | print("A acurácia foi %.2f%%" % acuracia)  # `%%` prints a literal percent sign
48 | 
49 | 


--------------------------------------------------------------------------------
/aula2.2/Introdução_a_Machine_Learning_Classificação_2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Introdução a Machine Learning Classificação - 2.ipynb",
  7 |       "version": "0.3.2",
  8 |       "provenance": [],
  9 |       "collapsed_sections": []
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     }
 15 |   },
 16 |   "cells": [
 17 |     {
 18 |       "metadata": {
 19 |         "id": "mEIOC06i7QEJ",
 20 |         "colab_type": "code",
 21 |         "colab": {
 22 |           "base_uri": "https://localhost:8080/",
 23 |           "height": 195
 24 |         },
 25 |         "outputId": "f6dbd4b4-8372-44b6-c3d1-92a4e08d83ff"
 26 |       },
 27 |       "cell_type": "code",
 28 |       "source": [
 29 |         "import pandas as pd\n",
 30 |         "\n",
 31 |         "uri = \"https://gist.githubusercontent.com/guilhermesilveira/2d2efa37d66b6c84a722ea627a897ced/raw/10968b997d885cbded1c92938c7a9912ba41c615/tracking.csv\"\n",
 32 |         "dados = pd.read_csv(uri)\n",
 33 |         "dados.head()"
 34 |       ],
 35 |       "execution_count": 2,
 36 |       "outputs": [
 37 |         {
 38 |           "output_type": "execute_result",
 39 |           "data": {
 40 |             "text/html": [
 41 |               "<div>\n",
 42 |               "<style scoped>\n",
 43 |               "    .dataframe tbody tr th:only-of-type {\n",
 44 |               "        vertical-align: middle;\n",
 45 |               "    }\n",
 46 |               "\n",
 47 |               "    .dataframe tbody tr th {\n",
 48 |               "        vertical-align: top;\n",
 49 |               "    }\n",
 50 |               "\n",
 51 |               "    .dataframe thead th {\n",
 52 |               "        text-align: right;\n",
 53 |               "    }\n",
 54 |               "</style>\n",
 55 |               "<table border=\"1\" class=\"dataframe\">\n",
 56 |               "  <thead>\n",
 57 |               "    <tr style=\"text-align: right;\">\n",
 58 |               "      <th></th>\n",
 59 |               "      <th>home</th>\n",
 60 |               "      <th>how_it_works</th>\n",
 61 |               "      <th>contact</th>\n",
 62 |               "      <th>bought</th>\n",
 63 |               "    </tr>\n",
 64 |               "  </thead>\n",
 65 |               "  <tbody>\n",
 66 |               "    <tr>\n",
 67 |               "      <th>0</th>\n",
 68 |               "      <td>1</td>\n",
 69 |               "      <td>1</td>\n",
 70 |               "      <td>0</td>\n",
 71 |               "      <td>0</td>\n",
 72 |               "    </tr>\n",
 73 |               "    <tr>\n",
 74 |               "      <th>1</th>\n",
 75 |               "      <td>1</td>\n",
 76 |               "      <td>1</td>\n",
 77 |               "      <td>0</td>\n",
 78 |               "      <td>0</td>\n",
 79 |               "    </tr>\n",
 80 |               "    <tr>\n",
 81 |               "      <th>2</th>\n",
 82 |               "      <td>1</td>\n",
 83 |               "      <td>1</td>\n",
 84 |               "      <td>0</td>\n",
 85 |               "      <td>0</td>\n",
 86 |               "    </tr>\n",
 87 |               "    <tr>\n",
 88 |               "      <th>3</th>\n",
 89 |               "      <td>1</td>\n",
 90 |               "      <td>1</td>\n",
 91 |               "      <td>0</td>\n",
 92 |               "      <td>0</td>\n",
 93 |               "    </tr>\n",
 94 |               "    <tr>\n",
 95 |               "      <th>4</th>\n",
 96 |               "      <td>1</td>\n",
 97 |               "      <td>1</td>\n",
 98 |               "      <td>0</td>\n",
 99 |               "      <td>0</td>\n",
100 |               "    </tr>\n",
101 |               "  </tbody>\n",
102 |               "</table>\n",
103 |               "</div>"
104 |             ],
105 |             "text/plain": [
106 |               "   home  how_it_works  contact  bought\n",
107 |               "0     1             1        0       0\n",
108 |               "1     1             1        0       0\n",
109 |               "2     1             1        0       0\n",
110 |               "3     1             1        0       0\n",
111 |               "4     1             1        0       0"
112 |             ]
113 |           },
114 |           "metadata": {
115 |             "tags": []
116 |           },
117 |           "execution_count": 2
118 |         }
119 |       ]
120 |     },
121 |     {
122 |       "metadata": {
123 |         "id": "uDu0eTJn7x0D",
124 |         "colab_type": "code",
125 |         "colab": {}
126 |       },
127 |       "cell_type": "code",
128 |       "source": [
129 |         "mapa = {\n",
130 |         "    \"home\" : \"principal\",\n",
131 |         "    \"how_it_works\" : \"como_funciona\",\n",
132 |         "    \"contact\" : \"contato\",\n",
133 |         "    \"bought\" : \"comprou\"\n",
134 |         "}\n",
135 |         "dados = dados.rename(columns = mapa)"
136 |       ],
137 |       "execution_count": 0,
138 |       "outputs": []
139 |     },
140 |     {
141 |       "metadata": {
142 |         "id": "9En1V0PM7e8V",
143 |         "colab_type": "code",
144 |         "colab": {
145 |           "base_uri": "https://localhost:8080/",
146 |           "height": 195
147 |         },
148 |         "outputId": "0f54cba9-4f0c-433e-b7a0-b0fa15d39f5a"
149 |       },
150 |       "cell_type": "code",
151 |       "source": [
152 |         "x = dados[[\"principal\",\"como_funciona\",\"contato\"]]\n",
153 |         "x.head()"
154 |       ],
155 |       "execution_count": 10,
156 |       "outputs": [
157 |         {
158 |           "output_type": "execute_result",
159 |           "data": {
160 |             "text/html": [
161 |               "<div>\n",
162 |               "<style scoped>\n",
163 |               "    .dataframe tbody tr th:only-of-type {\n",
164 |               "        vertical-align: middle;\n",
165 |               "    }\n",
166 |               "\n",
167 |               "    .dataframe tbody tr th {\n",
168 |               "        vertical-align: top;\n",
169 |               "    }\n",
170 |               "\n",
171 |               "    .dataframe thead th {\n",
172 |               "        text-align: right;\n",
173 |               "    }\n",
174 |               "</style>\n",
175 |               "<table border=\"1\" class=\"dataframe\">\n",
176 |               "  <thead>\n",
177 |               "    <tr style=\"text-align: right;\">\n",
178 |               "      <th></th>\n",
179 |               "      <th>principal</th>\n",
180 |               "      <th>como_funciona</th>\n",
181 |               "      <th>contato</th>\n",
182 |               "    </tr>\n",
183 |               "  </thead>\n",
184 |               "  <tbody>\n",
185 |               "    <tr>\n",
186 |               "      <th>0</th>\n",
187 |               "      <td>1</td>\n",
188 |               "      <td>1</td>\n",
189 |               "      <td>0</td>\n",
190 |               "    </tr>\n",
191 |               "    <tr>\n",
192 |               "      <th>1</th>\n",
193 |               "      <td>1</td>\n",
194 |               "      <td>1</td>\n",
195 |               "      <td>0</td>\n",
196 |               "    </tr>\n",
197 |               "    <tr>\n",
198 |               "      <th>2</th>\n",
199 |               "      <td>1</td>\n",
200 |               "      <td>1</td>\n",
201 |               "      <td>0</td>\n",
202 |               "    </tr>\n",
203 |               "    <tr>\n",
204 |               "      <th>3</th>\n",
205 |               "      <td>1</td>\n",
206 |               "      <td>1</td>\n",
207 |               "      <td>0</td>\n",
208 |               "    </tr>\n",
209 |               "    <tr>\n",
210 |               "      <th>4</th>\n",
211 |               "      <td>1</td>\n",
212 |               "      <td>1</td>\n",
213 |               "      <td>0</td>\n",
214 |               "    </tr>\n",
215 |               "  </tbody>\n",
216 |               "</table>\n",
217 |               "</div>"
218 |             ],
219 |             "text/plain": [
220 |               "   principal  como_funciona  contato\n",
221 |               "0          1              1        0\n",
222 |               "1          1              1        0\n",
223 |               "2          1              1        0\n",
224 |               "3          1              1        0\n",
225 |               "4          1              1        0"
226 |             ]
227 |           },
228 |           "metadata": {
229 |             "tags": []
230 |           },
231 |           "execution_count": 10
232 |         }
233 |       ]
234 |     },
235 |     {
236 |       "metadata": {
237 |         "id": "qDr2YoWu8O3O",
238 |         "colab_type": "code",
239 |         "colab": {
240 |           "base_uri": "https://localhost:8080/",
241 |           "height": 118
242 |         },
243 |         "outputId": "29495494-aff4-4b5f-b8ae-95ea280f3bce"
244 |       },
245 |       "cell_type": "code",
246 |       "source": [
247 |         "y = dados[\"comprou\"]\n",
248 |         "y.head()"
249 |       ],
250 |       "execution_count": 11,
251 |       "outputs": [
252 |         {
253 |           "output_type": "execute_result",
254 |           "data": {
255 |             "text/plain": [
256 |               "0    0\n",
257 |               "1    0\n",
258 |               "2    0\n",
259 |               "3    0\n",
260 |               "4    0\n",
261 |               "Name: comprou, dtype: int64"
262 |             ]
263 |           },
264 |           "metadata": {
265 |             "tags": []
266 |           },
267 |           "execution_count": 11
268 |         }
269 |       ]
270 |     },
271 |     {
272 |       "metadata": {
273 |         "id": "X5pZ6xcZ8fYq",
274 |         "colab_type": "code",
275 |         "colab": {
276 |           "base_uri": "https://localhost:8080/",
277 |           "height": 34
278 |         },
279 |         "outputId": "a31766ff-52df-4b74-97a7-605916419c87"
280 |       },
281 |       "cell_type": "code",
282 |       "source": [
283 |         "dados.shape"
284 |       ],
285 |       "execution_count": 12,
286 |       "outputs": [
287 |         {
288 |           "output_type": "execute_result",
289 |           "data": {
290 |             "text/plain": [
291 |               "(99, 4)"
292 |             ]
293 |           },
294 |           "metadata": {
295 |             "tags": []
296 |           },
297 |           "execution_count": 12
298 |         }
299 |       ]
300 |     },
301 |     {
302 |       "metadata": {
303 |         "id": "TLZ9eTvP9U9q",
304 |         "colab_type": "code",
305 |         "colab": {
306 |           "base_uri": "https://localhost:8080/",
307 |           "height": 34
308 |         },
309 |         "outputId": "5c392acb-77ff-496c-d8f9-573a8c6414d2"
310 |       },
311 |       "cell_type": "code",
312 |       "source": [
313 |         "treino_x = x[:75]\n",
314 |         "treino_y = y[:75]\n",
315 |         "teste_x = x[75:]\n",
316 |         "teste_y = y[75:]\n",
317 |         "\n",
318 |         "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))"
319 |       ],
320 |       "execution_count": 16,
321 |       "outputs": [
322 |         {
323 |           "output_type": "stream",
324 |           "text": [
325 |             "Treinaremos com 75 elementos e testaremos com 24 elementos\n"
326 |           ],
327 |           "name": "stdout"
328 |         }
329 |       ]
330 |     },
331 |     {
332 |       "metadata": {
333 |         "id": "pZZjbQxh9jn8",
334 |         "colab_type": "code",
335 |         "colab": {
336 |           "base_uri": "https://localhost:8080/",
337 |           "height": 34
338 |         },
339 |         "outputId": "b7feb2a2-2694-4e6a-aa6c-9fb33e25917f"
340 |       },
341 |       "cell_type": "code",
342 |       "source": [
343 |         "from sklearn.svm import LinearSVC\n",
344 |         "from sklearn.metrics import accuracy_score\n",
345 |         "\n",
346 |         "modelo = LinearSVC()\n",
347 |         "modelo.fit(treino_x, treino_y)\n",
348 |         "previsoes = modelo.predict(teste_x)\n",
349 |         "\n",
350 |         "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
351 |         "print(\"A acurácia foi %.2f%%\" % acuracia)"
352 |       ],
353 |       "execution_count": 20,
354 |       "outputs": [
355 |         {
356 |           "output_type": "stream",
357 |           "text": [
358 |             "A acurácia foi 95.83%\n"
359 |           ],
360 |           "name": "stdout"
361 |         }
362 |       ]
363 |     },
364 |     {
365 |       "metadata": {
366 |         "id": "2iVcuGkyA5tK",
367 |         "colab_type": "text"
368 |       },
369 |       "cell_type": "markdown",
370 |       "source": [
371 |         "# Usando a biblioteca para separar treino e teste"
372 |       ]
373 |     },
374 |     {
375 |       "metadata": {
376 |         "id": "rA-z0_a6-CM1",
377 |         "colab_type": "code",
378 |         "colab": {
379 |           "base_uri": "https://localhost:8080/",
380 |           "height": 50
381 |         },
382 |         "outputId": "f3287dba-50a8-4cd8-9001-ce41278c8bb1"
383 |       },
384 |       "cell_type": "code",
385 |       "source": [
386 |         "from sklearn.model_selection import train_test_split\n",
387 |         "from sklearn.svm import LinearSVC\n",
388 |         "from sklearn.metrics import accuracy_score\n",
389 |         "\n",
390 |         "SEED = 20\n",
391 |         "\n",
392 |         "treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, random_state = SEED, test_size = 0.25)\n",
393 |         "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n",
394 |         "\n",
395 |         "modelo = LinearSVC()\n",
396 |         "modelo.fit(treino_x, treino_y)\n",
397 |         "previsoes = modelo.predict(teste_x)\n",
398 |         "\n",
399 |         "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
400 |         "print(\"A acurácia foi %.2f%%\" % acuracia)"
401 |       ],
402 |       "execution_count": 35,
403 |       "outputs": [
404 |         {
405 |           "output_type": "stream",
406 |           "text": [
407 |             "Treinaremos com 74 elementos e testaremos com 25 elementos\n",
408 |             "A acurácia foi 96.00%\n"
409 |           ],
410 |           "name": "stdout"
411 |         }
412 |       ]
413 |     },
414 |     {
415 |       "metadata": {
416 |         "id": "JWFKlQccAk1F",
417 |         "colab_type": "code",
418 |         "colab": {
419 |           "base_uri": "https://localhost:8080/",
420 |           "height": 68
421 |         },
422 |         "outputId": "749d52d7-a4b3-488b-a7ad-01ab66793ef7"
423 |       },
424 |       "cell_type": "code",
425 |       "source": [
426 |         "treino_y.value_counts()"
427 |       ],
428 |       "execution_count": 37,
429 |       "outputs": [
430 |         {
431 |           "output_type": "execute_result",
432 |           "data": {
433 |             "text/plain": [
434 |               "0    47\n",
435 |               "1    27\n",
436 |               "Name: comprou, dtype: int64"
437 |             ]
438 |           },
439 |           "metadata": {
440 |             "tags": []
441 |           },
442 |           "execution_count": 37
443 |         }
444 |       ]
445 |     },
446 |     {
447 |       "metadata": {
448 |         "id": "fdORezxWBcwX",
449 |         "colab_type": "code",
450 |         "colab": {
451 |           "base_uri": "https://localhost:8080/",
452 |           "height": 68
453 |         },
454 |         "outputId": "7ce6be19-5f39-498a-cf61-e76af3990271"
455 |       },
456 |       "cell_type": "code",
457 |       "source": [
458 |         "teste_y.value_counts()"
459 |       ],
460 |       "execution_count": 38,
461 |       "outputs": [
462 |         {
463 |           "output_type": "execute_result",
464 |           "data": {
465 |             "text/plain": [
466 |               "0    19\n",
467 |               "1     6\n",
468 |               "Name: comprou, dtype: int64"
469 |             ]
470 |           },
471 |           "metadata": {
472 |             "tags": []
473 |           },
474 |           "execution_count": 38
475 |         }
476 |       ]
477 |     },
478 |     {
479 |       "metadata": {
480 |         "id": "k_kFDoBABh4B",
481 |         "colab_type": "code",
482 |         "colab": {
483 |           "base_uri": "https://localhost:8080/",
484 |           "height": 51
485 |         },
486 |         "outputId": "ba8b4464-305a-4e94-9422-445600888353"
487 |       },
488 |       "cell_type": "code",
489 |       "source": [
490 |         "from sklearn.model_selection import train_test_split\n",
491 |         "from sklearn.svm import LinearSVC\n",
492 |         "from sklearn.metrics import accuracy_score\n",
493 |         "\n",
494 |         "SEED = 20\n",
495 |         "\n",
496 |         "treino_x, teste_x, treino_y, teste_y = train_test_split(x, y,\n",
497 |         "                                                         random_state = SEED, test_size = 0.25,\n",
498 |         "                                                         stratify = y)\n",
499 |         "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n",
500 |         "\n",
501 |         "modelo = LinearSVC()\n",
502 |         "modelo.fit(treino_x, treino_y)\n",
503 |         "previsoes = modelo.predict(teste_x)\n",
504 |         "\n",
505 |         "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
506 |         "print(\"A acurácia foi %.2f%%\" % acuracia)"
507 |       ],
508 |       "execution_count": 41,
509 |       "outputs": [
510 |         {
511 |           "output_type": "stream",
512 |           "text": [
513 |             "Treinaremos com 74 elementos e testaremos com 25 elementos\n",
514 |             "A acurácia foi 96.00%\n"
515 |           ],
516 |           "name": "stdout"
517 |         }
518 |       ]
519 |     },
520 |     {
521 |       "metadata": {
522 |         "id": "SS4n0CVXB6Fo",
523 |         "colab_type": "code",
524 |         "colab": {
525 |           "base_uri": "https://localhost:8080/",
526 |           "height": 67
527 |         },
528 |         "outputId": "60d3e7e9-4a79-4d96-f3f5-6ef1a843519a"
529 |       },
530 |       "cell_type": "code",
531 |       "source": [
532 |         "treino_y.value_counts()"
533 |       ],
534 |       "execution_count": 42,
535 |       "outputs": [
536 |         {
537 |           "output_type": "execute_result",
538 |           "data": {
539 |             "text/plain": [
540 |               "0    49\n",
541 |               "1    25\n",
542 |               "Name: comprou, dtype: int64"
543 |             ]
544 |           },
545 |           "metadata": {
546 |             "tags": []
547 |           },
548 |           "execution_count": 42
549 |         }
550 |       ]
551 |     },
552 |     {
553 |       "metadata": {
554 |         "id": "hgvSvos6CHIk",
555 |         "colab_type": "code",
556 |         "colab": {
557 |           "base_uri": "https://localhost:8080/",
558 |           "height": 67
559 |         },
560 |         "outputId": "8ec62784-d5a3-4197-81fb-accc0632bdf8"
561 |       },
562 |       "cell_type": "code",
563 |       "source": [
564 |         "teste_y.value_counts()"
565 |       ],
566 |       "execution_count": 43,
567 |       "outputs": [
568 |         {
569 |           "output_type": "execute_result",
570 |           "data": {
571 |             "text/plain": [
572 |               "0    17\n",
573 |               "1     8\n",
574 |               "Name: comprou, dtype: int64"
575 |             ]
576 |           },
577 |           "metadata": {
578 |             "tags": []
579 |           },
580 |           "execution_count": 43
581 |         }
582 |       ]
583 |     },
584 |     {
585 |       "metadata": {
586 |         "id": "bqhIUWBsCH8w",
587 |         "colab_type": "code",
588 |         "colab": {}
589 |       },
590 |       "cell_type": "code",
591 |       "source": [
592 |         ""
593 |       ],
594 |       "execution_count": 0,
595 |       "outputs": []
596 |     }
597 |   ]
598 | }


--------------------------------------------------------------------------------
/aula2.2/introdução_a_machine_learning_classificação_2.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """Introdução a Machine Learning Classificação - 2.ipynb
 3 | 
 4 | Automatically generated by Colaboratory.
 5 | 
 6 | Original file is located at
 7 |     https://colab.research.google.com/drive/1nIhP3F_nGiAQayvsPziHuEOZva-HvzLn
 8 | """
 9 | 
10 | import pandas as pd
11 | # Load tracking.csv from the gist URL into a DataFrame.
12 | uri = "https://gist.githubusercontent.com/guilhermesilveira/2d2efa37d66b6c84a722ea627a897ced/raw/10968b997d885cbded1c92938c7a9912ba41c615/tracking.csv"
13 | dados = pd.read_csv(uri)
14 | dados.head()  # notebook preview; the result is discarded when run as a plain script
15 | # Rename the English CSV columns to the Portuguese names used from here on.
16 | mapa = {
17 |     "home" : "principal",
18 |     "how_it_works" : "como_funciona",
19 |     "contact" : "contato",
20 |     "bought" : "comprou"
21 | }
22 | dados = dados.rename(columns = mapa)
23 | # x holds the three feature columns; y is the "comprou" column used as the label.
24 | x = dados[["principal","como_funciona","contato"]]
25 | x.head()
26 | 
27 | y = dados["comprou"]
28 | y.head()
29 | 
30 | dados.shape
31 | # Positional split without shuffling: the first 75 rows train, every later row tests.
32 | treino_x = x[:75]
33 | treino_y = y[:75]
34 | teste_x = x[75:]
35 | teste_y = y[75:]
36 | 
37 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
38 | 
39 | from sklearn.svm import LinearSVC
40 | from sklearn.metrics import accuracy_score
41 | # Fit a linear SVM classifier on the training rows, then predict the test rows.
42 | modelo = LinearSVC()
43 | modelo.fit(treino_x, treino_y)
44 | previsoes = modelo.predict(teste_x)
45 | # accuracy_score returns a fraction; it is multiplied by 100 to print a percentage.
46 | acuracia = accuracy_score(teste_y, previsoes) * 100
47 | print("A acurácia foi %.2f%%" % acuracia)
48 | 
49 | """# Usando a biblioteca para separar treino e teste"""
50 | 
51 | from sklearn.model_selection import train_test_split
52 | from sklearn.svm import LinearSVC
53 | from sklearn.metrics import accuracy_score
54 | # Fixed seed, passed as random_state so the split is the same on every run.
55 | SEED = 20
56 | # Hold out 25% of the rows for testing; this rebinds the names from the manual split above.
57 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, random_state = SEED, test_size = 0.25)
58 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
59 | 
60 | modelo = LinearSVC()
61 | modelo.fit(treino_x, treino_y)
62 | previsoes = modelo.predict(teste_x)
63 | 
64 | acuracia = accuracy_score(teste_y, previsoes) * 100
65 | print("A acurácia foi %.2f%%" % acuracia)
66 | # Label counts in each split (notebook previews; discarded when run as a plain script).
67 | treino_y.value_counts()
68 | 
69 | teste_y.value_counts()
70 | 
71 | from sklearn.model_selection import train_test_split
72 | from sklearn.svm import LinearSVC
73 | from sklearn.metrics import accuracy_score
74 | # Same seed and test_size as the previous split; the only addition is stratify = y.
75 | SEED = 20
76 | # stratify = y asks train_test_split to keep y's class proportions in both splits.
77 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y,
78 |                                                          random_state = SEED, test_size = 0.25,
79 |                                                          stratify = y)
80 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
81 | 
82 | modelo = LinearSVC()
83 | modelo.fit(treino_x, treino_y)
84 | previsoes = modelo.predict(teste_x)
85 | 
86 | acuracia = accuracy_score(teste_y, previsoes) * 100
87 | print("A acurácia foi %.2f%%" % acuracia)
88 | # Label counts in each split after stratification (notebook previews).
89 | treino_y.value_counts()
90 | 
91 | teste_y.value_counts()
92 | 
93 | 


--------------------------------------------------------------------------------
/aula3.1/introdução_a_machine_learning_3 (1).py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """Introdução a Machine Learning 3.ipynb
 3 | 
 4 | Automatically generated by Colaboratory.
 5 | 
 6 | Original file is located at
 7 |     https://colab.research.google.com/drive/1r4UlftWbCZA3w-glDhPlo1TUK-Pf6-Sa
 8 | """
 9 | 
10 | !pip install seaborn==0.9.0
11 | # NOTE(review): the "!pip" line above is notebook shell syntax, not Python; presumably this export is only run inside Colab/IPython.
12 | import pandas as pd
13 | # Load projects.csv from the gist URL into a DataFrame.
14 | uri = "https://gist.githubusercontent.com/guilhermesilveira/1b7d5475863c15f484ac495bd70975cf/raw/16aff7a0aee67e7c100a2a48b676a2d2d142f646/projects.csv"
15 | dados = pd.read_csv(uri)
16 | dados.head()
17 | # Rename the English CSV columns to the Portuguese names used from here on.
18 | a_renomear = {
19 |     'expected_hours' : 'horas_esperadas',
20 |     'price' : 'preco',
21 |     'unfinished' : 'nao_finalizado'
22 | }
23 | dados = dados.rename(columns = a_renomear)
24 | dados.head()
25 | # Invert the 0/1 nao_finalizado flag into a new finalizado column.
26 | troca = {
27 |     0 : 1,
28 |     1 : 0
29 | }
30 | dados['finalizado'] = dados.nao_finalizado.map(troca)
31 | dados.head()
32 | 
33 | dados.tail()
34 | 
35 | import seaborn as sns
36 | # Three views of horas_esperadas vs preco: plain, coloured by finalizado, and one panel per finalizado value.
37 | sns.scatterplot(x="horas_esperadas", y="preco", data=dados)
38 | 
39 | sns.scatterplot(x="horas_esperadas", y="preco", hue="finalizado", data=dados)
40 | 
41 | sns.relplot(x="horas_esperadas", y="preco", hue="finalizado", col="finalizado", data=dados)
42 | # Features are horas_esperadas and preco; the label is finalizado.
43 | x = dados[['horas_esperadas', 'preco']]
44 | y = dados['finalizado']
45 | 
46 | from sklearn.model_selection import train_test_split
47 | from sklearn.svm import LinearSVC
48 | from sklearn.metrics import accuracy_score
49 | # Fixed seed, passed as random_state so the split is the same on every run.
50 | SEED = 20
51 | # Hold out 25% of the rows for testing; stratify = y keeps y's class proportions in both splits.
52 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y,
53 |                                                          random_state = SEED, test_size = 0.25,
54 |                                                          stratify = y)
55 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
56 | # Fit a linear SVM classifier on the training rows, then predict the test rows.
57 | modelo = LinearSVC()
58 | modelo.fit(treino_x, treino_y)
59 | previsoes = modelo.predict(teste_x)
60 | # accuracy_score returns a fraction; it is multiplied by 100 to print a percentage.
61 | acuracia = accuracy_score(teste_y, previsoes) * 100
62 | print("A acurácia foi %.2f%%" % acuracia)
63 | 
64 | import numpy as np
65 | previsoes_de_base = np.ones(540)
66 | acuracia = accuracy_score(teste_y, previsoes_de_base) * 100
67 | print("A acurácia do algoritmo de baseline foi %.2f%%" % acuracia)
68 | 
69 | 


--------------------------------------------------------------------------------
/aula4.1/introdução_a_machine_learning_3.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """Introdução a Machine Learning 3.ipynb
  3 | 
  4 | Automatically generated by Colaboratory.
  5 | 
  6 | Original file is located at
  7 |     https://colab.research.google.com/drive/1r4UlftWbCZA3w-glDhPlo1TUK-Pf6-Sa
  8 | """
  9 | 
 10 | !pip install seaborn==0.9.0
 11 | # NOTE(review): the "!pip" line above is notebook shell syntax, not Python; presumably this export is only run inside Colab/IPython.
 12 | import pandas as pd
 13 | # Load projects.csv from the gist URL into a DataFrame.
 14 | uri = "https://gist.githubusercontent.com/guilhermesilveira/1b7d5475863c15f484ac495bd70975cf/raw/16aff7a0aee67e7c100a2a48b676a2d2d142f646/projects.csv"
 15 | dados = pd.read_csv(uri)
 16 | dados.head()
 17 | # Rename the English CSV columns to the Portuguese names used from here on.
 18 | a_renomear = {
 19 |     'expected_hours' : 'horas_esperadas',
 20 |     'price' : 'preco',
 21 |     'unfinished' : 'nao_finalizado'
 22 | }
 23 | dados = dados.rename(columns = a_renomear)
 24 | dados.head()
 25 | # Invert the 0/1 nao_finalizado flag into a new finalizado column.
 26 | troca = {
 27 |     0 : 1,
 28 |     1 : 0
 29 | }
 30 | dados['finalizado'] = dados.nao_finalizado.map(troca)
 31 | dados.head()
 32 | 
 33 | dados.tail()
 34 | 
 35 | import seaborn as sns
 36 | # Three views of horas_esperadas vs preco: plain, coloured by finalizado, and one panel per finalizado value.
 37 | sns.scatterplot(x="horas_esperadas", y="preco", data=dados)
 38 | 
 39 | sns.scatterplot(x="horas_esperadas", y="preco", hue="finalizado", data=dados)
 40 | 
 41 | sns.relplot(x="horas_esperadas", y="preco", hue="finalizado", col="finalizado", data=dados)
 42 | # Features are horas_esperadas and preco; the label is finalizado.
 43 | x = dados[['horas_esperadas', 'preco']]
 44 | y = dados['finalizado']
 45 | 
 46 | from sklearn.model_selection import train_test_split
 47 | from sklearn.svm import LinearSVC
 48 | from sklearn.metrics import accuracy_score
 49 | 
 50 | SEED = 5
 51 | np.random.seed(SEED)
 52 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
 53 |                                                          stratify = y)
 54 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
 55 | 
 56 | modelo = LinearSVC()
 57 | modelo.fit(treino_x, treino_y)
 58 | previsoes = modelo.predict(teste_x)
 59 | 
 60 | acuracia = accuracy_score(teste_y, previsoes) * 100
 61 | print("A acurácia foi %.2f%%" % acuracia)
 62 | 
 63 | import numpy as np
 64 | previsoes_de_base = np.ones(540)
 65 | acuracia = accuracy_score(teste_y, previsoes_de_base) * 100
 66 | print("A acurácia do algoritmo de baseline foi %.2f%%" % acuracia)
 67 | 
 68 | sns.scatterplot(x="horas_esperadas", y="preco", hue=teste_y, data=teste_x)
 69 | # Bounding box of the test points on both feature axes.
 70 | x_min = teste_x.horas_esperadas.min()
 71 | x_max = teste_x.horas_esperadas.max()
 72 | y_min = teste_x.preco.min()
 73 | y_max = teste_x.preco.max()
 74 | print(x_min, x_max,y_min,y_max)
 75 | # Sample each axis from min towards max in steps of 1/pixels of its range.
 76 | pixels = 100
 77 | eixo_x = np.arange(x_min, x_max, (x_max - x_min) / pixels)
 78 | eixo_y = np.arange(y_min, y_max, (y_max - y_min) / pixels)
 79 | # Combine the two axes into a grid, flattened to one (x, y) row per grid point.
 80 | xx, yy = np.meshgrid(eixo_x, eixo_y)
 81 | pontos = np.c_[xx.ravel(), yy.ravel()]
 82 | pontos
 83 | # Classify every grid point, then reshape the predictions back to the grid layout.
 84 | Z = modelo.predict(pontos)
 85 | Z = Z.reshape(xx.shape)
 86 | Z
 87 | 
 88 | import matplotlib.pyplot as plt
 89 | # Shade the predicted class over the grid and overlay the test points coloured by teste_y.
 90 | plt.contourf(xx, yy, Z, alpha=0.3)
 91 | plt.scatter(teste_x.horas_esperadas, teste_x.preco, c=teste_y, s=1)
 92 | 
 93 | # DECISION BOUNDARY
 94 | 
 95 | from sklearn.model_selection import train_test_split
 96 | from sklearn.svm import SVC
 97 | from sklearn.metrics import accuracy_score
 98 | # Re-seed NumPy's global RNG before splitting; train_test_split gets no random_state here.
 99 | SEED = 5
100 | np.random.seed(SEED)
101 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
102 |                                                          stratify = y)
103 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
104 | # Same train/predict/score steps as the LinearSVC section, with SVC as the model.
105 | modelo = SVC()
106 | modelo.fit(treino_x, treino_y)
107 | previsoes = modelo.predict(teste_x)
108 | 
109 | acuracia = accuracy_score(teste_y, previsoes) * 100
110 | print("A acurácia foi %.2f%%" % acuracia)
111 | # Bounding box of the test points on both feature axes.
112 | x_min = teste_x.horas_esperadas.min()
113 | x_max = teste_x.horas_esperadas.max()
114 | y_min = teste_x.preco.min()
115 | y_max = teste_x.preco.max()
116 | # Sample each axis from min towards max in steps of 1/pixels of its range.
117 | pixels = 100
118 | eixo_x = np.arange(x_min, x_max, (x_max - x_min) / pixels)
119 | eixo_y = np.arange(y_min, y_max, (y_max - y_min) / pixels)
120 | # Combine the two axes into a grid, flattened to one (x, y) row per grid point.
121 | xx, yy = np.meshgrid(eixo_x, eixo_y)
122 | pontos = np.c_[xx.ravel(), yy.ravel()]
123 | # Classify every grid point, then reshape the predictions back to the grid layout.
124 | Z = modelo.predict(pontos)
125 | Z = Z.reshape(xx.shape)
126 | 
127 | import matplotlib.pyplot as plt
128 | # Shade the predicted class over the grid and overlay the test points coloured by teste_y.
129 | plt.contourf(xx, yy, Z, alpha=0.3)
130 | plt.scatter(teste_x.horas_esperadas, teste_x.preco, c=teste_y, s=1)
131 | 
132 | # DECISION BOUNDARY
133 | 
134 | from sklearn.preprocessing import StandardScaler
135 | from sklearn.model_selection import train_test_split
136 | from sklearn.svm import SVC
137 | from sklearn.metrics import accuracy_score
138 | 
139 | SEED = 5
140 | np.random.seed(SEED)
141 | raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
142 |                                                          stratify = y)
143 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
144 | 
145 | scaler = StandardScaler()
146 | scaler.fit(raw_treino_x)
147 | treino_x = scaler.transform(raw_treino_x)
148 | teste_x = scaler.transform(raw_teste_x)
149 | 
150 | modelo = SVC()
151 | modelo.fit(treino_x, treino_y)
152 | previsoes = modelo.predict(teste_x)
153 | 
154 | acuracia = accuracy_score(teste_y, previsoes) * 100
155 | print("A acurácia foi %.2f%%" % acuracia)
156 | 
157 | treino_x
158 | 
159 | data_x = teste_x[:,0]
160 | data_y = teste_x[:,1]
161 | 
162 | x_min = data_x.min()
163 | x_max = data_x.max()
164 | y_min = data_y.min()
165 | y_max = data_y.max()
166 | 
167 | pixels = 100
168 | eixo_x = np.arange(x_min, x_max, (x_max - x_min) / pixels)
169 | eixo_y = np.arange(y_min, y_max, (y_max - y_min) / pixels)
170 | 
171 | xx, yy = np.meshgrid(eixo_x, eixo_y)
172 | pontos = np.c_[xx.ravel(), yy.ravel()]
173 | 
174 | Z = modelo.predict(pontos)
175 | Z = Z.reshape(xx.shape)
176 | 
177 | import matplotlib.pyplot as plt
178 | 
179 | plt.contourf(xx, yy, Z, alpha=0.3)
180 | plt.scatter(data_x, data_y, c=teste_y, s=1)
181 | 
182 | # DECISION BOUNDARY
183 | 
184 | 


--------------------------------------------------------------------------------
/aula5.1/Introdução_a_Machine_Learning_4.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Introdução a Machine Learning - 4.ipynb",
  7 |       "version": "0.3.2",
  8 |       "provenance": [],
  9 |       "collapsed_sections": []
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     }
 15 |   },
 16 |   "cells": [
 17 |     {
 18 |       "metadata": {
 19 |         "id": "y7ik04NlDZMA",
 20 |         "colab_type": "code",
 21 |         "colab": {
 22 |           "base_uri": "https://localhost:8080/",
 23 |           "height": 195
 24 |         },
 25 |         "outputId": "0bdc8cf3-ab12-4a1c-83e4-3691f646e930"
 26 |       },
 27 |       "cell_type": "code",
 28 |       "source": [
 29 |         "import pandas as pd\n",
 30 |         "\n",
 31 |         "uri = \"https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv\"\n",
 32 |         "dados = pd.read_csv(uri)\n",
 33 |         "dados.head()"
 34 |       ],
 35 |       "execution_count": 4,
 36 |       "outputs": [
 37 |         {
 38 |           "output_type": "execute_result",
 39 |           "data": {
 40 |             "text/html": [
 41 |               "<div>\n",
 42 |               "<style scoped>\n",
 43 |               "    .dataframe tbody tr th:only-of-type {\n",
 44 |               "        vertical-align: middle;\n",
 45 |               "    }\n",
 46 |               "\n",
 47 |               "    .dataframe tbody tr th {\n",
 48 |               "        vertical-align: top;\n",
 49 |               "    }\n",
 50 |               "\n",
 51 |               "    .dataframe thead th {\n",
 52 |               "        text-align: right;\n",
 53 |               "    }\n",
 54 |               "</style>\n",
 55 |               "<table border=\"1\" class=\"dataframe\">\n",
 56 |               "  <thead>\n",
 57 |               "    <tr style=\"text-align: right;\">\n",
 58 |               "      <th></th>\n",
 59 |               "      <th>Unnamed: 0</th>\n",
 60 |               "      <th>mileage_per_year</th>\n",
 61 |               "      <th>model_year</th>\n",
 62 |               "      <th>price</th>\n",
 63 |               "      <th>sold</th>\n",
 64 |               "    </tr>\n",
 65 |               "  </thead>\n",
 66 |               "  <tbody>\n",
 67 |               "    <tr>\n",
 68 |               "      <th>0</th>\n",
 69 |               "      <td>0</td>\n",
 70 |               "      <td>21801</td>\n",
 71 |               "      <td>2000</td>\n",
 72 |               "      <td>30941.02</td>\n",
 73 |               "      <td>yes</td>\n",
 74 |               "    </tr>\n",
 75 |               "    <tr>\n",
 76 |               "      <th>1</th>\n",
 77 |               "      <td>1</td>\n",
 78 |               "      <td>7843</td>\n",
 79 |               "      <td>1998</td>\n",
 80 |               "      <td>40557.96</td>\n",
 81 |               "      <td>yes</td>\n",
 82 |               "    </tr>\n",
 83 |               "    <tr>\n",
 84 |               "      <th>2</th>\n",
 85 |               "      <td>2</td>\n",
 86 |               "      <td>7109</td>\n",
 87 |               "      <td>2006</td>\n",
 88 |               "      <td>89627.50</td>\n",
 89 |               "      <td>no</td>\n",
 90 |               "    </tr>\n",
 91 |               "    <tr>\n",
 92 |               "      <th>3</th>\n",
 93 |               "      <td>3</td>\n",
 94 |               "      <td>26823</td>\n",
 95 |               "      <td>2015</td>\n",
 96 |               "      <td>95276.14</td>\n",
 97 |               "      <td>no</td>\n",
 98 |               "    </tr>\n",
 99 |               "    <tr>\n",
100 |               "      <th>4</th>\n",
101 |               "      <td>4</td>\n",
102 |               "      <td>7935</td>\n",
103 |               "      <td>2014</td>\n",
104 |               "      <td>117384.68</td>\n",
105 |               "      <td>yes</td>\n",
106 |               "    </tr>\n",
107 |               "  </tbody>\n",
108 |               "</table>\n",
109 |               "</div>"
110 |             ],
111 |             "text/plain": [
112 |               "   Unnamed: 0  mileage_per_year  model_year      price sold\n",
113 |               "0           0             21801        2000   30941.02  yes\n",
114 |               "1           1              7843        1998   40557.96  yes\n",
115 |               "2           2              7109        2006   89627.50   no\n",
116 |               "3           3             26823        2015   95276.14   no\n",
117 |               "4           4              7935        2014  117384.68  yes"
118 |             ]
119 |           },
120 |           "metadata": {
121 |             "tags": []
122 |           },
123 |           "execution_count": 4
124 |         }
125 |       ]
126 |     },
127 |     {
128 |       "metadata": {
129 |         "id": "FqFrPmgJDhvM",
130 |         "colab_type": "code",
131 |         "colab": {
132 |           "base_uri": "https://localhost:8080/",
133 |           "height": 195
134 |         },
135 |         "outputId": "2c6b015c-4c70-461a-a5be-a1a5f225a5f7"
136 |       },
137 |       "cell_type": "code",
138 |       "source": [
139 |         "a_renomear = {\n",
140 |         "    'mileage_per_year' : 'milhas_por_ano',\n",
141 |         "    'model_year' : 'ano_do_modelo',\n",
142 |         "    'price' : 'preco',\n",
143 |         "    'sold' : 'vendido'\n",
144 |         "}\n",
145 |         "dados = dados.rename(columns=a_renomear)\n",
146 |         "dados.head()"
147 |       ],
148 |       "execution_count": 6,
149 |       "outputs": [
150 |         {
151 |           "output_type": "execute_result",
152 |           "data": {
153 |             "text/html": [
154 |               "<div>\n",
155 |               "<style scoped>\n",
156 |               "    .dataframe tbody tr th:only-of-type {\n",
157 |               "        vertical-align: middle;\n",
158 |               "    }\n",
159 |               "\n",
160 |               "    .dataframe tbody tr th {\n",
161 |               "        vertical-align: top;\n",
162 |               "    }\n",
163 |               "\n",
164 |               "    .dataframe thead th {\n",
165 |               "        text-align: right;\n",
166 |               "    }\n",
167 |               "</style>\n",
168 |               "<table border=\"1\" class=\"dataframe\">\n",
169 |               "  <thead>\n",
170 |               "    <tr style=\"text-align: right;\">\n",
171 |               "      <th></th>\n",
172 |               "      <th>Unnamed: 0</th>\n",
173 |               "      <th>milhas_por_ano</th>\n",
174 |               "      <th>ano_do_modelo</th>\n",
175 |               "      <th>preco</th>\n",
176 |               "      <th>vendido</th>\n",
177 |               "    </tr>\n",
178 |               "  </thead>\n",
179 |               "  <tbody>\n",
180 |               "    <tr>\n",
181 |               "      <th>0</th>\n",
182 |               "      <td>0</td>\n",
183 |               "      <td>21801</td>\n",
184 |               "      <td>2000</td>\n",
185 |               "      <td>30941.02</td>\n",
186 |               "      <td>yes</td>\n",
187 |               "    </tr>\n",
188 |               "    <tr>\n",
189 |               "      <th>1</th>\n",
190 |               "      <td>1</td>\n",
191 |               "      <td>7843</td>\n",
192 |               "      <td>1998</td>\n",
193 |               "      <td>40557.96</td>\n",
194 |               "      <td>yes</td>\n",
195 |               "    </tr>\n",
196 |               "    <tr>\n",
197 |               "      <th>2</th>\n",
198 |               "      <td>2</td>\n",
199 |               "      <td>7109</td>\n",
200 |               "      <td>2006</td>\n",
201 |               "      <td>89627.50</td>\n",
202 |               "      <td>no</td>\n",
203 |               "    </tr>\n",
204 |               "    <tr>\n",
205 |               "      <th>3</th>\n",
206 |               "      <td>3</td>\n",
207 |               "      <td>26823</td>\n",
208 |               "      <td>2015</td>\n",
209 |               "      <td>95276.14</td>\n",
210 |               "      <td>no</td>\n",
211 |               "    </tr>\n",
212 |               "    <tr>\n",
213 |               "      <th>4</th>\n",
214 |               "      <td>4</td>\n",
215 |               "      <td>7935</td>\n",
216 |               "      <td>2014</td>\n",
217 |               "      <td>117384.68</td>\n",
218 |               "      <td>yes</td>\n",
219 |               "    </tr>\n",
220 |               "  </tbody>\n",
221 |               "</table>\n",
222 |               "</div>"
223 |             ],
224 |             "text/plain": [
225 |               "   Unnamed: 0  milhas_por_ano  ano_do_modelo      preco vendido\n",
226 |               "0           0           21801           2000   30941.02     yes\n",
227 |               "1           1            7843           1998   40557.96     yes\n",
228 |               "2           2            7109           2006   89627.50      no\n",
229 |               "3           3           26823           2015   95276.14      no\n",
230 |               "4           4            7935           2014  117384.68     yes"
231 |             ]
232 |           },
233 |           "metadata": {
234 |             "tags": []
235 |           },
236 |           "execution_count": 6
237 |         }
238 |       ]
239 |     },
240 |     {
241 |       "metadata": {
242 |         "id": "J31fUGbVEKpW",
243 |         "colab_type": "code",
244 |         "colab": {
245 |           "base_uri": "https://localhost:8080/",
246 |           "height": 195
247 |         },
248 |         "outputId": "f25f070a-ee26-40ce-9b02-7e37aecc7b2f"
249 |       },
250 |       "cell_type": "code",
251 |       "source": [
252 |         "a_trocar = {\n",
253 |         "    'no' : 0,\n",
254 |         "    'yes' : 1\n",
255 |         "}\n",
256 |         "dados.vendido = dados.vendido.map(a_trocar)\n",
257 |         "dados.head()"
258 |       ],
259 |       "execution_count": 9,
260 |       "outputs": [
261 |         {
262 |           "output_type": "execute_result",
263 |           "data": {
264 |             "text/html": [
265 |               "<div>\n",
266 |               "<style scoped>\n",
267 |               "    .dataframe tbody tr th:only-of-type {\n",
268 |               "        vertical-align: middle;\n",
269 |               "    }\n",
270 |               "\n",
271 |               "    .dataframe tbody tr th {\n",
272 |               "        vertical-align: top;\n",
273 |               "    }\n",
274 |               "\n",
275 |               "    .dataframe thead th {\n",
276 |               "        text-align: right;\n",
277 |               "    }\n",
278 |               "</style>\n",
279 |               "<table border=\"1\" class=\"dataframe\">\n",
280 |               "  <thead>\n",
281 |               "    <tr style=\"text-align: right;\">\n",
282 |               "      <th></th>\n",
283 |               "      <th>Unnamed: 0</th>\n",
284 |               "      <th>milhas_por_ano</th>\n",
285 |               "      <th>ano_do_modelo</th>\n",
286 |               "      <th>preco</th>\n",
287 |               "      <th>vendido</th>\n",
288 |               "    </tr>\n",
289 |               "  </thead>\n",
290 |               "  <tbody>\n",
291 |               "    <tr>\n",
292 |               "      <th>0</th>\n",
293 |               "      <td>0</td>\n",
294 |               "      <td>21801</td>\n",
295 |               "      <td>2000</td>\n",
296 |               "      <td>30941.02</td>\n",
297 |               "      <td>1</td>\n",
298 |               "    </tr>\n",
299 |               "    <tr>\n",
300 |               "      <th>1</th>\n",
301 |               "      <td>1</td>\n",
302 |               "      <td>7843</td>\n",
303 |               "      <td>1998</td>\n",
304 |               "      <td>40557.96</td>\n",
305 |               "      <td>1</td>\n",
306 |               "    </tr>\n",
307 |               "    <tr>\n",
308 |               "      <th>2</th>\n",
309 |               "      <td>2</td>\n",
310 |               "      <td>7109</td>\n",
311 |               "      <td>2006</td>\n",
312 |               "      <td>89627.50</td>\n",
313 |               "      <td>0</td>\n",
314 |               "    </tr>\n",
315 |               "    <tr>\n",
316 |               "      <th>3</th>\n",
317 |               "      <td>3</td>\n",
318 |               "      <td>26823</td>\n",
319 |               "      <td>2015</td>\n",
320 |               "      <td>95276.14</td>\n",
321 |               "      <td>0</td>\n",
322 |               "    </tr>\n",
323 |               "    <tr>\n",
324 |               "      <th>4</th>\n",
325 |               "      <td>4</td>\n",
326 |               "      <td>7935</td>\n",
327 |               "      <td>2014</td>\n",
328 |               "      <td>117384.68</td>\n",
329 |               "      <td>1</td>\n",
330 |               "    </tr>\n",
331 |               "  </tbody>\n",
332 |               "</table>\n",
333 |               "</div>"
334 |             ],
335 |             "text/plain": [
336 |               "   Unnamed: 0  milhas_por_ano  ano_do_modelo      preco  vendido\n",
337 |               "0           0           21801           2000   30941.02        1\n",
338 |               "1           1            7843           1998   40557.96        1\n",
339 |               "2           2            7109           2006   89627.50        0\n",
340 |               "3           3           26823           2015   95276.14        0\n",
341 |               "4           4            7935           2014  117384.68        1"
342 |             ]
343 |           },
344 |           "metadata": {
345 |             "tags": []
346 |           },
347 |           "execution_count": 9
348 |         }
349 |       ]
350 |     },
351 |     {
352 |       "metadata": {
353 |         "id": "tZFog8O9EXYD",
354 |         "colab_type": "code",
355 |         "colab": {
356 |           "base_uri": "https://localhost:8080/",
357 |           "height": 195
358 |         },
359 |         "outputId": "09240207-0e20-4c07-822c-3a23186b99fe"
360 |       },
361 |       "cell_type": "code",
362 |       "source": [
363 |         "from datetime import datetime\n",
364 |         "\n",
365 |         "ano_atual = datetime.today().year\n",
366 |         "dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo\n",
367 |         "dados.head()"
368 |       ],
369 |       "execution_count": 12,
370 |       "outputs": [
371 |         {
372 |           "output_type": "execute_result",
373 |           "data": {
374 |             "text/html": [
375 |               "<div>\n",
376 |               "<style scoped>\n",
377 |               "    .dataframe tbody tr th:only-of-type {\n",
378 |               "        vertical-align: middle;\n",
379 |               "    }\n",
380 |               "\n",
381 |               "    .dataframe tbody tr th {\n",
382 |               "        vertical-align: top;\n",
383 |               "    }\n",
384 |               "\n",
385 |               "    .dataframe thead th {\n",
386 |               "        text-align: right;\n",
387 |               "    }\n",
388 |               "</style>\n",
389 |               "<table border=\"1\" class=\"dataframe\">\n",
390 |               "  <thead>\n",
391 |               "    <tr style=\"text-align: right;\">\n",
392 |               "      <th></th>\n",
393 |               "      <th>Unnamed: 0</th>\n",
394 |               "      <th>milhas_por_ano</th>\n",
395 |               "      <th>ano_do_modelo</th>\n",
396 |               "      <th>preco</th>\n",
397 |               "      <th>vendido</th>\n",
398 |               "      <th>idade_do_modelo</th>\n",
399 |               "    </tr>\n",
400 |               "  </thead>\n",
401 |               "  <tbody>\n",
402 |               "    <tr>\n",
403 |               "      <th>0</th>\n",
404 |               "      <td>0</td>\n",
405 |               "      <td>21801</td>\n",
406 |               "      <td>2000</td>\n",
407 |               "      <td>30941.02</td>\n",
408 |               "      <td>1</td>\n",
409 |               "      <td>18</td>\n",
410 |               "    </tr>\n",
411 |               "    <tr>\n",
412 |               "      <th>1</th>\n",
413 |               "      <td>1</td>\n",
414 |               "      <td>7843</td>\n",
415 |               "      <td>1998</td>\n",
416 |               "      <td>40557.96</td>\n",
417 |               "      <td>1</td>\n",
418 |               "      <td>20</td>\n",
419 |               "    </tr>\n",
420 |               "    <tr>\n",
421 |               "      <th>2</th>\n",
422 |               "      <td>2</td>\n",
423 |               "      <td>7109</td>\n",
424 |               "      <td>2006</td>\n",
425 |               "      <td>89627.50</td>\n",
426 |               "      <td>0</td>\n",
427 |               "      <td>12</td>\n",
428 |               "    </tr>\n",
429 |               "    <tr>\n",
430 |               "      <th>3</th>\n",
431 |               "      <td>3</td>\n",
432 |               "      <td>26823</td>\n",
433 |               "      <td>2015</td>\n",
434 |               "      <td>95276.14</td>\n",
435 |               "      <td>0</td>\n",
436 |               "      <td>3</td>\n",
437 |               "    </tr>\n",
438 |               "    <tr>\n",
439 |               "      <th>4</th>\n",
440 |               "      <td>4</td>\n",
441 |               "      <td>7935</td>\n",
442 |               "      <td>2014</td>\n",
443 |               "      <td>117384.68</td>\n",
444 |               "      <td>1</td>\n",
445 |               "      <td>4</td>\n",
446 |               "    </tr>\n",
447 |               "  </tbody>\n",
448 |               "</table>\n",
449 |               "</div>"
450 |             ],
451 |             "text/plain": [
452 |               "   Unnamed: 0  milhas_por_ano  ano_do_modelo      preco  vendido  \\\n",
453 |               "0           0           21801           2000   30941.02        1   \n",
454 |               "1           1            7843           1998   40557.96        1   \n",
455 |               "2           2            7109           2006   89627.50        0   \n",
456 |               "3           3           26823           2015   95276.14        0   \n",
457 |               "4           4            7935           2014  117384.68        1   \n",
458 |               "\n",
459 |               "   idade_do_modelo  \n",
460 |               "0               18  \n",
461 |               "1               20  \n",
462 |               "2               12  \n",
463 |               "3                3  \n",
464 |               "4                4  "
465 |             ]
466 |           },
467 |           "metadata": {
468 |             "tags": []
469 |           },
470 |           "execution_count": 12
471 |         }
472 |       ]
473 |     },
474 |     {
475 |       "metadata": {
476 |         "id": "3wWWgxhcFbR9",
477 |         "colab_type": "code",
478 |         "colab": {
479 |           "base_uri": "https://localhost:8080/",
480 |           "height": 195
481 |         },
482 |         "outputId": "5c1a4a30-6d60-44b7-d232-0c8d47bb4d22"
483 |       },
484 |       "cell_type": "code",
485 |       "source": [
486 |         "dados['km_por_ano'] = dados.milhas_por_ano * 1.60934\n",
487 |         "dados.head()"
488 |       ],
489 |       "execution_count": 15,
490 |       "outputs": [
491 |         {
492 |           "output_type": "execute_result",
493 |           "data": {
494 |             "text/html": [
495 |               "<div>\n",
496 |               "<style scoped>\n",
497 |               "    .dataframe tbody tr th:only-of-type {\n",
498 |               "        vertical-align: middle;\n",
499 |               "    }\n",
500 |               "\n",
501 |               "    .dataframe tbody tr th {\n",
502 |               "        vertical-align: top;\n",
503 |               "    }\n",
504 |               "\n",
505 |               "    .dataframe thead th {\n",
506 |               "        text-align: right;\n",
507 |               "    }\n",
508 |               "</style>\n",
509 |               "<table border=\"1\" class=\"dataframe\">\n",
510 |               "  <thead>\n",
511 |               "    <tr style=\"text-align: right;\">\n",
512 |               "      <th></th>\n",
513 |               "      <th>Unnamed: 0</th>\n",
514 |               "      <th>milhas_por_ano</th>\n",
515 |               "      <th>ano_do_modelo</th>\n",
516 |               "      <th>preco</th>\n",
517 |               "      <th>vendido</th>\n",
518 |               "      <th>idade_do_modelo</th>\n",
519 |               "      <th>km_por_ano</th>\n",
520 |               "    </tr>\n",
521 |               "  </thead>\n",
522 |               "  <tbody>\n",
523 |               "    <tr>\n",
524 |               "      <th>0</th>\n",
525 |               "      <td>0</td>\n",
526 |               "      <td>21801</td>\n",
527 |               "      <td>2000</td>\n",
528 |               "      <td>30941.02</td>\n",
529 |               "      <td>1</td>\n",
530 |               "      <td>18</td>\n",
531 |               "      <td>35085.22134</td>\n",
532 |               "    </tr>\n",
533 |               "    <tr>\n",
534 |               "      <th>1</th>\n",
535 |               "      <td>1</td>\n",
536 |               "      <td>7843</td>\n",
537 |               "      <td>1998</td>\n",
538 |               "      <td>40557.96</td>\n",
539 |               "      <td>1</td>\n",
540 |               "      <td>20</td>\n",
541 |               "      <td>12622.05362</td>\n",
542 |               "    </tr>\n",
543 |               "    <tr>\n",
544 |               "      <th>2</th>\n",
545 |               "      <td>2</td>\n",
546 |               "      <td>7109</td>\n",
547 |               "      <td>2006</td>\n",
548 |               "      <td>89627.50</td>\n",
549 |               "      <td>0</td>\n",
550 |               "      <td>12</td>\n",
551 |               "      <td>11440.79806</td>\n",
552 |               "    </tr>\n",
553 |               "    <tr>\n",
554 |               "      <th>3</th>\n",
555 |               "      <td>3</td>\n",
556 |               "      <td>26823</td>\n",
557 |               "      <td>2015</td>\n",
558 |               "      <td>95276.14</td>\n",
559 |               "      <td>0</td>\n",
560 |               "      <td>3</td>\n",
561 |               "      <td>43167.32682</td>\n",
562 |               "    </tr>\n",
563 |               "    <tr>\n",
564 |               "      <th>4</th>\n",
565 |               "      <td>4</td>\n",
566 |               "      <td>7935</td>\n",
567 |               "      <td>2014</td>\n",
568 |               "      <td>117384.68</td>\n",
569 |               "      <td>1</td>\n",
570 |               "      <td>4</td>\n",
571 |               "      <td>12770.11290</td>\n",
572 |               "    </tr>\n",
573 |               "  </tbody>\n",
574 |               "</table>\n",
575 |               "</div>"
576 |             ],
577 |             "text/plain": [
578 |               "   Unnamed: 0  milhas_por_ano  ano_do_modelo      preco  vendido  \\\n",
579 |               "0           0           21801           2000   30941.02        1   \n",
580 |               "1           1            7843           1998   40557.96        1   \n",
581 |               "2           2            7109           2006   89627.50        0   \n",
582 |               "3           3           26823           2015   95276.14        0   \n",
583 |               "4           4            7935           2014  117384.68        1   \n",
584 |               "\n",
585 |               "   idade_do_modelo   km_por_ano  \n",
586 |               "0               18  35085.22134  \n",
587 |               "1               20  12622.05362  \n",
588 |               "2               12  11440.79806  \n",
589 |               "3                3  43167.32682  \n",
590 |               "4                4  12770.11290  "
591 |             ]
592 |           },
593 |           "metadata": {
594 |             "tags": []
595 |           },
596 |           "execution_count": 15
597 |         }
598 |       ]
599 |     },
600 |     {
601 |       "metadata": {
602 |         "id": "MfjQNKlyFo2S",
603 |         "colab_type": "code",
604 |         "colab": {
605 |           "base_uri": "https://localhost:8080/",
606 |           "height": 195
607 |         },
608 |         "outputId": "908e17b5-9b48-48d0-f9e3-fbe69343a121"
609 |       },
610 |       "cell_type": "code",
611 |       "source": [
612 |         "dados = dados.drop(columns = [\"Unnamed: 0\", \"milhas_por_ano\",\"ano_do_modelo\"], axis=1)\n",
613 |         "dados.head()"
614 |       ],
615 |       "execution_count": 16,
616 |       "outputs": [
617 |         {
618 |           "output_type": "execute_result",
619 |           "data": {
620 |             "text/html": [
621 |               "<div>\n",
622 |               "<style scoped>\n",
623 |               "    .dataframe tbody tr th:only-of-type {\n",
624 |               "        vertical-align: middle;\n",
625 |               "    }\n",
626 |               "\n",
627 |               "    .dataframe tbody tr th {\n",
628 |               "        vertical-align: top;\n",
629 |               "    }\n",
630 |               "\n",
631 |               "    .dataframe thead th {\n",
632 |               "        text-align: right;\n",
633 |               "    }\n",
634 |               "</style>\n",
635 |               "<table border=\"1\" class=\"dataframe\">\n",
636 |               "  <thead>\n",
637 |               "    <tr style=\"text-align: right;\">\n",
638 |               "      <th></th>\n",
639 |               "      <th>preco</th>\n",
640 |               "      <th>vendido</th>\n",
641 |               "      <th>idade_do_modelo</th>\n",
642 |               "      <th>km_por_ano</th>\n",
643 |               "    </tr>\n",
644 |               "  </thead>\n",
645 |               "  <tbody>\n",
646 |               "    <tr>\n",
647 |               "      <th>0</th>\n",
648 |               "      <td>30941.02</td>\n",
649 |               "      <td>1</td>\n",
650 |               "      <td>18</td>\n",
651 |               "      <td>35085.22134</td>\n",
652 |               "    </tr>\n",
653 |               "    <tr>\n",
654 |               "      <th>1</th>\n",
655 |               "      <td>40557.96</td>\n",
656 |               "      <td>1</td>\n",
657 |               "      <td>20</td>\n",
658 |               "      <td>12622.05362</td>\n",
659 |               "    </tr>\n",
660 |               "    <tr>\n",
661 |               "      <th>2</th>\n",
662 |               "      <td>89627.50</td>\n",
663 |               "      <td>0</td>\n",
664 |               "      <td>12</td>\n",
665 |               "      <td>11440.79806</td>\n",
666 |               "    </tr>\n",
667 |               "    <tr>\n",
668 |               "      <th>3</th>\n",
669 |               "      <td>95276.14</td>\n",
670 |               "      <td>0</td>\n",
671 |               "      <td>3</td>\n",
672 |               "      <td>43167.32682</td>\n",
673 |               "    </tr>\n",
674 |               "    <tr>\n",
675 |               "      <th>4</th>\n",
676 |               "      <td>117384.68</td>\n",
677 |               "      <td>1</td>\n",
678 |               "      <td>4</td>\n",
679 |               "      <td>12770.11290</td>\n",
680 |               "    </tr>\n",
681 |               "  </tbody>\n",
682 |               "</table>\n",
683 |               "</div>"
684 |             ],
685 |             "text/plain": [
686 |               "       preco  vendido  idade_do_modelo   km_por_ano\n",
687 |               "0   30941.02        1               18  35085.22134\n",
688 |               "1   40557.96        1               20  12622.05362\n",
689 |               "2   89627.50        0               12  11440.79806\n",
690 |               "3   95276.14        0                3  43167.32682\n",
691 |               "4  117384.68        1                4  12770.11290"
692 |             ]
693 |           },
694 |           "metadata": {
695 |             "tags": []
696 |           },
697 |           "execution_count": 16
698 |         }
699 |       ]
700 |     },
701 |     {
702 |       "metadata": {
703 |         "id": "E3xebM4FF0Tc",
704 |         "colab_type": "code",
705 |         "colab": {
706 |           "base_uri": "https://localhost:8080/",
707 |           "height": 51
708 |         },
709 |         "outputId": "51eb40fc-4bb1-4183-85d5-de0fd699c5d7"
710 |       },
711 |       "cell_type": "code",
712 |       "source": [
713 |         "import numpy as np\n",
714 |         "from sklearn.model_selection import train_test_split\n",
715 |         "from sklearn.svm import LinearSVC\n",
716 |         "from sklearn.metrics import accuracy_score\n",
717 |         "\n",
718 |         "x = dados[[\"preco\", \"idade_do_modelo\",\"km_por_ano\"]]\n",
719 |         "y = dados[\"vendido\"]\n",
720 |         "\n",
721 |         "SEED = 5\n",
722 |         "np.random.seed(SEED)\n",
723 |         "treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n",
724 |         "                                                         stratify = y)\n",
725 |         "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n",
726 |         "\n",
727 |         "modelo = LinearSVC()\n",
728 |         "modelo.fit(treino_x, treino_y)\n",
729 |         "previsoes = modelo.predict(teste_x)\n",
730 |         "\n",
731 |         "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
732 |         "print(\"A acurácia foi %.2f%%\" % acuracia)"
733 |       ],
734 |       "execution_count": 18,
735 |       "outputs": [
736 |         {
737 |           "output_type": "stream",
738 |           "text": [
739 |             "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n",
740 |             "A acurácia foi 57.88%\n"
741 |           ],
742 |           "name": "stdout"
743 |         }
744 |       ]
745 |     },
746 |     {
747 |       "metadata": {
748 |         "id": "G2ZFWoPkGONL",
749 |         "colab_type": "code",
750 |         "colab": {}
751 |       },
752 |       "cell_type": "code",
753 |       "source": [
754 |         ""
755 |       ],
756 |       "execution_count": 0,
757 |       "outputs": []
758 |     }
759 |   ]
760 | }


--------------------------------------------------------------------------------
/aula5.1/introdução_a_machine_learning_4.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """Introdução a Machine Learning - 4.ipynb
 3 | 
 4 | Automatically generated by Colaboratory.
 5 | 
 6 | Original file is located at
 7 |     https://colab.research.google.com/drive/1RpYAAROMa4C86iZscVUzaWIeVYSJapyE
 8 | """
 9 | 
10 | import pandas as pd
11 | # Download the raw car-sales CSV from the gist below into the DataFrame `dados`.
12 | uri = "https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv"
13 | dados = pd.read_csv(uri)
14 | dados.head()  # notebook leftover: the preview is discarded when this runs as a plain script
15 | # Rename the English column names to the Portuguese names used from here on.
16 | a_renomear = {
17 |     'mileage_per_year' : 'milhas_por_ano',
18 |     'model_year' : 'ano_do_modelo',
19 |     'price' : 'preco',
20 |     'sold' : 'vendido'
21 | }
22 | dados = dados.rename(columns=a_renomear)
23 | dados.head()
24 | # Encode the target column as integers: 'no' -> 0, 'yes' -> 1.
25 | a_trocar = {
26 |     'no' : 0,
27 |     'yes' : 1
28 | }
29 | dados.vendido = dados.vendido.map(a_trocar)  # Series.map yields NaN for any value that is not a key of the dict
30 | dados.head()
31 | # Derive the model's age in years from its model year.
32 | from datetime import datetime
33 | # NOTE(review): the age is relative to the year in which the script runs, so this feature changes from year to year.
34 | ano_atual = datetime.today().year
35 | dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo
36 | dados.head()
37 | # Convert the yearly mileage from miles to kilometres (factor 1.60934).
38 | dados['km_por_ano'] = dados.milhas_por_ano * 1.60934
39 | dados.head()
40 | # Drop the columns that are no longer needed. NOTE(review): axis=1 looks redundant next to columns= - confirm before removing.
41 | dados = dados.drop(columns = ["Unnamed: 0", "milhas_por_ano","ano_do_modelo"], axis=1)
42 | dados.head()
43 | # Train a linear SVM on a stratified 75/25 split and print its accuracy on the test part.
44 | import numpy as np
45 | from sklearn.model_selection import train_test_split
46 | from sklearn.svm import LinearSVC
47 | from sklearn.metrics import accuracy_score
48 | # Feature matrix (x) and target vector (y).
49 | x = dados[["preco", "idade_do_modelo","km_por_ano"]]
50 | y = dados["vendido"]
51 | # Seed NumPy's global RNG; train_test_split and LinearSVC get no random_state below, so presumably they draw from it - verify against the installed scikit-learn.
52 | SEED = 5
53 | np.random.seed(SEED)
54 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
55 |                                                          stratify = y)
56 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
57 | # Fit LinearSVC with its default parameters and predict on the held-out rows.
58 | modelo = LinearSVC()
59 | modelo.fit(treino_x, treino_y)
60 | previsoes = modelo.predict(teste_x)
61 | # accuracy_score is multiplied by 100 so the value is printed as a percentage.
62 | acuracia = accuracy_score(teste_y, previsoes) * 100
63 | print("A acurácia foi %.2f%%" % acuracia)
64 | 
65 | 


--------------------------------------------------------------------------------
/aula5.2/Introdução_a_Machine_Learning_4.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Introdução a Machine Learning - 4.ipynb",
  7 |       "version": "0.3.2",
  8 |       "provenance": [],
  9 |       "collapsed_sections": []
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     }
 15 |   },
 16 |   "cells": [
 17 |     {
 18 |       "metadata": {
 19 |         "id": "y7ik04NlDZMA",
 20 |         "colab_type": "code",
 21 |         "colab": {
 22 |           "base_uri": "https://localhost:8080/",
 23 |           "height": 204
 24 |         },
 25 |         "outputId": "73c13868-60e4-4a25-9338-02f324cabb18"
 26 |       },
 27 |       "cell_type": "code",
 28 |       "source": [
 29 |         "import pandas as pd\n",
 30 |         "\n",
 31 |         "uri = \"https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv\"\n",
 32 |         "dados = pd.read_csv(uri)\n",
 33 |         "dados.head()"
 34 |       ],
 35 |       "execution_count": 1,
 36 |       "outputs": [
 37 |         {
 38 |           "output_type": "execute_result",
 39 |           "data": {
 40 |             "text/html": [
 41 |               "<div>\n",
 42 |               "<style scoped>\n",
 43 |               "    .dataframe tbody tr th:only-of-type {\n",
 44 |               "        vertical-align: middle;\n",
 45 |               "    }\n",
 46 |               "\n",
 47 |               "    .dataframe tbody tr th {\n",
 48 |               "        vertical-align: top;\n",
 49 |               "    }\n",
 50 |               "\n",
 51 |               "    .dataframe thead th {\n",
 52 |               "        text-align: right;\n",
 53 |               "    }\n",
 54 |               "</style>\n",
 55 |               "<table border=\"1\" class=\"dataframe\">\n",
 56 |               "  <thead>\n",
 57 |               "    <tr style=\"text-align: right;\">\n",
 58 |               "      <th></th>\n",
 59 |               "      <th>Unnamed: 0</th>\n",
 60 |               "      <th>mileage_per_year</th>\n",
 61 |               "      <th>model_year</th>\n",
 62 |               "      <th>price</th>\n",
 63 |               "      <th>sold</th>\n",
 64 |               "    </tr>\n",
 65 |               "  </thead>\n",
 66 |               "  <tbody>\n",
 67 |               "    <tr>\n",
 68 |               "      <th>0</th>\n",
 69 |               "      <td>0</td>\n",
 70 |               "      <td>21801</td>\n",
 71 |               "      <td>2000</td>\n",
 72 |               "      <td>30941.02</td>\n",
 73 |               "      <td>yes</td>\n",
 74 |               "    </tr>\n",
 75 |               "    <tr>\n",
 76 |               "      <th>1</th>\n",
 77 |               "      <td>1</td>\n",
 78 |               "      <td>7843</td>\n",
 79 |               "      <td>1998</td>\n",
 80 |               "      <td>40557.96</td>\n",
 81 |               "      <td>yes</td>\n",
 82 |               "    </tr>\n",
 83 |               "    <tr>\n",
 84 |               "      <th>2</th>\n",
 85 |               "      <td>2</td>\n",
 86 |               "      <td>7109</td>\n",
 87 |               "      <td>2006</td>\n",
 88 |               "      <td>89627.50</td>\n",
 89 |               "      <td>no</td>\n",
 90 |               "    </tr>\n",
 91 |               "    <tr>\n",
 92 |               "      <th>3</th>\n",
 93 |               "      <td>3</td>\n",
 94 |               "      <td>26823</td>\n",
 95 |               "      <td>2015</td>\n",
 96 |               "      <td>95276.14</td>\n",
 97 |               "      <td>no</td>\n",
 98 |               "    </tr>\n",
 99 |               "    <tr>\n",
100 |               "      <th>4</th>\n",
101 |               "      <td>4</td>\n",
102 |               "      <td>7935</td>\n",
103 |               "      <td>2014</td>\n",
104 |               "      <td>117384.68</td>\n",
105 |               "      <td>yes</td>\n",
106 |               "    </tr>\n",
107 |               "  </tbody>\n",
108 |               "</table>\n",
109 |               "</div>"
110 |             ],
111 |             "text/plain": [
112 |               "   Unnamed: 0  mileage_per_year  model_year      price sold\n",
113 |               "0           0             21801        2000   30941.02  yes\n",
114 |               "1           1              7843        1998   40557.96  yes\n",
115 |               "2           2              7109        2006   89627.50   no\n",
116 |               "3           3             26823        2015   95276.14   no\n",
117 |               "4           4              7935        2014  117384.68  yes"
118 |             ]
119 |           },
120 |           "metadata": {
121 |             "tags": []
122 |           },
123 |           "execution_count": 1
124 |         }
125 |       ]
126 |     },
127 |     {
128 |       "metadata": {
129 |         "id": "FqFrPmgJDhvM",
130 |         "colab_type": "code",
131 |         "colab": {
132 |           "base_uri": "https://localhost:8080/",
133 |           "height": 195
134 |         },
135 |         "outputId": "46b16944-2a4a-4b53-b970-5a96a9b7d867"
136 |       },
137 |       "cell_type": "code",
138 |       "source": [
139 |         "a_renomear = {\n",
140 |         "    'mileage_per_year' : 'milhas_por_ano',\n",
141 |         "    'model_year' : 'ano_do_modelo',\n",
142 |         "    'price' : 'preco',\n",
143 |         "    'sold' : 'vendido'\n",
144 |         "}\n",
145 |         "dados = dados.rename(columns=a_renomear)\n",
146 |         "dados.head()"
147 |       ],
148 |       "execution_count": 2,
149 |       "outputs": [
150 |         {
151 |           "output_type": "execute_result",
152 |           "data": {
153 |             "text/html": [
154 |               "<div>\n",
155 |               "<style scoped>\n",
156 |               "    .dataframe tbody tr th:only-of-type {\n",
157 |               "        vertical-align: middle;\n",
158 |               "    }\n",
159 |               "\n",
160 |               "    .dataframe tbody tr th {\n",
161 |               "        vertical-align: top;\n",
162 |               "    }\n",
163 |               "\n",
164 |               "    .dataframe thead th {\n",
165 |               "        text-align: right;\n",
166 |               "    }\n",
167 |               "</style>\n",
168 |               "<table border=\"1\" class=\"dataframe\">\n",
169 |               "  <thead>\n",
170 |               "    <tr style=\"text-align: right;\">\n",
171 |               "      <th></th>\n",
172 |               "      <th>Unnamed: 0</th>\n",
173 |               "      <th>milhas_por_ano</th>\n",
174 |               "      <th>ano_do_modelo</th>\n",
175 |               "      <th>preco</th>\n",
176 |               "      <th>vendido</th>\n",
177 |               "    </tr>\n",
178 |               "  </thead>\n",
179 |               "  <tbody>\n",
180 |               "    <tr>\n",
181 |               "      <th>0</th>\n",
182 |               "      <td>0</td>\n",
183 |               "      <td>21801</td>\n",
184 |               "      <td>2000</td>\n",
185 |               "      <td>30941.02</td>\n",
186 |               "      <td>yes</td>\n",
187 |               "    </tr>\n",
188 |               "    <tr>\n",
189 |               "      <th>1</th>\n",
190 |               "      <td>1</td>\n",
191 |               "      <td>7843</td>\n",
192 |               "      <td>1998</td>\n",
193 |               "      <td>40557.96</td>\n",
194 |               "      <td>yes</td>\n",
195 |               "    </tr>\n",
196 |               "    <tr>\n",
197 |               "      <th>2</th>\n",
198 |               "      <td>2</td>\n",
199 |               "      <td>7109</td>\n",
200 |               "      <td>2006</td>\n",
201 |               "      <td>89627.50</td>\n",
202 |               "      <td>no</td>\n",
203 |               "    </tr>\n",
204 |               "    <tr>\n",
205 |               "      <th>3</th>\n",
206 |               "      <td>3</td>\n",
207 |               "      <td>26823</td>\n",
208 |               "      <td>2015</td>\n",
209 |               "      <td>95276.14</td>\n",
210 |               "      <td>no</td>\n",
211 |               "    </tr>\n",
212 |               "    <tr>\n",
213 |               "      <th>4</th>\n",
214 |               "      <td>4</td>\n",
215 |               "      <td>7935</td>\n",
216 |               "      <td>2014</td>\n",
217 |               "      <td>117384.68</td>\n",
218 |               "      <td>yes</td>\n",
219 |               "    </tr>\n",
220 |               "  </tbody>\n",
221 |               "</table>\n",
222 |               "</div>"
223 |             ],
224 |             "text/plain": [
225 |               "   Unnamed: 0  milhas_por_ano  ano_do_modelo      preco vendido\n",
226 |               "0           0           21801           2000   30941.02     yes\n",
227 |               "1           1            7843           1998   40557.96     yes\n",
228 |               "2           2            7109           2006   89627.50      no\n",
229 |               "3           3           26823           2015   95276.14      no\n",
230 |               "4           4            7935           2014  117384.68     yes"
231 |             ]
232 |           },
233 |           "metadata": {
234 |             "tags": []
235 |           },
236 |           "execution_count": 2
237 |         }
238 |       ]
239 |     },
240 |     {
241 |       "metadata": {
242 |         "id": "J31fUGbVEKpW",
243 |         "colab_type": "code",
244 |         "colab": {
245 |           "base_uri": "https://localhost:8080/",
246 |           "height": 195
247 |         },
248 |         "outputId": "ad0a651f-320a-4f5e-8e2c-706bf6937fbb"
249 |       },
250 |       "cell_type": "code",
251 |       "source": [
252 |         "a_trocar = {\n",
253 |         "    'no' : 0,\n",
254 |         "    'yes' : 1\n",
255 |         "}\n",
256 |         "dados.vendido = dados.vendido.map(a_trocar)\n",
257 |         "dados.head()"
258 |       ],
259 |       "execution_count": 3,
260 |       "outputs": [
261 |         {
262 |           "output_type": "execute_result",
263 |           "data": {
264 |             "text/html": [
265 |               "<div>\n",
266 |               "<style scoped>\n",
267 |               "    .dataframe tbody tr th:only-of-type {\n",
268 |               "        vertical-align: middle;\n",
269 |               "    }\n",
270 |               "\n",
271 |               "    .dataframe tbody tr th {\n",
272 |               "        vertical-align: top;\n",
273 |               "    }\n",
274 |               "\n",
275 |               "    .dataframe thead th {\n",
276 |               "        text-align: right;\n",
277 |               "    }\n",
278 |               "</style>\n",
279 |               "<table border=\"1\" class=\"dataframe\">\n",
280 |               "  <thead>\n",
281 |               "    <tr style=\"text-align: right;\">\n",
282 |               "      <th></th>\n",
283 |               "      <th>Unnamed: 0</th>\n",
284 |               "      <th>milhas_por_ano</th>\n",
285 |               "      <th>ano_do_modelo</th>\n",
286 |               "      <th>preco</th>\n",
287 |               "      <th>vendido</th>\n",
288 |               "    </tr>\n",
289 |               "  </thead>\n",
290 |               "  <tbody>\n",
291 |               "    <tr>\n",
292 |               "      <th>0</th>\n",
293 |               "      <td>0</td>\n",
294 |               "      <td>21801</td>\n",
295 |               "      <td>2000</td>\n",
296 |               "      <td>30941.02</td>\n",
297 |               "      <td>1</td>\n",
298 |               "    </tr>\n",
299 |               "    <tr>\n",
300 |               "      <th>1</th>\n",
301 |               "      <td>1</td>\n",
302 |               "      <td>7843</td>\n",
303 |               "      <td>1998</td>\n",
304 |               "      <td>40557.96</td>\n",
305 |               "      <td>1</td>\n",
306 |               "    </tr>\n",
307 |               "    <tr>\n",
308 |               "      <th>2</th>\n",
309 |               "      <td>2</td>\n",
310 |               "      <td>7109</td>\n",
311 |               "      <td>2006</td>\n",
312 |               "      <td>89627.50</td>\n",
313 |               "      <td>0</td>\n",
314 |               "    </tr>\n",
315 |               "    <tr>\n",
316 |               "      <th>3</th>\n",
317 |               "      <td>3</td>\n",
318 |               "      <td>26823</td>\n",
319 |               "      <td>2015</td>\n",
320 |               "      <td>95276.14</td>\n",
321 |               "      <td>0</td>\n",
322 |               "    </tr>\n",
323 |               "    <tr>\n",
324 |               "      <th>4</th>\n",
325 |               "      <td>4</td>\n",
326 |               "      <td>7935</td>\n",
327 |               "      <td>2014</td>\n",
328 |               "      <td>117384.68</td>\n",
329 |               "      <td>1</td>\n",
330 |               "    </tr>\n",
331 |               "  </tbody>\n",
332 |               "</table>\n",
333 |               "</div>"
334 |             ],
335 |             "text/plain": [
336 |               "   Unnamed: 0  milhas_por_ano  ano_do_modelo      preco  vendido\n",
337 |               "0           0           21801           2000   30941.02        1\n",
338 |               "1           1            7843           1998   40557.96        1\n",
339 |               "2           2            7109           2006   89627.50        0\n",
340 |               "3           3           26823           2015   95276.14        0\n",
341 |               "4           4            7935           2014  117384.68        1"
342 |             ]
343 |           },
344 |           "metadata": {
345 |             "tags": []
346 |           },
347 |           "execution_count": 3
348 |         }
349 |       ]
350 |     },
351 |     {
352 |       "metadata": {
353 |         "id": "tZFog8O9EXYD",
354 |         "colab_type": "code",
355 |         "colab": {
356 |           "base_uri": "https://localhost:8080/",
357 |           "height": 195
358 |         },
359 |         "outputId": "5792a2f8-f777-4237-e813-697f60309951"
360 |       },
361 |       "cell_type": "code",
362 |       "source": [
363 |         "from datetime import datetime\n",
364 |         "\n",
365 |         "ano_atual = datetime.today().year\n",
366 |         "dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo\n",
367 |         "dados.head()"
368 |       ],
369 |       "execution_count": 4,
370 |       "outputs": [
371 |         {
372 |           "output_type": "execute_result",
373 |           "data": {
374 |             "text/html": [
375 |               "<div>\n",
376 |               "<style scoped>\n",
377 |               "    .dataframe tbody tr th:only-of-type {\n",
378 |               "        vertical-align: middle;\n",
379 |               "    }\n",
380 |               "\n",
381 |               "    .dataframe tbody tr th {\n",
382 |               "        vertical-align: top;\n",
383 |               "    }\n",
384 |               "\n",
385 |               "    .dataframe thead th {\n",
386 |               "        text-align: right;\n",
387 |               "    }\n",
388 |               "</style>\n",
389 |               "<table border=\"1\" class=\"dataframe\">\n",
390 |               "  <thead>\n",
391 |               "    <tr style=\"text-align: right;\">\n",
392 |               "      <th></th>\n",
393 |               "      <th>Unnamed: 0</th>\n",
394 |               "      <th>milhas_por_ano</th>\n",
395 |               "      <th>ano_do_modelo</th>\n",
396 |               "      <th>preco</th>\n",
397 |               "      <th>vendido</th>\n",
398 |               "      <th>idade_do_modelo</th>\n",
399 |               "    </tr>\n",
400 |               "  </thead>\n",
401 |               "  <tbody>\n",
402 |               "    <tr>\n",
403 |               "      <th>0</th>\n",
404 |               "      <td>0</td>\n",
405 |               "      <td>21801</td>\n",
406 |               "      <td>2000</td>\n",
407 |               "      <td>30941.02</td>\n",
408 |               "      <td>1</td>\n",
409 |               "      <td>18</td>\n",
410 |               "    </tr>\n",
411 |               "    <tr>\n",
412 |               "      <th>1</th>\n",
413 |               "      <td>1</td>\n",
414 |               "      <td>7843</td>\n",
415 |               "      <td>1998</td>\n",
416 |               "      <td>40557.96</td>\n",
417 |               "      <td>1</td>\n",
418 |               "      <td>20</td>\n",
419 |               "    </tr>\n",
420 |               "    <tr>\n",
421 |               "      <th>2</th>\n",
422 |               "      <td>2</td>\n",
423 |               "      <td>7109</td>\n",
424 |               "      <td>2006</td>\n",
425 |               "      <td>89627.50</td>\n",
426 |               "      <td>0</td>\n",
427 |               "      <td>12</td>\n",
428 |               "    </tr>\n",
429 |               "    <tr>\n",
430 |               "      <th>3</th>\n",
431 |               "      <td>3</td>\n",
432 |               "      <td>26823</td>\n",
433 |               "      <td>2015</td>\n",
434 |               "      <td>95276.14</td>\n",
435 |               "      <td>0</td>\n",
436 |               "      <td>3</td>\n",
437 |               "    </tr>\n",
438 |               "    <tr>\n",
439 |               "      <th>4</th>\n",
440 |               "      <td>4</td>\n",
441 |               "      <td>7935</td>\n",
442 |               "      <td>2014</td>\n",
443 |               "      <td>117384.68</td>\n",
444 |               "      <td>1</td>\n",
445 |               "      <td>4</td>\n",
446 |               "    </tr>\n",
447 |               "  </tbody>\n",
448 |               "</table>\n",
449 |               "</div>"
450 |             ],
451 |             "text/plain": [
452 |               "   Unnamed: 0  milhas_por_ano  ano_do_modelo      preco  vendido  \\\n",
453 |               "0           0           21801           2000   30941.02        1   \n",
454 |               "1           1            7843           1998   40557.96        1   \n",
455 |               "2           2            7109           2006   89627.50        0   \n",
456 |               "3           3           26823           2015   95276.14        0   \n",
457 |               "4           4            7935           2014  117384.68        1   \n",
458 |               "\n",
459 |               "   idade_do_modelo  \n",
460 |               "0               18  \n",
461 |               "1               20  \n",
462 |               "2               12  \n",
463 |               "3                3  \n",
464 |               "4                4  "
465 |             ]
466 |           },
467 |           "metadata": {
468 |             "tags": []
469 |           },
470 |           "execution_count": 4
471 |         }
472 |       ]
473 |     },
474 |     {
475 |       "metadata": {
476 |         "id": "3wWWgxhcFbR9",
477 |         "colab_type": "code",
478 |         "colab": {
479 |           "base_uri": "https://localhost:8080/",
480 |           "height": 204
481 |         },
482 |         "outputId": "b4c147dc-c2e5-4bfd-e78f-5a34f72770d6"
483 |       },
484 |       "cell_type": "code",
485 |       "source": [
486 |         "dados['km_por_ano'] = dados.milhas_por_ano * 1.60934\n",
487 |         "dados.head()"
488 |       ],
489 |       "execution_count": 5,
490 |       "outputs": [
491 |         {
492 |           "output_type": "execute_result",
493 |           "data": {
494 |             "text/html": [
495 |               "<div>\n",
496 |               "<style scoped>\n",
497 |               "    .dataframe tbody tr th:only-of-type {\n",
498 |               "        vertical-align: middle;\n",
499 |               "    }\n",
500 |               "\n",
501 |               "    .dataframe tbody tr th {\n",
502 |               "        vertical-align: top;\n",
503 |               "    }\n",
504 |               "\n",
505 |               "    .dataframe thead th {\n",
506 |               "        text-align: right;\n",
507 |               "    }\n",
508 |               "</style>\n",
509 |               "<table border=\"1\" class=\"dataframe\">\n",
510 |               "  <thead>\n",
511 |               "    <tr style=\"text-align: right;\">\n",
512 |               "      <th></th>\n",
513 |               "      <th>Unnamed: 0</th>\n",
514 |               "      <th>milhas_por_ano</th>\n",
515 |               "      <th>ano_do_modelo</th>\n",
516 |               "      <th>preco</th>\n",
517 |               "      <th>vendido</th>\n",
518 |               "      <th>idade_do_modelo</th>\n",
519 |               "      <th>km_por_ano</th>\n",
520 |               "    </tr>\n",
521 |               "  </thead>\n",
522 |               "  <tbody>\n",
523 |               "    <tr>\n",
524 |               "      <th>0</th>\n",
525 |               "      <td>0</td>\n",
526 |               "      <td>21801</td>\n",
527 |               "      <td>2000</td>\n",
528 |               "      <td>30941.02</td>\n",
529 |               "      <td>1</td>\n",
530 |               "      <td>18</td>\n",
531 |               "      <td>35085.22134</td>\n",
532 |               "    </tr>\n",
533 |               "    <tr>\n",
534 |               "      <th>1</th>\n",
535 |               "      <td>1</td>\n",
536 |               "      <td>7843</td>\n",
537 |               "      <td>1998</td>\n",
538 |               "      <td>40557.96</td>\n",
539 |               "      <td>1</td>\n",
540 |               "      <td>20</td>\n",
541 |               "      <td>12622.05362</td>\n",
542 |               "    </tr>\n",
543 |               "    <tr>\n",
544 |               "      <th>2</th>\n",
545 |               "      <td>2</td>\n",
546 |               "      <td>7109</td>\n",
547 |               "      <td>2006</td>\n",
548 |               "      <td>89627.50</td>\n",
549 |               "      <td>0</td>\n",
550 |               "      <td>12</td>\n",
551 |               "      <td>11440.79806</td>\n",
552 |               "    </tr>\n",
553 |               "    <tr>\n",
554 |               "      <th>3</th>\n",
555 |               "      <td>3</td>\n",
556 |               "      <td>26823</td>\n",
557 |               "      <td>2015</td>\n",
558 |               "      <td>95276.14</td>\n",
559 |               "      <td>0</td>\n",
560 |               "      <td>3</td>\n",
561 |               "      <td>43167.32682</td>\n",
562 |               "    </tr>\n",
563 |               "    <tr>\n",
564 |               "      <th>4</th>\n",
565 |               "      <td>4</td>\n",
566 |               "      <td>7935</td>\n",
567 |               "      <td>2014</td>\n",
568 |               "      <td>117384.68</td>\n",
569 |               "      <td>1</td>\n",
570 |               "      <td>4</td>\n",
571 |               "      <td>12770.11290</td>\n",
572 |               "    </tr>\n",
573 |               "  </tbody>\n",
574 |               "</table>\n",
575 |               "</div>"
576 |             ],
577 |             "text/plain": [
578 |               "   Unnamed: 0  milhas_por_ano  ano_do_modelo      preco  vendido  \\\n",
579 |               "0           0           21801           2000   30941.02        1   \n",
580 |               "1           1            7843           1998   40557.96        1   \n",
581 |               "2           2            7109           2006   89627.50        0   \n",
582 |               "3           3           26823           2015   95276.14        0   \n",
583 |               "4           4            7935           2014  117384.68        1   \n",
584 |               "\n",
585 |               "   idade_do_modelo   km_por_ano  \n",
586 |               "0               18  35085.22134  \n",
587 |               "1               20  12622.05362  \n",
588 |               "2               12  11440.79806  \n",
589 |               "3                3  43167.32682  \n",
590 |               "4                4  12770.11290  "
591 |             ]
592 |           },
593 |           "metadata": {
594 |             "tags": []
595 |           },
596 |           "execution_count": 5
597 |         }
598 |       ]
599 |     },
600 |     {
601 |       "metadata": {
602 |         "id": "MfjQNKlyFo2S",
603 |         "colab_type": "code",
604 |         "colab": {
605 |           "base_uri": "https://localhost:8080/",
606 |           "height": 204
607 |         },
608 |         "outputId": "7588f6a5-ff37-4996-91a5-d9ee4b5543d7"
609 |       },
610 |       "cell_type": "code",
611 |       "source": [
612 |         "dados = dados.drop(columns = [\"Unnamed: 0\", \"milhas_por_ano\",\"ano_do_modelo\"], axis=1)\n",
613 |         "dados.head()"
614 |       ],
615 |       "execution_count": 6,
616 |       "outputs": [
617 |         {
618 |           "output_type": "execute_result",
619 |           "data": {
620 |             "text/html": [
621 |               "<div>\n",
622 |               "<style scoped>\n",
623 |               "    .dataframe tbody tr th:only-of-type {\n",
624 |               "        vertical-align: middle;\n",
625 |               "    }\n",
626 |               "\n",
627 |               "    .dataframe tbody tr th {\n",
628 |               "        vertical-align: top;\n",
629 |               "    }\n",
630 |               "\n",
631 |               "    .dataframe thead th {\n",
632 |               "        text-align: right;\n",
633 |               "    }\n",
634 |               "</style>\n",
635 |               "<table border=\"1\" class=\"dataframe\">\n",
636 |               "  <thead>\n",
637 |               "    <tr style=\"text-align: right;\">\n",
638 |               "      <th></th>\n",
639 |               "      <th>preco</th>\n",
640 |               "      <th>vendido</th>\n",
641 |               "      <th>idade_do_modelo</th>\n",
642 |               "      <th>km_por_ano</th>\n",
643 |               "    </tr>\n",
644 |               "  </thead>\n",
645 |               "  <tbody>\n",
646 |               "    <tr>\n",
647 |               "      <th>0</th>\n",
648 |               "      <td>30941.02</td>\n",
649 |               "      <td>1</td>\n",
650 |               "      <td>18</td>\n",
651 |               "      <td>35085.22134</td>\n",
652 |               "    </tr>\n",
653 |               "    <tr>\n",
654 |               "      <th>1</th>\n",
655 |               "      <td>40557.96</td>\n",
656 |               "      <td>1</td>\n",
657 |               "      <td>20</td>\n",
658 |               "      <td>12622.05362</td>\n",
659 |               "    </tr>\n",
660 |               "    <tr>\n",
661 |               "      <th>2</th>\n",
662 |               "      <td>89627.50</td>\n",
663 |               "      <td>0</td>\n",
664 |               "      <td>12</td>\n",
665 |               "      <td>11440.79806</td>\n",
666 |               "    </tr>\n",
667 |               "    <tr>\n",
668 |               "      <th>3</th>\n",
669 |               "      <td>95276.14</td>\n",
670 |               "      <td>0</td>\n",
671 |               "      <td>3</td>\n",
672 |               "      <td>43167.32682</td>\n",
673 |               "    </tr>\n",
674 |               "    <tr>\n",
675 |               "      <th>4</th>\n",
676 |               "      <td>117384.68</td>\n",
677 |               "      <td>1</td>\n",
678 |               "      <td>4</td>\n",
679 |               "      <td>12770.11290</td>\n",
680 |               "    </tr>\n",
681 |               "  </tbody>\n",
682 |               "</table>\n",
683 |               "</div>"
684 |             ],
685 |             "text/plain": [
686 |               "       preco  vendido  idade_do_modelo   km_por_ano\n",
687 |               "0   30941.02        1               18  35085.22134\n",
688 |               "1   40557.96        1               20  12622.05362\n",
689 |               "2   89627.50        0               12  11440.79806\n",
690 |               "3   95276.14        0                3  43167.32682\n",
691 |               "4  117384.68        1                4  12770.11290"
692 |             ]
693 |           },
694 |           "metadata": {
695 |             "tags": []
696 |           },
697 |           "execution_count": 6
698 |         }
699 |       ]
700 |     },
701 |     {
702 |       "metadata": {
703 |         "id": "E3xebM4FF0Tc",
704 |         "colab_type": "code",
705 |         "colab": {
706 |           "base_uri": "https://localhost:8080/",
707 |           "height": 50
708 |         },
709 |         "outputId": "7d48ae66-beb0-4884-cbfa-c81c9ce95eb1"
710 |       },
711 |       "cell_type": "code",
712 |       "source": [
713 |         "import numpy as np\n",
714 |         "from sklearn.model_selection import train_test_split\n",
715 |         "from sklearn.svm import LinearSVC\n",
716 |         "from sklearn.metrics import accuracy_score\n",
717 |         "\n",
718 |         "x = dados[[\"preco\", \"idade_do_modelo\",\"km_por_ano\"]]\n",
719 |         "y = dados[\"vendido\"]\n",
720 |         "\n",
721 |         "SEED = 5\n",
722 |         "np.random.seed(SEED)\n",
723 |         "treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n",
724 |         "                                                         stratify = y)\n",
725 |         "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n",
726 |         "\n",
727 |         "modelo = LinearSVC()\n",
728 |         "modelo.fit(treino_x, treino_y)\n",
729 |         "previsoes = modelo.predict(teste_x)\n",
730 |         "\n",
731 |         "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
732 |         "print(\"A acurácia foi %.2f%%\" % acuracia)"
733 |       ],
734 |       "execution_count": 17,
735 |       "outputs": [
736 |         {
737 |           "output_type": "stream",
738 |           "text": [
739 |             "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n",
740 |             "A acurácia foi 57.88%\n"
741 |           ],
742 |           "name": "stdout"
743 |         }
744 |       ]
745 |     },
746 |     {
747 |       "metadata": {
748 |         "id": "G2ZFWoPkGONL",
749 |         "colab_type": "code",
750 |         "colab": {
751 |           "base_uri": "https://localhost:8080/",
752 |           "height": 34
753 |         },
754 |         "outputId": "f86fecd4-0528-420d-face-b3ab72af6a23"
755 |       },
756 |       "cell_type": "code",
757 |       "source": [
758 |         "from sklearn.dummy import DummyClassifier\n",
759 |         "\n",
760 |         "dummy_stratified = DummyClassifier()\n",
761 |         "dummy_stratified.fit(treino_x, treino_y)\n",
762 |         "acuracia = dummy_stratified.score(teste_x, teste_y) * 100\n",
763 |         "\n",
764 |         "print(\"A acurácia do dummy stratified foi %.2f%%\" % acuracia)"
765 |       ],
766 |       "execution_count": 18,
767 |       "outputs": [
768 |         {
769 |           "output_type": "stream",
770 |           "text": [
771 |             "A acurácia do dummy stratified foi 52.44%\n"
772 |           ],
773 |           "name": "stdout"
774 |         }
775 |       ]
776 |     },
777 |     {
778 |       "metadata": {
779 |         "id": "469ZKbHJIHL0",
780 |         "colab_type": "code",
781 |         "colab": {
782 |           "base_uri": "https://localhost:8080/",
783 |           "height": 34
784 |         },
785 |         "outputId": "7348330f-4752-4e1e-8f27-2420acf2ad14"
786 |       },
787 |       "cell_type": "code",
788 |       "source": [
789 |         "from sklearn.dummy import DummyClassifier\n",
790 |         "\n",
791 |         "dummy_mostfrequent = DummyClassifier()\n",
792 |         "dummy_mostfrequent.fit(treino_x, treino_y)\n",
793 |         "acuracia = dummy_mostfrequent.score(teste_x, teste_y) * 100\n",
794 |         "\n",
795 |         "print(\"A acurácia do dummy mostfrequent foi %.2f%%\" % acuracia)"
796 |       ],
797 |       "execution_count": 19,
798 |       "outputs": [
799 |         {
800 |           "output_type": "stream",
801 |           "text": [
802 |             "A acurácia do dummy mostfrequent foi 50.68%\n"
803 |           ],
804 |           "name": "stdout"
805 |         }
806 |       ]
807 |     },
808 |     {
809 |       "metadata": {
810 |         "id": "S_5AfFerIsH_",
811 |         "colab_type": "code",
812 |         "colab": {
813 |           "base_uri": "https://localhost:8080/",
814 |           "height": 50
815 |         },
816 |         "outputId": "569a686d-9b86-459f-d30d-5ce738bb6897"
817 |       },
818 |       "cell_type": "code",
819 |       "source": [
820 |         "from sklearn.preprocessing import StandardScaler\n",
821 |         "from sklearn.model_selection import train_test_split\n",
822 |         "from sklearn.svm import SVC\n",
823 |         "from sklearn.metrics import accuracy_score\n",
824 |         "\n",
825 |         "SEED = 5\n",
826 |         "np.random.seed(SEED)\n",
827 |         "raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n",
828 |         "                                                         stratify = y)\n",
829 |         "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n",
830 |         "\n",
831 |         "scaler = StandardScaler()\n",
832 |         "scaler.fit(raw_treino_x)\n",
833 |         "treino_x = scaler.transform(raw_treino_x)\n",
834 |         "teste_x = scaler.transform(raw_teste_x)\n",
835 |         "\n",
836 |         "modelo = SVC()\n",
837 |         "modelo.fit(treino_x, treino_y)\n",
838 |         "previsoes = modelo.predict(teste_x)\n",
839 |         "\n",
840 |         "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
841 |         "print(\"A acurácia foi %.2f%%\" % acuracia)\n"
842 |       ],
843 |       "execution_count": 20,
844 |       "outputs": [
845 |         {
846 |           "output_type": "stream",
847 |           "text": [
848 |             "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n",
849 |             "A acurácia foi 77.48%\n"
850 |           ],
851 |           "name": "stdout"
852 |         }
853 |       ]
854 |     },
855 |     {
856 |       "metadata": {
857 |         "id": "-1YsD-whJuGR",
858 |         "colab_type": "code",
859 |         "colab": {}
860 |       },
861 |       "cell_type": "code",
862 |       "source": [
863 |         ""
864 |       ],
865 |       "execution_count": 0,
866 |       "outputs": []
867 |     }
868 |   ]
869 | }


--------------------------------------------------------------------------------
/aula5.2/introdução_a_machine_learning_4.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """Introdução a Machine Learning - 4.ipynb
  3 | 
  4 | Automatically generated by Colaboratory.
  5 | 
  6 | Original file is located at
  7 |     https://colab.research.google.com/drive/1RpYAAROMa4C86iZscVUzaWIeVYSJapyE
  8 | """
  9 | 
 10 | import pandas as pd
 11 | 
 12 | uri = "https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv"
 13 | dados = pd.read_csv(uri)
 14 | dados.head()
 15 | 
 16 | a_renomear = {
 17 |     'mileage_per_year' : 'milhas_por_ano',
 18 |     'model_year' : 'ano_do_modelo',
 19 |     'price' : 'preco',
 20 |     'sold' : 'vendido'
 21 | }
 22 | dados = dados.rename(columns=a_renomear)
 23 | dados.head()
 24 | 
 25 | a_trocar = {
 26 |     'no' : 0,
 27 |     'yes' : 1
 28 | }
 29 | dados.vendido = dados.vendido.map(a_trocar)
 30 | dados.head()
 31 | 
 32 | from datetime import datetime
 33 | 
 34 | ano_atual = datetime.today().year
 35 | dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo
 36 | dados.head()
 37 | 
 38 | dados['km_por_ano'] = dados.milhas_por_ano * 1.60934
 39 | dados.head()
 40 | 
 41 | dados = dados.drop(columns = ["Unnamed: 0", "milhas_por_ano","ano_do_modelo"], axis=1)
 42 | dados.head()
 43 | 
 44 | import numpy as np
 45 | from sklearn.model_selection import train_test_split
 46 | from sklearn.svm import LinearSVC
 47 | from sklearn.metrics import accuracy_score
 48 | 
 49 | x = dados[["preco", "idade_do_modelo","km_por_ano"]]
 50 | y = dados["vendido"]
 51 | 
 52 | SEED = 5
 53 | np.random.seed(SEED)
 54 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
 55 |                                                          stratify = y)
 56 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
 57 | 
 58 | modelo = LinearSVC()
 59 | modelo.fit(treino_x, treino_y)
 60 | previsoes = modelo.predict(teste_x)
 61 | 
 62 | acuracia = accuracy_score(teste_y, previsoes) * 100
 63 | print("A acurácia foi %.2f%%" % acuracia)
 64 | 
 65 | from sklearn.dummy import DummyClassifier
 66 | 
 67 | dummy_stratified = DummyClassifier()
 68 | dummy_stratified.fit(treino_x, treino_y)
 69 | acuracia = dummy_stratified.score(teste_x, teste_y) * 100
 70 | 
 71 | print("A acurácia do dummy stratified foi %.2f%%" % acuracia)
 72 | 
 73 | from sklearn.dummy import DummyClassifier
 74 | 
 75 | dummy_mostfrequent = DummyClassifier()
 76 | dummy_mostfrequent.fit(treino_x, treino_y)
 77 | acuracia = dummy_mostfrequent.score(teste_x, teste_y) * 100
 78 | 
 79 | print("A acurácia do dummy mostfrequent foi %.2f%%" % acuracia)
 80 | 
 81 | from sklearn.preprocessing import StandardScaler
 82 | from sklearn.model_selection import train_test_split
 83 | from sklearn.svm import SVC
 84 | from sklearn.metrics import accuracy_score
 85 | 
 86 | SEED = 5
 87 | np.random.seed(SEED)
 88 | raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
 89 |                                                          stratify = y)
 90 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
 91 | 
 92 | scaler = StandardScaler()
 93 | scaler.fit(raw_treino_x)
 94 | treino_x = scaler.transform(raw_treino_x)
 95 | teste_x = scaler.transform(raw_teste_x)
 96 | 
 97 | modelo = SVC()
 98 | modelo.fit(treino_x, treino_y)
 99 | previsoes = modelo.predict(teste_x)
100 | 
101 | acuracia = accuracy_score(teste_y, previsoes) * 100
102 | print("A acurácia foi %.2f%%" % acuracia)
103 | 
104 | 


--------------------------------------------------------------------------------
/aula5.4/Introdução_a_Machine_Learning_4.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |   "nbformat": 4,
   3 |   "nbformat_minor": 0,
   4 |   "metadata": {
   5 |     "colab": {
   6 |       "name": "Introdução a Machine Learning - 4.ipynb",
   7 |       "version": "0.3.2",
   8 |       "provenance": [],
   9 |       "collapsed_sections": []
  10 |     },
  11 |     "kernelspec": {
  12 |       "name": "python3",
  13 |       "display_name": "Python 3"
  14 |     }
  15 |   },
  16 |   "cells": [
  17 |     {
  18 |       "metadata": {
  19 |         "id": "T3ywJhRBMGvY",
  20 |         "colab_type": "code",
  21 |         "colab": {
  22 |           "base_uri": "https://localhost:8080/",
  23 |           "height": 3057
  24 |         },
  25 |         "outputId": "fdc1e620-4be1-4b38-f525-a075971a2572"
  26 |       },
  27 |       "cell_type": "code",
  28 |       "source": [
  29 |         "!pip install graphviz==0.10\n",
  30 |         "!apt-get install graphviz"
  31 |       ],
  32 |       "execution_count": 28,
  33 |       "outputs": [
  34 |         {
  35 |           "output_type": "stream",
  36 |           "text": [
  37 |             "Requirement already satisfied: graphviz==0.10 in /usr/local/lib/python3.6/dist-packages (0.10)\n",
  38 |             "Reading package lists... Done\n",
  39 |             "Building dependency tree       \n",
  40 |             "Reading state information... Done\n",
  41 |             "The following additional packages will be installed:\n",
  42 |             "  fontconfig libann0 libcairo2 libcdt5 libcgraph6 libdatrie1 libgd3\n",
  43 |             "  libgts-0.7-5 libgts-bin libgvc6 libgvpr2 libjbig0 liblab-gamut1 libltdl7\n",
  44 |             "  libpango-1.0-0 libpangocairo-1.0-0 libpangoft2-1.0-0 libpathplan4\n",
  45 |             "  libpixman-1-0 libthai-data libthai0 libtiff5 libwebp6 libxaw7 libxcb-render0\n",
  46 |             "  libxcb-shm0 libxmu6 libxpm4 libxt6\n",
  47 |             "Suggested packages:\n",
  48 |             "  gsfonts graphviz-doc libgd-tools\n",
  49 |             "The following NEW packages will be installed:\n",
  50 |             "  fontconfig graphviz libann0 libcairo2 libcdt5 libcgraph6 libdatrie1 libgd3\n",
  51 |             "  libgts-0.7-5 libgts-bin libgvc6 libgvpr2 libjbig0 liblab-gamut1 libltdl7\n",
  52 |             "  libpango-1.0-0 libpangocairo-1.0-0 libpangoft2-1.0-0 libpathplan4\n",
  53 |             "  libpixman-1-0 libthai-data libthai0 libtiff5 libwebp6 libxaw7 libxcb-render0\n",
  54 |             "  libxcb-shm0 libxmu6 libxpm4 libxt6\n",
  55 |             "0 upgraded, 30 newly installed, 0 to remove and 12 not upgraded.\n",
  56 |             "Need to get 4,154 kB of archives.\n",
  57 |             "After this operation, 16.1 MB of additional disk space will be used.\n",
  58 |             "Get:1 http://archive.ubuntu.com/ubuntu bionic/main amd64 fontconfig amd64 2.12.6-0ubuntu2 [169 kB]\n",
  59 |             "Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libann0 amd64 1.1.2+doc-6 [24.8 kB]\n",
  60 |             "Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libcdt5 amd64 2.40.1-2 [19.6 kB]\n",
  61 |             "Get:4 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libcgraph6 amd64 2.40.1-2 [40.8 kB]\n",
  62 |             "Get:5 http://archive.ubuntu.com/ubuntu bionic/main amd64 libjbig0 amd64 2.1-3.1build1 [26.7 kB]\n",
  63 |             "Get:6 http://archive.ubuntu.com/ubuntu bionic/main amd64 libtiff5 amd64 4.0.9-5 [152 kB]\n",
  64 |             "Get:7 http://archive.ubuntu.com/ubuntu bionic/main amd64 libwebp6 amd64 0.6.1-2 [185 kB]\n",
  65 |             "Get:8 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxpm4 amd64 1:3.5.12-1 [34.0 kB]\n",
  66 |             "Get:9 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libgd3 amd64 2.2.5-4ubuntu0.2 [119 kB]\n",
  67 |             "Get:10 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libgts-0.7-5 amd64 0.7.6+darcs121130-4 [150 kB]\n",
  68 |             "Get:11 http://archive.ubuntu.com/ubuntu bionic/main amd64 libpixman-1-0 amd64 0.34.0-2 [229 kB]\n",
  69 |             "Get:12 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxcb-render0 amd64 1.13-1 [14.7 kB]\n",
  70 |             "Get:13 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxcb-shm0 amd64 1.13-1 [5,572 B]\n",
  71 |             "Get:14 http://archive.ubuntu.com/ubuntu bionic/main amd64 libcairo2 amd64 1.15.10-2 [580 kB]\n",
  72 |             "Get:15 http://archive.ubuntu.com/ubuntu bionic/main amd64 libltdl7 amd64 2.4.6-2 [38.8 kB]\n",
  73 |             "Get:16 http://archive.ubuntu.com/ubuntu bionic/main amd64 libthai-data all 0.1.27-2 [133 kB]\n",
  74 |             "Get:17 http://archive.ubuntu.com/ubuntu bionic/main amd64 libdatrie1 amd64 0.2.10-7 [17.8 kB]\n",
  75 |             "Get:18 http://archive.ubuntu.com/ubuntu bionic/main amd64 libthai0 amd64 0.1.27-2 [18.0 kB]\n",
  76 |             "Get:19 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libpango-1.0-0 amd64 1.40.14-1ubuntu0.1 [153 kB]\n",
  77 |             "Get:20 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libpangoft2-1.0-0 amd64 1.40.14-1ubuntu0.1 [33.2 kB]\n",
  78 |             "Get:21 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libpangocairo-1.0-0 amd64 1.40.14-1ubuntu0.1 [20.8 kB]\n",
  79 |             "Get:22 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libpathplan4 amd64 2.40.1-2 [22.6 kB]\n",
  80 |             "Get:23 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libgvc6 amd64 2.40.1-2 [601 kB]\n",
  81 |             "Get:24 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libgvpr2 amd64 2.40.1-2 [169 kB]\n",
  82 |             "Get:25 http://archive.ubuntu.com/ubuntu bionic/universe amd64 liblab-gamut1 amd64 2.40.1-2 [178 kB]\n",
  83 |             "Get:26 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxt6 amd64 1:1.1.5-1 [160 kB]\n",
  84 |             "Get:27 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxmu6 amd64 2:1.1.2-2 [46.0 kB]\n",
  85 |             "Get:28 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxaw7 amd64 2:1.0.13-1 [173 kB]\n",
  86 |             "Get:29 http://archive.ubuntu.com/ubuntu bionic/universe amd64 graphviz amd64 2.40.1-2 [601 kB]\n",
  87 |             "Get:30 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libgts-bin amd64 0.7.6+darcs121130-4 [41.3 kB]\n",
  88 |             "Fetched 4,154 kB in 2s (2,256 kB/s)\n",
  89 |             "Selecting previously unselected package fontconfig.\n",
  90 |             "(Reading database ... 22278 files and directories currently installed.)\n",
  91 |             "Preparing to unpack .../00-fontconfig_2.12.6-0ubuntu2_amd64.deb ...\n",
  92 |             "Unpacking fontconfig (2.12.6-0ubuntu2) ...\n",
  93 |             "Selecting previously unselected package libann0.\n",
  94 |             "Preparing to unpack .../01-libann0_1.1.2+doc-6_amd64.deb ...\n",
  95 |             "Unpacking libann0 (1.1.2+doc-6) ...\n",
  96 |             "Selecting previously unselected package libcdt5.\n",
  97 |             "Preparing to unpack .../02-libcdt5_2.40.1-2_amd64.deb ...\n",
  98 |             "Unpacking libcdt5 (2.40.1-2) ...\n",
  99 |             "Selecting previously unselected package libcgraph6.\n",
 100 |             "Preparing to unpack .../03-libcgraph6_2.40.1-2_amd64.deb ...\n",
 101 |             "Unpacking libcgraph6 (2.40.1-2) ...\n",
 102 |             "Selecting previously unselected package libjbig0:amd64.\n",
 103 |             "Preparing to unpack .../04-libjbig0_2.1-3.1build1_amd64.deb ...\n",
 104 |             "Unpacking libjbig0:amd64 (2.1-3.1build1) ...\n",
 105 |             "Selecting previously unselected package libtiff5:amd64.\n",
 106 |             "Preparing to unpack .../05-libtiff5_4.0.9-5_amd64.deb ...\n",
 107 |             "Unpacking libtiff5:amd64 (4.0.9-5) ...\n",
 108 |             "Selecting previously unselected package libwebp6:amd64.\n",
 109 |             "Preparing to unpack .../06-libwebp6_0.6.1-2_amd64.deb ...\n",
 110 |             "Unpacking libwebp6:amd64 (0.6.1-2) ...\n",
 111 |             "Selecting previously unselected package libxpm4:amd64.\n",
 112 |             "Preparing to unpack .../07-libxpm4_1%3a3.5.12-1_amd64.deb ...\n",
 113 |             "Unpacking libxpm4:amd64 (1:3.5.12-1) ...\n",
 114 |             "Selecting previously unselected package libgd3:amd64.\n",
 115 |             "Preparing to unpack .../08-libgd3_2.2.5-4ubuntu0.2_amd64.deb ...\n",
 116 |             "Unpacking libgd3:amd64 (2.2.5-4ubuntu0.2) ...\n",
 117 |             "Selecting previously unselected package libgts-0.7-5:amd64.\n",
 118 |             "Preparing to unpack .../09-libgts-0.7-5_0.7.6+darcs121130-4_amd64.deb ...\n",
 119 |             "Unpacking libgts-0.7-5:amd64 (0.7.6+darcs121130-4) ...\n",
 120 |             "Selecting previously unselected package libpixman-1-0:amd64.\n",
 121 |             "Preparing to unpack .../10-libpixman-1-0_0.34.0-2_amd64.deb ...\n",
 122 |             "Unpacking libpixman-1-0:amd64 (0.34.0-2) ...\n",
 123 |             "Selecting previously unselected package libxcb-render0:amd64.\n",
 124 |             "Preparing to unpack .../11-libxcb-render0_1.13-1_amd64.deb ...\n",
 125 |             "Unpacking libxcb-render0:amd64 (1.13-1) ...\n",
 126 |             "Selecting previously unselected package libxcb-shm0:amd64.\n",
 127 |             "Preparing to unpack .../12-libxcb-shm0_1.13-1_amd64.deb ...\n",
 128 |             "Unpacking libxcb-shm0:amd64 (1.13-1) ...\n",
 129 |             "Selecting previously unselected package libcairo2:amd64.\n",
 130 |             "Preparing to unpack .../13-libcairo2_1.15.10-2_amd64.deb ...\n",
 131 |             "Unpacking libcairo2:amd64 (1.15.10-2) ...\n",
 132 |             "Selecting previously unselected package libltdl7:amd64.\n",
 133 |             "Preparing to unpack .../14-libltdl7_2.4.6-2_amd64.deb ...\n",
 134 |             "Unpacking libltdl7:amd64 (2.4.6-2) ...\n",
 135 |             "Selecting previously unselected package libthai-data.\n",
 136 |             "Preparing to unpack .../15-libthai-data_0.1.27-2_all.deb ...\n",
 137 |             "Unpacking libthai-data (0.1.27-2) ...\n",
 138 |             "Selecting previously unselected package libdatrie1:amd64.\n",
 139 |             "Preparing to unpack .../16-libdatrie1_0.2.10-7_amd64.deb ...\n",
 140 |             "Unpacking libdatrie1:amd64 (0.2.10-7) ...\n",
 141 |             "Selecting previously unselected package libthai0:amd64.\n",
 142 |             "Preparing to unpack .../17-libthai0_0.1.27-2_amd64.deb ...\n",
 143 |             "Unpacking libthai0:amd64 (0.1.27-2) ...\n",
 144 |             "Selecting previously unselected package libpango-1.0-0:amd64.\n",
 145 |             "Preparing to unpack .../18-libpango-1.0-0_1.40.14-1ubuntu0.1_amd64.deb ...\n",
 146 |             "Unpacking libpango-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n",
 147 |             "Selecting previously unselected package libpangoft2-1.0-0:amd64.\n",
 148 |             "Preparing to unpack .../19-libpangoft2-1.0-0_1.40.14-1ubuntu0.1_amd64.deb ...\n",
 149 |             "Unpacking libpangoft2-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n",
 150 |             "Selecting previously unselected package libpangocairo-1.0-0:amd64.\n",
 151 |             "Preparing to unpack .../20-libpangocairo-1.0-0_1.40.14-1ubuntu0.1_amd64.deb ...\n",
 152 |             "Unpacking libpangocairo-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n",
 153 |             "Selecting previously unselected package libpathplan4.\n",
 154 |             "Preparing to unpack .../21-libpathplan4_2.40.1-2_amd64.deb ...\n",
 155 |             "Unpacking libpathplan4 (2.40.1-2) ...\n",
 156 |             "Selecting previously unselected package libgvc6.\n",
 157 |             "Preparing to unpack .../22-libgvc6_2.40.1-2_amd64.deb ...\n",
 158 |             "Unpacking libgvc6 (2.40.1-2) ...\n",
 159 |             "Selecting previously unselected package libgvpr2.\n",
 160 |             "Preparing to unpack .../23-libgvpr2_2.40.1-2_amd64.deb ...\n",
 161 |             "Unpacking libgvpr2 (2.40.1-2) ...\n",
 162 |             "Selecting previously unselected package liblab-gamut1.\n",
 163 |             "Preparing to unpack .../24-liblab-gamut1_2.40.1-2_amd64.deb ...\n",
 164 |             "Unpacking liblab-gamut1 (2.40.1-2) ...\n",
 165 |             "Selecting previously unselected package libxt6:amd64.\n",
 166 |             "Preparing to unpack .../25-libxt6_1%3a1.1.5-1_amd64.deb ...\n",
 167 |             "Unpacking libxt6:amd64 (1:1.1.5-1) ...\n",
 168 |             "Selecting previously unselected package libxmu6:amd64.\n",
 169 |             "Preparing to unpack .../26-libxmu6_2%3a1.1.2-2_amd64.deb ...\n",
 170 |             "Unpacking libxmu6:amd64 (2:1.1.2-2) ...\n",
 171 |             "Selecting previously unselected package libxaw7:amd64.\n",
 172 |             "Preparing to unpack .../27-libxaw7_2%3a1.0.13-1_amd64.deb ...\n",
 173 |             "Unpacking libxaw7:amd64 (2:1.0.13-1) ...\n",
 174 |             "Selecting previously unselected package graphviz.\n",
 175 |             "Preparing to unpack .../28-graphviz_2.40.1-2_amd64.deb ...\n",
 176 |             "Unpacking graphviz (2.40.1-2) ...\n",
 177 |             "Selecting previously unselected package libgts-bin.\n",
 178 |             "Preparing to unpack .../29-libgts-bin_0.7.6+darcs121130-4_amd64.deb ...\n",
 179 |             "Unpacking libgts-bin (0.7.6+darcs121130-4) ...\n",
 180 |             "Setting up libgts-0.7-5:amd64 (0.7.6+darcs121130-4) ...\n",
 181 |             "Setting up libpathplan4 (2.40.1-2) ...\n",
 182 |             "Setting up liblab-gamut1 (2.40.1-2) ...\n",
 183 |             "Setting up libxcb-render0:amd64 (1.13-1) ...\n",
 184 |             "Setting up libjbig0:amd64 (2.1-3.1build1) ...\n",
 185 |             "Setting up libdatrie1:amd64 (0.2.10-7) ...\n",
 186 |             "Setting up libtiff5:amd64 (4.0.9-5) ...\n",
 187 |             "Setting up libpixman-1-0:amd64 (0.34.0-2) ...\n",
 188 |             "Processing triggers for libc-bin (2.27-3ubuntu1) ...\n",
 189 |             "Setting up libltdl7:amd64 (2.4.6-2) ...\n",
 190 |             "Setting up libann0 (1.1.2+doc-6) ...\n",
 191 |             "Setting up libxcb-shm0:amd64 (1.13-1) ...\n",
 192 |             "Setting up libxpm4:amd64 (1:3.5.12-1) ...\n",
 193 |             "Setting up libxt6:amd64 (1:1.1.5-1) ...\n",
 194 |             "Setting up libgts-bin (0.7.6+darcs121130-4) ...\n",
 195 |             "Setting up libthai-data (0.1.27-2) ...\n",
 196 |             "Setting up libcdt5 (2.40.1-2) ...\n",
 197 |             "Setting up fontconfig (2.12.6-0ubuntu2) ...\n",
 198 |             "Regenerating fonts cache... done.\n",
 199 |             "Setting up libcgraph6 (2.40.1-2) ...\n",
 200 |             "Setting up libwebp6:amd64 (0.6.1-2) ...\n",
 201 |             "Setting up libcairo2:amd64 (1.15.10-2) ...\n",
 202 |             "Setting up libgvpr2 (2.40.1-2) ...\n",
 203 |             "Setting up libgd3:amd64 (2.2.5-4ubuntu0.2) ...\n",
 204 |             "Setting up libthai0:amd64 (0.1.27-2) ...\n",
 205 |             "Setting up libxmu6:amd64 (2:1.1.2-2) ...\n",
 206 |             "Setting up libpango-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n",
 207 |             "Setting up libxaw7:amd64 (2:1.0.13-1) ...\n",
 208 |             "Setting up libpangoft2-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n",
 209 |             "Setting up libpangocairo-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n",
 210 |             "Setting up libgvc6 (2.40.1-2) ...\n",
 211 |             "Setting up graphviz (2.40.1-2) ...\n",
 212 |             "Processing triggers for libc-bin (2.27-3ubuntu1) ...\n"
 213 |           ],
 214 |           "name": "stdout"
 215 |         }
 216 |       ]
 217 |     },
 218 |     {
 219 |       "metadata": {
 220 |         "id": "y7ik04NlDZMA",
 221 |         "colab_type": "code",
 222 |         "colab": {
 223 |           "base_uri": "https://localhost:8080/",
 224 |           "height": 198
 225 |         },
 226 |         "outputId": "73c13868-60e4-4a25-9338-02f324cabb18"
 227 |       },
 228 |       "cell_type": "code",
 229 |       "source": [
 230 |         "import pandas as pd\n",
 231 |         "\n",
 232 |         "uri = \"https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv\"\n",
 233 |         "dados = pd.read_csv(uri)\n",
 234 |         "dados.head()"
 235 |       ],
 236 |       "execution_count": 1,
 237 |       "outputs": [
 238 |         {
 239 |           "output_type": "execute_result",
 240 |           "data": {
 241 |             "text/html": [
 242 |               "<div>\n",
 243 |               "<style scoped>\n",
 244 |               "    .dataframe tbody tr th:only-of-type {\n",
 245 |               "        vertical-align: middle;\n",
 246 |               "    }\n",
 247 |               "\n",
 248 |               "    .dataframe tbody tr th {\n",
 249 |               "        vertical-align: top;\n",
 250 |               "    }\n",
 251 |               "\n",
 252 |               "    .dataframe thead th {\n",
 253 |               "        text-align: right;\n",
 254 |               "    }\n",
 255 |               "</style>\n",
 256 |               "<table border=\"1\" class=\"dataframe\">\n",
 257 |               "  <thead>\n",
 258 |               "    <tr style=\"text-align: right;\">\n",
 259 |               "      <th></th>\n",
 260 |               "      <th>Unnamed: 0</th>\n",
 261 |               "      <th>mileage_per_year</th>\n",
 262 |               "      <th>model_year</th>\n",
 263 |               "      <th>price</th>\n",
 264 |               "      <th>sold</th>\n",
 265 |               "    </tr>\n",
 266 |               "  </thead>\n",
 267 |               "  <tbody>\n",
 268 |               "    <tr>\n",
 269 |               "      <th>0</th>\n",
 270 |               "      <td>0</td>\n",
 271 |               "      <td>21801</td>\n",
 272 |               "      <td>2000</td>\n",
 273 |               "      <td>30941.02</td>\n",
 274 |               "      <td>yes</td>\n",
 275 |               "    </tr>\n",
 276 |               "    <tr>\n",
 277 |               "      <th>1</th>\n",
 278 |               "      <td>1</td>\n",
 279 |               "      <td>7843</td>\n",
 280 |               "      <td>1998</td>\n",
 281 |               "      <td>40557.96</td>\n",
 282 |               "      <td>yes</td>\n",
 283 |               "    </tr>\n",
 284 |               "    <tr>\n",
 285 |               "      <th>2</th>\n",
 286 |               "      <td>2</td>\n",
 287 |               "      <td>7109</td>\n",
 288 |               "      <td>2006</td>\n",
 289 |               "      <td>89627.50</td>\n",
 290 |               "      <td>no</td>\n",
 291 |               "    </tr>\n",
 292 |               "    <tr>\n",
 293 |               "      <th>3</th>\n",
 294 |               "      <td>3</td>\n",
 295 |               "      <td>26823</td>\n",
 296 |               "      <td>2015</td>\n",
 297 |               "      <td>95276.14</td>\n",
 298 |               "      <td>no</td>\n",
 299 |               "    </tr>\n",
 300 |               "    <tr>\n",
 301 |               "      <th>4</th>\n",
 302 |               "      <td>4</td>\n",
 303 |               "      <td>7935</td>\n",
 304 |               "      <td>2014</td>\n",
 305 |               "      <td>117384.68</td>\n",
 306 |               "      <td>yes</td>\n",
 307 |               "    </tr>\n",
 308 |               "  </tbody>\n",
 309 |               "</table>\n",
 310 |               "</div>"
 311 |             ],
 312 |             "text/plain": [
 313 |               "   Unnamed: 0  mileage_per_year  model_year      price sold\n",
 314 |               "0           0             21801        2000   30941.02  yes\n",
 315 |               "1           1              7843        1998   40557.96  yes\n",
 316 |               "2           2              7109        2006   89627.50   no\n",
 317 |               "3           3             26823        2015   95276.14   no\n",
 318 |               "4           4              7935        2014  117384.68  yes"
 319 |             ]
 320 |           },
 321 |           "metadata": {
 322 |             "tags": []
 323 |           },
 324 |           "execution_count": 1
 325 |         }
 326 |       ]
 327 |     },
 328 |     {
 329 |       "metadata": {
 330 |         "id": "FqFrPmgJDhvM",
 331 |         "colab_type": "code",
 332 |         "colab": {
 333 |           "base_uri": "https://localhost:8080/",
 334 |           "height": 198
 335 |         },
 336 |         "outputId": "46b16944-2a4a-4b53-b970-5a96a9b7d867"
 337 |       },
 338 |       "cell_type": "code",
 339 |       "source": [
 340 |         "a_renomear = {\n",
 341 |         "    'mileage_per_year' : 'milhas_por_ano',\n",
 342 |         "    'model_year' : 'ano_do_modelo',\n",
 343 |         "    'price' : 'preco',\n",
 344 |         "    'sold' : 'vendido'\n",
 345 |         "}\n",
 346 |         "dados = dados.rename(columns=a_renomear)\n",
 347 |         "dados.head()"
 348 |       ],
 349 |       "execution_count": 2,
 350 |       "outputs": [
 351 |         {
 352 |           "output_type": "execute_result",
 353 |           "data": {
 354 |             "text/html": [
 355 |               "<div>\n",
 356 |               "<style scoped>\n",
 357 |               "    .dataframe tbody tr th:only-of-type {\n",
 358 |               "        vertical-align: middle;\n",
 359 |               "    }\n",
 360 |               "\n",
 361 |               "    .dataframe tbody tr th {\n",
 362 |               "        vertical-align: top;\n",
 363 |               "    }\n",
 364 |               "\n",
 365 |               "    .dataframe thead th {\n",
 366 |               "        text-align: right;\n",
 367 |               "    }\n",
 368 |               "</style>\n",
 369 |               "<table border=\"1\" class=\"dataframe\">\n",
 370 |               "  <thead>\n",
 371 |               "    <tr style=\"text-align: right;\">\n",
 372 |               "      <th></th>\n",
 373 |               "      <th>Unnamed: 0</th>\n",
 374 |               "      <th>milhas_por_ano</th>\n",
 375 |               "      <th>ano_do_modelo</th>\n",
 376 |               "      <th>preco</th>\n",
 377 |               "      <th>vendido</th>\n",
 378 |               "    </tr>\n",
 379 |               "  </thead>\n",
 380 |               "  <tbody>\n",
 381 |               "    <tr>\n",
 382 |               "      <th>0</th>\n",
 383 |               "      <td>0</td>\n",
 384 |               "      <td>21801</td>\n",
 385 |               "      <td>2000</td>\n",
 386 |               "      <td>30941.02</td>\n",
 387 |               "      <td>yes</td>\n",
 388 |               "    </tr>\n",
 389 |               "    <tr>\n",
 390 |               "      <th>1</th>\n",
 391 |               "      <td>1</td>\n",
 392 |               "      <td>7843</td>\n",
 393 |               "      <td>1998</td>\n",
 394 |               "      <td>40557.96</td>\n",
 395 |               "      <td>yes</td>\n",
 396 |               "    </tr>\n",
 397 |               "    <tr>\n",
 398 |               "      <th>2</th>\n",
 399 |               "      <td>2</td>\n",
 400 |               "      <td>7109</td>\n",
 401 |               "      <td>2006</td>\n",
 402 |               "      <td>89627.50</td>\n",
 403 |               "      <td>no</td>\n",
 404 |               "    </tr>\n",
 405 |               "    <tr>\n",
 406 |               "      <th>3</th>\n",
 407 |               "      <td>3</td>\n",
 408 |               "      <td>26823</td>\n",
 409 |               "      <td>2015</td>\n",
 410 |               "      <td>95276.14</td>\n",
 411 |               "      <td>no</td>\n",
 412 |               "    </tr>\n",
 413 |               "    <tr>\n",
 414 |               "      <th>4</th>\n",
 415 |               "      <td>4</td>\n",
 416 |               "      <td>7935</td>\n",
 417 |               "      <td>2014</td>\n",
 418 |               "      <td>117384.68</td>\n",
 419 |               "      <td>yes</td>\n",
 420 |               "    </tr>\n",
 421 |               "  </tbody>\n",
 422 |               "</table>\n",
 423 |               "</div>"
 424 |             ],
 425 |             "text/plain": [
 426 |               "   Unnamed: 0  milhas_por_ano  ano_do_modelo      preco vendido\n",
 427 |               "0           0           21801           2000   30941.02     yes\n",
 428 |               "1           1            7843           1998   40557.96     yes\n",
 429 |               "2           2            7109           2006   89627.50      no\n",
 430 |               "3           3           26823           2015   95276.14      no\n",
 431 |               "4           4            7935           2014  117384.68     yes"
 432 |             ]
 433 |           },
 434 |           "metadata": {
 435 |             "tags": []
 436 |           },
 437 |           "execution_count": 2
 438 |         }
 439 |       ]
 440 |     },
 441 |     {
 442 |       "metadata": {
 443 |         "id": "J31fUGbVEKpW",
 444 |         "colab_type": "code",
 445 |         "colab": {
 446 |           "base_uri": "https://localhost:8080/",
 447 |           "height": 198
 448 |         },
 449 |         "outputId": "ad0a651f-320a-4f5e-8e2c-706bf6937fbb"
 450 |       },
 451 |       "cell_type": "code",
 452 |       "source": [
 453 |         "a_trocar = {\n",
 454 |         "    'no' : 0,\n",
 455 |         "    'yes' : 1\n",
 456 |         "}\n",
 457 |         "dados.vendido = dados.vendido.map(a_trocar)\n",
 458 |         "dados.head()"
 459 |       ],
 460 |       "execution_count": 3,
 461 |       "outputs": [
 462 |         {
 463 |           "output_type": "execute_result",
 464 |           "data": {
 465 |             "text/html": [
 466 |               "<div>\n",
 467 |               "<style scoped>\n",
 468 |               "    .dataframe tbody tr th:only-of-type {\n",
 469 |               "        vertical-align: middle;\n",
 470 |               "    }\n",
 471 |               "\n",
 472 |               "    .dataframe tbody tr th {\n",
 473 |               "        vertical-align: top;\n",
 474 |               "    }\n",
 475 |               "\n",
 476 |               "    .dataframe thead th {\n",
 477 |               "        text-align: right;\n",
 478 |               "    }\n",
 479 |               "</style>\n",
 480 |               "<table border=\"1\" class=\"dataframe\">\n",
 481 |               "  <thead>\n",
 482 |               "    <tr style=\"text-align: right;\">\n",
 483 |               "      <th></th>\n",
 484 |               "      <th>Unnamed: 0</th>\n",
 485 |               "      <th>milhas_por_ano</th>\n",
 486 |               "      <th>ano_do_modelo</th>\n",
 487 |               "      <th>preco</th>\n",
 488 |               "      <th>vendido</th>\n",
 489 |               "    </tr>\n",
 490 |               "  </thead>\n",
 491 |               "  <tbody>\n",
 492 |               "    <tr>\n",
 493 |               "      <th>0</th>\n",
 494 |               "      <td>0</td>\n",
 495 |               "      <td>21801</td>\n",
 496 |               "      <td>2000</td>\n",
 497 |               "      <td>30941.02</td>\n",
 498 |               "      <td>1</td>\n",
 499 |               "    </tr>\n",
 500 |               "    <tr>\n",
 501 |               "      <th>1</th>\n",
 502 |               "      <td>1</td>\n",
 503 |               "      <td>7843</td>\n",
 504 |               "      <td>1998</td>\n",
 505 |               "      <td>40557.96</td>\n",
 506 |               "      <td>1</td>\n",
 507 |               "    </tr>\n",
 508 |               "    <tr>\n",
 509 |               "      <th>2</th>\n",
 510 |               "      <td>2</td>\n",
 511 |               "      <td>7109</td>\n",
 512 |               "      <td>2006</td>\n",
 513 |               "      <td>89627.50</td>\n",
 514 |               "      <td>0</td>\n",
 515 |               "    </tr>\n",
 516 |               "    <tr>\n",
 517 |               "      <th>3</th>\n",
 518 |               "      <td>3</td>\n",
 519 |               "      <td>26823</td>\n",
 520 |               "      <td>2015</td>\n",
 521 |               "      <td>95276.14</td>\n",
 522 |               "      <td>0</td>\n",
 523 |               "    </tr>\n",
 524 |               "    <tr>\n",
 525 |               "      <th>4</th>\n",
 526 |               "      <td>4</td>\n",
 527 |               "      <td>7935</td>\n",
 528 |               "      <td>2014</td>\n",
 529 |               "      <td>117384.68</td>\n",
 530 |               "      <td>1</td>\n",
 531 |               "    </tr>\n",
 532 |               "  </tbody>\n",
 533 |               "</table>\n",
 534 |               "</div>"
 535 |             ],
 536 |             "text/plain": [
 537 |               "   Unnamed: 0  milhas_por_ano  ano_do_modelo      preco  vendido\n",
 538 |               "0           0           21801           2000   30941.02        1\n",
 539 |               "1           1            7843           1998   40557.96        1\n",
 540 |               "2           2            7109           2006   89627.50        0\n",
 541 |               "3           3           26823           2015   95276.14        0\n",
 542 |               "4           4            7935           2014  117384.68        1"
 543 |             ]
 544 |           },
 545 |           "metadata": {
 546 |             "tags": []
 547 |           },
 548 |           "execution_count": 3
 549 |         }
 550 |       ]
 551 |     },
 552 |     {
 553 |       "metadata": {
 554 |         "id": "tZFog8O9EXYD",
 555 |         "colab_type": "code",
 556 |         "colab": {
 557 |           "base_uri": "https://localhost:8080/",
 558 |           "height": 198
 559 |         },
 560 |         "outputId": "5792a2f8-f777-4237-e813-697f60309951"
 561 |       },
 562 |       "cell_type": "code",
 563 |       "source": [
 564 |         "from datetime import datetime\n",
 565 |         "\n",
 566 |         "ano_atual = datetime.today().year\n",
 567 |         "dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo\n",
 568 |         "dados.head()"
 569 |       ],
 570 |       "execution_count": 4,
 571 |       "outputs": [
 572 |         {
 573 |           "output_type": "execute_result",
 574 |           "data": {
 575 |             "text/html": [
 576 |               "<div>\n",
 577 |               "<style scoped>\n",
 578 |               "    .dataframe tbody tr th:only-of-type {\n",
 579 |               "        vertical-align: middle;\n",
 580 |               "    }\n",
 581 |               "\n",
 582 |               "    .dataframe tbody tr th {\n",
 583 |               "        vertical-align: top;\n",
 584 |               "    }\n",
 585 |               "\n",
 586 |               "    .dataframe thead th {\n",
 587 |               "        text-align: right;\n",
 588 |               "    }\n",
 589 |               "</style>\n",
 590 |               "<table border=\"1\" class=\"dataframe\">\n",
 591 |               "  <thead>\n",
 592 |               "    <tr style=\"text-align: right;\">\n",
 593 |               "      <th></th>\n",
 594 |               "      <th>Unnamed: 0</th>\n",
 595 |               "      <th>milhas_por_ano</th>\n",
 596 |               "      <th>ano_do_modelo</th>\n",
 597 |               "      <th>preco</th>\n",
 598 |               "      <th>vendido</th>\n",
 599 |               "      <th>idade_do_modelo</th>\n",
 600 |               "    </tr>\n",
 601 |               "  </thead>\n",
 602 |               "  <tbody>\n",
 603 |               "    <tr>\n",
 604 |               "      <th>0</th>\n",
 605 |               "      <td>0</td>\n",
 606 |               "      <td>21801</td>\n",
 607 |               "      <td>2000</td>\n",
 608 |               "      <td>30941.02</td>\n",
 609 |               "      <td>1</td>\n",
 610 |               "      <td>18</td>\n",
 611 |               "    </tr>\n",
 612 |               "    <tr>\n",
 613 |               "      <th>1</th>\n",
 614 |               "      <td>1</td>\n",
 615 |               "      <td>7843</td>\n",
 616 |               "      <td>1998</td>\n",
 617 |               "      <td>40557.96</td>\n",
 618 |               "      <td>1</td>\n",
 619 |               "      <td>20</td>\n",
 620 |               "    </tr>\n",
 621 |               "    <tr>\n",
 622 |               "      <th>2</th>\n",
 623 |               "      <td>2</td>\n",
 624 |               "      <td>7109</td>\n",
 625 |               "      <td>2006</td>\n",
 626 |               "      <td>89627.50</td>\n",
 627 |               "      <td>0</td>\n",
 628 |               "      <td>12</td>\n",
 629 |               "    </tr>\n",
 630 |               "    <tr>\n",
 631 |               "      <th>3</th>\n",
 632 |               "      <td>3</td>\n",
 633 |               "      <td>26823</td>\n",
 634 |               "      <td>2015</td>\n",
 635 |               "      <td>95276.14</td>\n",
 636 |               "      <td>0</td>\n",
 637 |               "      <td>3</td>\n",
 638 |               "    </tr>\n",
 639 |               "    <tr>\n",
 640 |               "      <th>4</th>\n",
 641 |               "      <td>4</td>\n",
 642 |               "      <td>7935</td>\n",
 643 |               "      <td>2014</td>\n",
 644 |               "      <td>117384.68</td>\n",
 645 |               "      <td>1</td>\n",
 646 |               "      <td>4</td>\n",
 647 |               "    </tr>\n",
 648 |               "  </tbody>\n",
 649 |               "</table>\n",
 650 |               "</div>"
 651 |             ],
 652 |             "text/plain": [
 653 |               "   Unnamed: 0  milhas_por_ano  ano_do_modelo      preco  vendido  \\\n",
 654 |               "0           0           21801           2000   30941.02        1   \n",
 655 |               "1           1            7843           1998   40557.96        1   \n",
 656 |               "2           2            7109           2006   89627.50        0   \n",
 657 |               "3           3           26823           2015   95276.14        0   \n",
 658 |               "4           4            7935           2014  117384.68        1   \n",
 659 |               "\n",
 660 |               "   idade_do_modelo  \n",
 661 |               "0               18  \n",
 662 |               "1               20  \n",
 663 |               "2               12  \n",
 664 |               "3                3  \n",
 665 |               "4                4  "
 666 |             ]
 667 |           },
 668 |           "metadata": {
 669 |             "tags": []
 670 |           },
 671 |           "execution_count": 4
 672 |         }
 673 |       ]
 674 |     },
 675 |     {
 676 |       "metadata": {
 677 |         "id": "3wWWgxhcFbR9",
 678 |         "colab_type": "code",
 679 |         "colab": {
 680 |           "base_uri": "https://localhost:8080/",
 681 |           "height": 198
 682 |         },
 683 |         "outputId": "b4c147dc-c2e5-4bfd-e78f-5a34f72770d6"
 684 |       },
 685 |       "cell_type": "code",
 686 |       "source": [
 687 |         "dados['km_por_ano'] = dados.milhas_por_ano * 1.60934\n",
 688 |         "dados.head()"
 689 |       ],
 690 |       "execution_count": 5,
 691 |       "outputs": [
 692 |         {
 693 |           "output_type": "execute_result",
 694 |           "data": {
 695 |             "text/html": [
 696 |               "<div>\n",
 697 |               "<style scoped>\n",
 698 |               "    .dataframe tbody tr th:only-of-type {\n",
 699 |               "        vertical-align: middle;\n",
 700 |               "    }\n",
 701 |               "\n",
 702 |               "    .dataframe tbody tr th {\n",
 703 |               "        vertical-align: top;\n",
 704 |               "    }\n",
 705 |               "\n",
 706 |               "    .dataframe thead th {\n",
 707 |               "        text-align: right;\n",
 708 |               "    }\n",
 709 |               "</style>\n",
 710 |               "<table border=\"1\" class=\"dataframe\">\n",
 711 |               "  <thead>\n",
 712 |               "    <tr style=\"text-align: right;\">\n",
 713 |               "      <th></th>\n",
 714 |               "      <th>Unnamed: 0</th>\n",
 715 |               "      <th>milhas_por_ano</th>\n",
 716 |               "      <th>ano_do_modelo</th>\n",
 717 |               "      <th>preco</th>\n",
 718 |               "      <th>vendido</th>\n",
 719 |               "      <th>idade_do_modelo</th>\n",
 720 |               "      <th>km_por_ano</th>\n",
 721 |               "    </tr>\n",
 722 |               "  </thead>\n",
 723 |               "  <tbody>\n",
 724 |               "    <tr>\n",
 725 |               "      <th>0</th>\n",
 726 |               "      <td>0</td>\n",
 727 |               "      <td>21801</td>\n",
 728 |               "      <td>2000</td>\n",
 729 |               "      <td>30941.02</td>\n",
 730 |               "      <td>1</td>\n",
 731 |               "      <td>18</td>\n",
 732 |               "      <td>35085.22134</td>\n",
 733 |               "    </tr>\n",
 734 |               "    <tr>\n",
 735 |               "      <th>1</th>\n",
 736 |               "      <td>1</td>\n",
 737 |               "      <td>7843</td>\n",
 738 |               "      <td>1998</td>\n",
 739 |               "      <td>40557.96</td>\n",
 740 |               "      <td>1</td>\n",
 741 |               "      <td>20</td>\n",
 742 |               "      <td>12622.05362</td>\n",
 743 |               "    </tr>\n",
 744 |               "    <tr>\n",
 745 |               "      <th>2</th>\n",
 746 |               "      <td>2</td>\n",
 747 |               "      <td>7109</td>\n",
 748 |               "      <td>2006</td>\n",
 749 |               "      <td>89627.50</td>\n",
 750 |               "      <td>0</td>\n",
 751 |               "      <td>12</td>\n",
 752 |               "      <td>11440.79806</td>\n",
 753 |               "    </tr>\n",
 754 |               "    <tr>\n",
 755 |               "      <th>3</th>\n",
 756 |               "      <td>3</td>\n",
 757 |               "      <td>26823</td>\n",
 758 |               "      <td>2015</td>\n",
 759 |               "      <td>95276.14</td>\n",
 760 |               "      <td>0</td>\n",
 761 |               "      <td>3</td>\n",
 762 |               "      <td>43167.32682</td>\n",
 763 |               "    </tr>\n",
 764 |               "    <tr>\n",
 765 |               "      <th>4</th>\n",
 766 |               "      <td>4</td>\n",
 767 |               "      <td>7935</td>\n",
 768 |               "      <td>2014</td>\n",
 769 |               "      <td>117384.68</td>\n",
 770 |               "      <td>1</td>\n",
 771 |               "      <td>4</td>\n",
 772 |               "      <td>12770.11290</td>\n",
 773 |               "    </tr>\n",
 774 |               "  </tbody>\n",
 775 |               "</table>\n",
 776 |               "</div>"
 777 |             ],
 778 |             "text/plain": [
 779 |               "   Unnamed: 0  milhas_por_ano  ano_do_modelo      preco  vendido  \\\n",
 780 |               "0           0           21801           2000   30941.02        1   \n",
 781 |               "1           1            7843           1998   40557.96        1   \n",
 782 |               "2           2            7109           2006   89627.50        0   \n",
 783 |               "3           3           26823           2015   95276.14        0   \n",
 784 |               "4           4            7935           2014  117384.68        1   \n",
 785 |               "\n",
 786 |               "   idade_do_modelo   km_por_ano  \n",
 787 |               "0               18  35085.22134  \n",
 788 |               "1               20  12622.05362  \n",
 789 |               "2               12  11440.79806  \n",
 790 |               "3                3  43167.32682  \n",
 791 |               "4                4  12770.11290  "
 792 |             ]
 793 |           },
 794 |           "metadata": {
 795 |             "tags": []
 796 |           },
 797 |           "execution_count": 5
 798 |         }
 799 |       ]
 800 |     },
 801 |     {
 802 |       "metadata": {
 803 |         "id": "MfjQNKlyFo2S",
 804 |         "colab_type": "code",
 805 |         "colab": {
 806 |           "base_uri": "https://localhost:8080/",
 807 |           "height": 198
 808 |         },
 809 |         "outputId": "7588f6a5-ff37-4996-91a5-d9ee4b5543d7"
 810 |       },
 811 |       "cell_type": "code",
 812 |       "source": [
 813 |         "dados = dados.drop(columns = [\"Unnamed: 0\", \"milhas_por_ano\",\"ano_do_modelo\"], axis=1)\n",
 814 |         "dados.head()"
 815 |       ],
 816 |       "execution_count": 6,
 817 |       "outputs": [
 818 |         {
 819 |           "output_type": "execute_result",
 820 |           "data": {
 821 |             "text/html": [
 822 |               "<div>\n",
 823 |               "<style scoped>\n",
 824 |               "    .dataframe tbody tr th:only-of-type {\n",
 825 |               "        vertical-align: middle;\n",
 826 |               "    }\n",
 827 |               "\n",
 828 |               "    .dataframe tbody tr th {\n",
 829 |               "        vertical-align: top;\n",
 830 |               "    }\n",
 831 |               "\n",
 832 |               "    .dataframe thead th {\n",
 833 |               "        text-align: right;\n",
 834 |               "    }\n",
 835 |               "</style>\n",
 836 |               "<table border=\"1\" class=\"dataframe\">\n",
 837 |               "  <thead>\n",
 838 |               "    <tr style=\"text-align: right;\">\n",
 839 |               "      <th></th>\n",
 840 |               "      <th>preco</th>\n",
 841 |               "      <th>vendido</th>\n",
 842 |               "      <th>idade_do_modelo</th>\n",
 843 |               "      <th>km_por_ano</th>\n",
 844 |               "    </tr>\n",
 845 |               "  </thead>\n",
 846 |               "  <tbody>\n",
 847 |               "    <tr>\n",
 848 |               "      <th>0</th>\n",
 849 |               "      <td>30941.02</td>\n",
 850 |               "      <td>1</td>\n",
 851 |               "      <td>18</td>\n",
 852 |               "      <td>35085.22134</td>\n",
 853 |               "    </tr>\n",
 854 |               "    <tr>\n",
 855 |               "      <th>1</th>\n",
 856 |               "      <td>40557.96</td>\n",
 857 |               "      <td>1</td>\n",
 858 |               "      <td>20</td>\n",
 859 |               "      <td>12622.05362</td>\n",
 860 |               "    </tr>\n",
 861 |               "    <tr>\n",
 862 |               "      <th>2</th>\n",
 863 |               "      <td>89627.50</td>\n",
 864 |               "      <td>0</td>\n",
 865 |               "      <td>12</td>\n",
 866 |               "      <td>11440.79806</td>\n",
 867 |               "    </tr>\n",
 868 |               "    <tr>\n",
 869 |               "      <th>3</th>\n",
 870 |               "      <td>95276.14</td>\n",
 871 |               "      <td>0</td>\n",
 872 |               "      <td>3</td>\n",
 873 |               "      <td>43167.32682</td>\n",
 874 |               "    </tr>\n",
 875 |               "    <tr>\n",
 876 |               "      <th>4</th>\n",
 877 |               "      <td>117384.68</td>\n",
 878 |               "      <td>1</td>\n",
 879 |               "      <td>4</td>\n",
 880 |               "      <td>12770.11290</td>\n",
 881 |               "    </tr>\n",
 882 |               "  </tbody>\n",
 883 |               "</table>\n",
 884 |               "</div>"
 885 |             ],
 886 |             "text/plain": [
 887 |               "       preco  vendido  idade_do_modelo   km_por_ano\n",
 888 |               "0   30941.02        1               18  35085.22134\n",
 889 |               "1   40557.96        1               20  12622.05362\n",
 890 |               "2   89627.50        0               12  11440.79806\n",
 891 |               "3   95276.14        0                3  43167.32682\n",
 892 |               "4  117384.68        1                4  12770.11290"
 893 |             ]
 894 |           },
 895 |           "metadata": {
 896 |             "tags": []
 897 |           },
 898 |           "execution_count": 6
 899 |         }
 900 |       ]
 901 |     },
 902 |     {
 903 |       "metadata": {
 904 |         "id": "E3xebM4FF0Tc",
 905 |         "colab_type": "code",
 906 |         "colab": {
 907 |           "base_uri": "https://localhost:8080/",
 908 |           "height": 52
 909 |         },
 910 |         "outputId": "7d48ae66-beb0-4884-cbfa-c81c9ce95eb1"
 911 |       },
 912 |       "cell_type": "code",
 913 |       "source": [
 914 |         "import numpy as np\n",
 915 |         "from sklearn.model_selection import train_test_split\n",
 916 |         "from sklearn.svm import LinearSVC\n",
 917 |         "from sklearn.metrics import accuracy_score\n",
 918 |         "\n",
 919 |         "x = dados[[\"preco\", \"idade_do_modelo\",\"km_por_ano\"]]\n",
 920 |         "y = dados[\"vendido\"]\n",
 921 |         "\n",
 922 |         "SEED = 5\n",
 923 |         "np.random.seed(SEED)\n",
 924 |         "treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n",
 925 |         "                                                         stratify = y)\n",
 926 |         "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n",
 927 |         "\n",
 928 |         "modelo = LinearSVC()\n",
 929 |         "modelo.fit(treino_x, treino_y)\n",
 930 |         "previsoes = modelo.predict(teste_x)\n",
 931 |         "\n",
 932 |         "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
 933 |         "print(\"A acurácia foi %.2f%%\" % acuracia)"
 934 |       ],
 935 |       "execution_count": 17,
 936 |       "outputs": [
 937 |         {
 938 |           "output_type": "stream",
 939 |           "text": [
 940 |             "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n",
 941 |             "A acurácia foi 57.88%\n"
 942 |           ],
 943 |           "name": "stdout"
 944 |         }
 945 |       ]
 946 |     },
 947 |     {
 948 |       "metadata": {
 949 |         "id": "G2ZFWoPkGONL",
 950 |         "colab_type": "code",
 951 |         "colab": {
 952 |           "base_uri": "https://localhost:8080/",
 953 |           "height": 35
 954 |         },
 955 |         "outputId": "f86fecd4-0528-420d-face-b3ab72af6a23"
 956 |       },
 957 |       "cell_type": "code",
 958 |       "source": [
 959 |         "from sklearn.dummy import DummyClassifier\n",
 960 |         "\n",
 961 |         "dummy_stratified = DummyClassifier(strategy=\"stratified\")\n",
 962 |         "dummy_stratified.fit(treino_x, treino_y)\n",
 963 |         "acuracia = dummy_stratified.score(teste_x, teste_y) * 100\n",
 964 |         "\n",
 965 |         "print(\"A acurácia do dummy stratified foi %.2f%%\" % acuracia)"
 966 |       ],
 967 |       "execution_count": 18,
 968 |       "outputs": [
 969 |         {
 970 |           "output_type": "stream",
 971 |           "text": [
 972 |             "A acurácia do dummy stratified foi 52.44%\n"
 973 |           ],
 974 |           "name": "stdout"
 975 |         }
 976 |       ]
 977 |     },
 978 |     {
 979 |       "metadata": {
 980 |         "id": "469ZKbHJIHL0",
 981 |         "colab_type": "code",
 982 |         "colab": {
 983 |           "base_uri": "https://localhost:8080/",
 984 |           "height": 35
 985 |         },
 986 |         "outputId": "7348330f-4752-4e1e-8f27-2420acf2ad14"
 987 |       },
 988 |       "cell_type": "code",
 989 |       "source": [
 990 |         "from sklearn.dummy import DummyClassifier\n",
 991 |         "\n",
 992 |         "dummy_mostfrequent = DummyClassifier(strategy=\"most_frequent\")\n",
 993 |         "dummy_mostfrequent.fit(treino_x, treino_y)\n",
 994 |         "acuracia = dummy_mostfrequent.score(teste_x, teste_y) * 100\n",
 995 |         "\n",
 996 |         "print(\"A acurácia do dummy mostfrequent foi %.2f%%\" % acuracia)"
 997 |       ],
 998 |       "execution_count": 19,
 999 |       "outputs": [
1000 |         {
1001 |           "output_type": "stream",
1002 |           "text": [
1003 |             "A acurácia do dummy mostfrequent foi 50.68%\n"
1004 |           ],
1005 |           "name": "stdout"
1006 |         }
1007 |       ]
1008 |     },
1009 |     {
1010 |       "metadata": {
1011 |         "id": "S_5AfFerIsH_",
1012 |         "colab_type": "code",
1013 |         "colab": {
1014 |           "base_uri": "https://localhost:8080/",
1015 |           "height": 52
1016 |         },
1017 |         "outputId": "569a686d-9b86-459f-d30d-5ce738bb6897"
1018 |       },
1019 |       "cell_type": "code",
1020 |       "source": [
1021 |         "from sklearn.preprocessing import StandardScaler\n",
1022 |         "from sklearn.model_selection import train_test_split\n",
1023 |         "from sklearn.svm import SVC\n",
1024 |         "from sklearn.metrics import accuracy_score\n",
1025 |         "\n",
1026 |         "SEED = 5\n",
1027 |         "np.random.seed(SEED)\n",
1028 |         "raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n",
1029 |         "                                                         stratify = y)\n",
1030 |         "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(raw_treino_x), len(raw_teste_x)))\n",
1031 |         "\n",
1032 |         "scaler = StandardScaler()\n",
1033 |         "scaler.fit(raw_treino_x)\n",
1034 |         "treino_x = scaler.transform(raw_treino_x)\n",
1035 |         "teste_x = scaler.transform(raw_teste_x)\n",
1036 |         "\n",
1037 |         "modelo = SVC()\n",
1038 |         "modelo.fit(treino_x, treino_y)\n",
1039 |         "previsoes = modelo.predict(teste_x)\n",
1040 |         "\n",
1041 |         "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
1042 |         "print(\"A acurácia foi %.2f%%\" % acuracia)\n"
1043 |       ],
1044 |       "execution_count": 20,
1045 |       "outputs": [
1046 |         {
1047 |           "output_type": "stream",
1048 |           "text": [
1049 |             "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n",
1050 |             "A acurácia foi 77.48%\n"
1051 |           ],
1052 |           "name": "stdout"
1053 |         }
1054 |       ]
1055 |     },
1056 |     {
1057 |       "metadata": {
1058 |         "id": "-1YsD-whJuGR",
1059 |         "colab_type": "code",
1060 |         "colab": {
1061 |           "base_uri": "https://localhost:8080/",
1062 |           "height": 52
1063 |         },
1064 |         "outputId": "5e0c7f00-98ce-4b32-bbc6-908dde4cc2f8"
1065 |       },
1066 |       "cell_type": "code",
1067 |       "source": [
1068 |         "from sklearn.preprocessing import StandardScaler\n",
1069 |         "from sklearn.model_selection import train_test_split\n",
1070 |         "from sklearn.tree import DecisionTreeClassifier\n",
1071 |         "from sklearn.metrics import accuracy_score\n",
1072 |         "\n",
1073 |         "SEED = 5\n",
1074 |         "np.random.seed(SEED)\n",
1075 |         "raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n",
1076 |         "                                                         stratify = y)\n",
1077 |         "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(raw_treino_x), len(raw_teste_x)))\n",
1078 |         "\n",
1079 |         "modelo = DecisionTreeClassifier(max_depth=3)\n",
1080 |         "modelo.fit(raw_treino_x, treino_y)\n",
1081 |         "previsoes = modelo.predict(raw_teste_x)\n",
1082 |         "\n",
1083 |         "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
1084 |         "print(\"A acurácia foi %.2f%%\" % acuracia)\n"
1085 |       ],
1086 |       "execution_count": 37,
1087 |       "outputs": [
1088 |         {
1089 |           "output_type": "stream",
1090 |           "text": [
1091 |             "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n",
1092 |             "A acurácia foi 79.28%\n"
1093 |           ],
1094 |           "name": "stdout"
1095 |         }
1096 |       ]
1097 |     },
1098 |     {
1099 |       "metadata": {
1100 |         "id": "-8O83jbOLctO",
1101 |         "colab_type": "code",
1102 |         "colab": {
1103 |           "base_uri": "https://localhost:8080/",
1104 |           "height": 619
1105 |         },
1106 |         "outputId": "8ddc6972-e0a7-40d3-adba-0877358ab355"
1107 |       },
1108 |       "cell_type": "code",
1109 |       "source": [
1110 |         "from sklearn.tree import export_graphviz\n",
1111 |         "import graphviz\n",
1112 |         "\n",
1113 |         "features = x.columns\n",
1114 |         "dot_data = export_graphviz(modelo, out_file=None,\n",
1115 |         "                           filled = True, rounded = True,\n",
1116 |         "                           feature_names = features,\n",
1117 |         "                          class_names = [\"não\", \"sim\"])\n",
1118 |         "grafico = graphviz.Source(dot_data)\n",
1119 |         "grafico"
1120 |       ],
1121 |       "execution_count": 38,
1122 |       "outputs": [
1123 |         {
1124 |           "output_type": "execute_result",
1125 |           "data": {
1126 |             "text/plain": [
1127 |               "<graphviz.files.Source at 0x7f66cd11fa20>"
1128 |             ],
1129 |             "image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 2.40.1 (20161225.0304)\n -->\n<!-- Title: Tree Pages: 1 -->\n<svg width=\"894pt\" height=\"433pt\"\n viewBox=\"0.00 0.00 894.00 433.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 429)\">\n<title>Tree</title>\n<polygon fill=\"#ffffff\" stroke=\"transparent\" points=\"-4,4 -4,-429 890,-429 890,4 -4,4\"/>\n<!-- 0 -->\n<g id=\"node1\" class=\"node\">\n<title>0</title>\n<path fill=\"#399de5\" fill-opacity=\"0.274510\" stroke=\"#000000\" d=\"M372.5,-425C372.5,-425 247.5,-425 247.5,-425 241.5,-425 235.5,-419 235.5,-413 235.5,-413 235.5,-354 235.5,-354 235.5,-348 241.5,-342 247.5,-342 247.5,-342 372.5,-342 372.5,-342 378.5,-342 384.5,-348 384.5,-354 384.5,-354 384.5,-413 384.5,-413 384.5,-419 378.5,-425 372.5,-425\"/>\n<text text-anchor=\"middle\" x=\"310\" y=\"-409.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">preco &lt;= 59999.074</text>\n<text text-anchor=\"middle\" x=\"310\" y=\"-394.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">gini = 0.487</text>\n<text text-anchor=\"middle\" x=\"310\" y=\"-379.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">samples = 7500</text>\n<text text-anchor=\"middle\" x=\"310\" y=\"-364.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">value = [3150, 4350]</text>\n<text text-anchor=\"middle\" x=\"310\" y=\"-349.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">class = sim</text>\n</g>\n<!-- 1 -->\n<g id=\"node2\" class=\"node\">\n<title>1</title>\n<path fill=\"#399de5\" fill-opacity=\"0.870588\" stroke=\"#000000\" d=\"M277.5,-306C277.5,-306 
154.5,-306 154.5,-306 148.5,-306 142.5,-300 142.5,-294 142.5,-294 142.5,-235 142.5,-235 142.5,-229 148.5,-223 154.5,-223 154.5,-223 277.5,-223 277.5,-223 283.5,-223 289.5,-229 289.5,-235 289.5,-235 289.5,-294 289.5,-294 289.5,-300 283.5,-306 277.5,-306\"/>\n<text text-anchor=\"middle\" x=\"216\" y=\"-290.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">preco &lt;= 40070.156</text>\n<text text-anchor=\"middle\" x=\"216\" y=\"-275.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">gini = 0.202</text>\n<text text-anchor=\"middle\" x=\"216\" y=\"-260.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">samples = 3248</text>\n<text text-anchor=\"middle\" x=\"216\" y=\"-245.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">value = [370, 2878]</text>\n<text text-anchor=\"middle\" x=\"216\" y=\"-230.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">class = sim</text>\n</g>\n<!-- 0&#45;&gt;1 -->\n<g id=\"edge1\" class=\"edge\">\n<title>0&#45;&gt;1</title>\n<path fill=\"none\" stroke=\"#000000\" d=\"M277.1234,-341.8796C270.0803,-332.9633 262.5707,-323.4565 255.3126,-314.268\"/>\n<polygon fill=\"#000000\" stroke=\"#000000\" points=\"257.9621,-311.9757 249.017,-306.2981 252.4691,-316.3147 257.9621,-311.9757\"/>\n<text text-anchor=\"middle\" x=\"246.1027\" y=\"-327.4276\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">True</text>\n</g>\n<!-- 6 -->\n<g id=\"node7\" class=\"node\">\n<title>6</title>\n<path fill=\"#e58139\" fill-opacity=\"0.470588\" stroke=\"#000000\" d=\"M540,-306C540,-306 372,-306 372,-306 366,-306 360,-300 360,-294 360,-294 360,-235 360,-235 360,-229 366,-223 372,-223 372,-223 540,-223 540,-223 546,-223 552,-229 552,-235 552,-235 552,-294 552,-294 552,-300 546,-306 540,-306\"/>\n<text text-anchor=\"middle\" x=\"456\" y=\"-290.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" 
fill=\"#000000\">km_por_ano &lt;= 24112.742</text>\n<text text-anchor=\"middle\" x=\"456\" y=\"-275.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">gini = 0.453</text>\n<text text-anchor=\"middle\" x=\"456\" y=\"-260.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">samples = 4252</text>\n<text text-anchor=\"middle\" x=\"456\" y=\"-245.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">value = [2780, 1472]</text>\n<text text-anchor=\"middle\" x=\"456\" y=\"-230.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">class = não</text>\n</g>\n<!-- 0&#45;&gt;6 -->\n<g id=\"edge6\" class=\"edge\">\n<title>0&#45;&gt;6</title>\n<path fill=\"none\" stroke=\"#000000\" d=\"M361.0637,-341.8796C372.5555,-332.513 384.8468,-322.4948 396.645,-312.8784\"/>\n<polygon fill=\"#000000\" stroke=\"#000000\" points=\"399.1782,-315.329 404.7183,-306.2981 394.7556,-309.903 399.1782,-315.329\"/>\n<text text-anchor=\"middle\" x=\"402.1843\" y=\"-327.4693\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">False</text>\n</g>\n<!-- 2 -->\n<g id=\"node3\" class=\"node\">\n<title>2</title>\n<path fill=\"#399de5\" stroke=\"#000000\" d=\"M112,-179.5C112,-179.5 12,-179.5 12,-179.5 6,-179.5 0,-173.5 0,-167.5 0,-167.5 0,-123.5 0,-123.5 0,-117.5 6,-111.5 12,-111.5 12,-111.5 112,-111.5 112,-111.5 118,-111.5 124,-117.5 124,-123.5 124,-123.5 124,-167.5 124,-167.5 124,-173.5 118,-179.5 112,-179.5\"/>\n<text text-anchor=\"middle\" x=\"62\" y=\"-164.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">gini = 0.0</text>\n<text text-anchor=\"middle\" x=\"62\" y=\"-149.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">samples = 1441</text>\n<text text-anchor=\"middle\" x=\"62\" y=\"-134.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">value = [0, 1441]</text>\n<text text-anchor=\"middle\" x=\"62\" 
y=\"-119.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">class = sim</text>\n</g>\n<!-- 1&#45;&gt;2 -->\n<g id=\"edge2\" class=\"edge\">\n<title>1&#45;&gt;2</title>\n<path fill=\"none\" stroke=\"#000000\" d=\"M162.1383,-222.8796C146.7696,-211.0038 130.0453,-198.0804 114.7441,-186.2568\"/>\n<polygon fill=\"#000000\" stroke=\"#000000\" points=\"116.4605,-183.1599 106.4076,-179.8149 112.1804,-188.699 116.4605,-183.1599\"/>\n</g>\n<!-- 3 -->\n<g id=\"node4\" class=\"node\">\n<title>3</title>\n<path fill=\"#399de5\" fill-opacity=\"0.741176\" stroke=\"#000000\" d=\"M277.5,-187C277.5,-187 154.5,-187 154.5,-187 148.5,-187 142.5,-181 142.5,-175 142.5,-175 142.5,-116 142.5,-116 142.5,-110 148.5,-104 154.5,-104 154.5,-104 277.5,-104 277.5,-104 283.5,-104 289.5,-110 289.5,-116 289.5,-116 289.5,-175 289.5,-175 289.5,-181 283.5,-187 277.5,-187\"/>\n<text text-anchor=\"middle\" x=\"216\" y=\"-171.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">preco &lt;= 40723.648</text>\n<text text-anchor=\"middle\" x=\"216\" y=\"-156.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">gini = 0.326</text>\n<text text-anchor=\"middle\" x=\"216\" y=\"-141.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">samples = 1807</text>\n<text text-anchor=\"middle\" x=\"216\" y=\"-126.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">value = [370, 1437]</text>\n<text text-anchor=\"middle\" x=\"216\" y=\"-111.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">class = sim</text>\n</g>\n<!-- 1&#45;&gt;3 -->\n<g id=\"edge3\" class=\"edge\">\n<title>1&#45;&gt;3</title>\n<path fill=\"none\" stroke=\"#000000\" d=\"M216,-222.8796C216,-214.6838 216,-205.9891 216,-197.5013\"/>\n<polygon fill=\"#000000\" stroke=\"#000000\" points=\"219.5001,-197.298 216,-187.2981 212.5001,-197.2981 219.5001,-197.298\"/>\n</g>\n<!-- 4 -->\n<g id=\"node5\" 
class=\"node\">\n<title>4</title>\n<path fill=\"#399de5\" fill-opacity=\"0.513725\" stroke=\"#000000\" d=\"M116,-68C116,-68 24,-68 24,-68 18,-68 12,-62 12,-56 12,-56 12,-12 12,-12 12,-6 18,0 24,0 24,0 116,0 116,0 122,0 128,-6 128,-12 128,-12 128,-56 128,-56 128,-62 122,-68 116,-68\"/>\n<text text-anchor=\"middle\" x=\"70\" y=\"-52.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">gini = 0.441</text>\n<text text-anchor=\"middle\" x=\"70\" y=\"-37.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">samples = 61</text>\n<text text-anchor=\"middle\" x=\"70\" y=\"-22.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">value = [20, 41]</text>\n<text text-anchor=\"middle\" x=\"70\" y=\"-7.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">class = sim</text>\n</g>\n<!-- 3&#45;&gt;4 -->\n<g id=\"edge4\" class=\"edge\">\n<title>3&#45;&gt;4</title>\n<path fill=\"none\" stroke=\"#000000\" d=\"M161.635,-103.9815C148.9989,-94.3313 135.5644,-84.0714 122.9885,-74.4673\"/>\n<polygon fill=\"#000000\" stroke=\"#000000\" points=\"124.9372,-71.5515 114.8654,-68.2637 120.6886,-77.1148 124.9372,-71.5515\"/>\n</g>\n<!-- 5 -->\n<g id=\"node6\" class=\"node\">\n<title>5</title>\n<path fill=\"#399de5\" fill-opacity=\"0.749020\" stroke=\"#000000\" d=\"M274,-68C274,-68 158,-68 158,-68 152,-68 146,-62 146,-56 146,-56 146,-12 146,-12 146,-6 152,0 158,0 158,0 274,0 274,0 280,0 286,-6 286,-12 286,-12 286,-56 286,-56 286,-62 280,-68 274,-68\"/>\n<text text-anchor=\"middle\" x=\"216\" y=\"-52.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">gini = 0.321</text>\n<text text-anchor=\"middle\" x=\"216\" y=\"-37.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">samples = 1746</text>\n<text text-anchor=\"middle\" x=\"216\" y=\"-22.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">value = [350, 1396]</text>\n<text 
text-anchor=\"middle\" x=\"216\" y=\"-7.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">class = sim</text>\n</g>\n<!-- 3&#45;&gt;5 -->\n<g id=\"edge5\" class=\"edge\">\n<title>3&#45;&gt;5</title>\n<path fill=\"none\" stroke=\"#000000\" d=\"M216,-103.9815C216,-95.618 216,-86.7965 216,-78.3409\"/>\n<polygon fill=\"#000000\" stroke=\"#000000\" points=\"219.5001,-78.2636 216,-68.2637 212.5001,-78.2637 219.5001,-78.2636\"/>\n</g>\n<!-- 7 -->\n<g id=\"node8\" class=\"node\">\n<title>7</title>\n<path fill=\"#e58139\" fill-opacity=\"0.121569\" stroke=\"#000000\" d=\"M518.5,-187C518.5,-187 393.5,-187 393.5,-187 387.5,-187 381.5,-181 381.5,-175 381.5,-175 381.5,-116 381.5,-116 381.5,-110 387.5,-104 393.5,-104 393.5,-104 518.5,-104 518.5,-104 524.5,-104 530.5,-110 530.5,-116 530.5,-116 530.5,-175 530.5,-175 530.5,-181 524.5,-187 518.5,-187\"/>\n<text text-anchor=\"middle\" x=\"456\" y=\"-171.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">preco &lt;= 99974.406</text>\n<text text-anchor=\"middle\" x=\"456\" y=\"-156.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">gini = 0.498</text>\n<text text-anchor=\"middle\" x=\"456\" y=\"-141.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">samples = 2649</text>\n<text text-anchor=\"middle\" x=\"456\" y=\"-126.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">value = [1409, 1240]</text>\n<text text-anchor=\"middle\" x=\"456\" y=\"-111.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">class = não</text>\n</g>\n<!-- 6&#45;&gt;7 -->\n<g id=\"edge7\" class=\"edge\">\n<title>6&#45;&gt;7</title>\n<path fill=\"none\" stroke=\"#000000\" d=\"M456,-222.8796C456,-214.6838 456,-205.9891 456,-197.5013\"/>\n<polygon fill=\"#000000\" stroke=\"#000000\" points=\"459.5001,-197.298 456,-187.2981 452.5001,-197.2981 459.5001,-197.298\"/>\n</g>\n<!-- 10 -->\n<g id=\"node11\" 
class=\"node\">\n<title>10</title>\n<path fill=\"#e58139\" fill-opacity=\"0.831373\" stroke=\"#000000\" d=\"M743.5,-187C743.5,-187 612.5,-187 612.5,-187 606.5,-187 600.5,-181 600.5,-175 600.5,-175 600.5,-116 600.5,-116 600.5,-110 606.5,-104 612.5,-104 612.5,-104 743.5,-104 743.5,-104 749.5,-104 755.5,-110 755.5,-116 755.5,-116 755.5,-175 755.5,-175 755.5,-181 749.5,-187 743.5,-187\"/>\n<text text-anchor=\"middle\" x=\"678\" y=\"-171.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">preco &lt;= 100076.953</text>\n<text text-anchor=\"middle\" x=\"678\" y=\"-156.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">gini = 0.248</text>\n<text text-anchor=\"middle\" x=\"678\" y=\"-141.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">samples = 1603</text>\n<text text-anchor=\"middle\" x=\"678\" y=\"-126.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">value = [1371, 232]</text>\n<text text-anchor=\"middle\" x=\"678\" y=\"-111.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">class = não</text>\n</g>\n<!-- 6&#45;&gt;10 -->\n<g id=\"edge10\" class=\"edge\">\n<title>6&#45;&gt;10</title>\n<path fill=\"none\" stroke=\"#000000\" d=\"M533.6448,-222.8796C552.3186,-212.8697 572.3809,-202.1156 591.4328,-191.9031\"/>\n<polygon fill=\"#000000\" stroke=\"#000000\" points=\"593.2389,-194.9062 600.399,-187.0969 589.9318,-188.7366 593.2389,-194.9062\"/>\n</g>\n<!-- 8 -->\n<g id=\"node9\" class=\"node\">\n<title>8</title>\n<path fill=\"#e58139\" fill-opacity=\"0.243137\" stroke=\"#000000\" d=\"M432,-68C432,-68 316,-68 316,-68 310,-68 304,-62 304,-56 304,-56 304,-12 304,-12 304,-6 310,0 316,0 316,0 432,0 432,0 438,0 444,-6 444,-12 444,-12 444,-56 444,-56 444,-62 438,-68 432,-68\"/>\n<text text-anchor=\"middle\" x=\"374\" y=\"-52.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">gini = 0.49</text>\n<text text-anchor=\"middle\" 
x=\"374\" y=\"-37.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">samples = 2223</text>\n<text text-anchor=\"middle\" x=\"374\" y=\"-22.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">value = [1266, 957]</text>\n<text text-anchor=\"middle\" x=\"374\" y=\"-7.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">class = não</text>\n</g>\n<!-- 7&#45;&gt;8 -->\n<g id=\"edge8\" class=\"edge\">\n<title>7&#45;&gt;8</title>\n<path fill=\"none\" stroke=\"#000000\" d=\"M425.4662,-103.9815C418.8424,-94.9747 411.8279,-85.4367 405.1795,-76.3965\"/>\n<polygon fill=\"#000000\" stroke=\"#000000\" points=\"407.9426,-74.2461 399.1984,-68.2637 402.3034,-78.3933 407.9426,-74.2461\"/>\n</g>\n<!-- 9 -->\n<g id=\"node10\" class=\"node\">\n<title>9</title>\n<path fill=\"#399de5\" fill-opacity=\"0.494118\" stroke=\"#000000\" d=\"M582,-68C582,-68 474,-68 474,-68 468,-68 462,-62 462,-56 462,-56 462,-12 462,-12 462,-6 468,0 474,0 474,0 582,0 582,0 588,0 594,-6 594,-12 594,-12 594,-56 594,-56 594,-62 588,-68 582,-68\"/>\n<text text-anchor=\"middle\" x=\"528\" y=\"-52.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">gini = 0.446</text>\n<text text-anchor=\"middle\" x=\"528\" y=\"-37.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">samples = 426</text>\n<text text-anchor=\"middle\" x=\"528\" y=\"-22.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">value = [143, 283]</text>\n<text text-anchor=\"middle\" x=\"528\" y=\"-7.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">class = sim</text>\n</g>\n<!-- 7&#45;&gt;9 -->\n<g id=\"edge9\" class=\"edge\">\n<title>7&#45;&gt;9</title>\n<path fill=\"none\" stroke=\"#000000\" d=\"M482.8101,-103.9815C488.5669,-95.0666 494.6596,-85.6313 500.4441,-76.6734\"/>\n<polygon fill=\"#000000\" stroke=\"#000000\" points=\"503.3901,-78.5631 505.8746,-68.2637 497.5096,-74.7658 
503.3901,-78.5631\"/>\n</g>\n<!-- 11 -->\n<g id=\"node12\" class=\"node\">\n<title>11</title>\n<path fill=\"#e58139\" fill-opacity=\"0.952941\" stroke=\"#000000\" d=\"M732,-68C732,-68 624,-68 624,-68 618,-68 612,-62 612,-56 612,-56 612,-12 612,-12 612,-6 618,0 624,0 624,0 732,0 732,0 738,0 744,-6 744,-12 744,-12 744,-56 744,-56 744,-62 738,-68 732,-68\"/>\n<text text-anchor=\"middle\" x=\"678\" y=\"-52.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">gini = 0.086</text>\n<text text-anchor=\"middle\" x=\"678\" y=\"-37.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">samples = 1356</text>\n<text text-anchor=\"middle\" x=\"678\" y=\"-22.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">value = [1295, 61]</text>\n<text text-anchor=\"middle\" x=\"678\" y=\"-7.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">class = não</text>\n</g>\n<!-- 10&#45;&gt;11 -->\n<g id=\"edge11\" class=\"edge\">\n<title>10&#45;&gt;11</title>\n<path fill=\"none\" stroke=\"#000000\" d=\"M678,-103.9815C678,-95.618 678,-86.7965 678,-78.3409\"/>\n<polygon fill=\"#000000\" stroke=\"#000000\" points=\"681.5001,-78.2636 678,-68.2637 674.5001,-78.2637 681.5001,-78.2636\"/>\n</g>\n<!-- 12 -->\n<g id=\"node13\" class=\"node\">\n<title>12</title>\n<path fill=\"#399de5\" fill-opacity=\"0.556863\" stroke=\"#000000\" d=\"M874,-68C874,-68 774,-68 774,-68 768,-68 762,-62 762,-56 762,-56 762,-12 762,-12 762,-6 768,0 774,0 774,0 874,0 874,0 880,0 886,-6 886,-12 886,-12 886,-56 886,-56 886,-62 880,-68 874,-68\"/>\n<text text-anchor=\"middle\" x=\"824\" y=\"-52.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">gini = 0.426</text>\n<text text-anchor=\"middle\" x=\"824\" y=\"-37.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">samples = 247</text>\n<text text-anchor=\"middle\" x=\"824\" y=\"-22.8\" font-family=\"Helvetica,sans-Serif\" 
font-size=\"14.00\" fill=\"#000000\">value = [76, 171]</text>\n<text text-anchor=\"middle\" x=\"824\" y=\"-7.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\" fill=\"#000000\">class = sim</text>\n</g>\n<!-- 10&#45;&gt;12 -->\n<g id=\"edge12\" class=\"edge\">\n<title>10&#45;&gt;12</title>\n<path fill=\"none\" stroke=\"#000000\" d=\"M732.365,-103.9815C745.0011,-94.3313 758.4356,-84.0714 771.0115,-74.4673\"/>\n<polygon fill=\"#000000\" stroke=\"#000000\" points=\"773.3114,-77.1148 779.1346,-68.2637 769.0628,-71.5515 773.3114,-77.1148\"/>\n</g>\n</g>\n</svg>\n"
1130 |           },
1131 |           "metadata": {
1132 |             "tags": []
1133 |           },
1134 |           "execution_count": 38
1135 |         }
1136 |       ]
1137 |     },
1138 |     {
1139 |       "metadata": {
1140 |         "id": "TLIr9EPALvM4",
1141 |         "colab_type": "code",
1142 |         "colab": {}
1143 |       },
1144 |       "cell_type": "code",
1145 |       "source": [
1146 |         ""
1147 |       ],
1148 |       "execution_count": 0,
1149 |       "outputs": []
1150 |     }
1151 |   ]
1152 | }


--------------------------------------------------------------------------------
/aula5.4/introdução_a_machine_learning_4.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """Introdução a Machine Learning - 4.ipynb
  3 | 
  4 | Automatically generated by Colaboratory.
  5 | 
  6 | Original file is located at
  7 |     https://colab.research.google.com/drive/1RpYAAROMa4C86iZscVUzaWIeVYSJapyE
  8 | """
  9 | 
 10 | !pip install graphviz==0.10
 11 | !apt-get install graphviz
 12 | 
 13 | import pandas as pd
 14 | 
 15 | uri = "https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv"
 16 | dados = pd.read_csv(uri)
 17 | dados.head()
 18 | 
 19 | a_renomear = {
 20 |     'mileage_per_year' : 'milhas_por_ano',
 21 |     'model_year' : 'ano_do_modelo',
 22 |     'price' : 'preco',
 23 |     'sold' : 'vendido'
 24 | }
 25 | dados = dados.rename(columns=a_renomear)
 26 | dados.head()
 27 | 
 28 | a_trocar = {
 29 |     'no' : 0,
 30 |     'yes' : 1
 31 | }
 32 | dados.vendido = dados.vendido.map(a_trocar)
 33 | dados.head()
 34 | 
 35 | from datetime import datetime
 36 | 
 37 | ano_atual = datetime.today().year
 38 | dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo
 39 | dados.head()
 40 | 
 41 | dados['km_por_ano'] = dados.milhas_por_ano * 1.60934
 42 | dados.head()
 43 | 
 44 | dados = dados.drop(columns = ["Unnamed: 0", "milhas_por_ano","ano_do_modelo"], axis=1)
 45 | dados.head()
 46 | 
 47 | import numpy as np
 48 | from sklearn.model_selection import train_test_split
 49 | from sklearn.svm import LinearSVC
 50 | from sklearn.metrics import accuracy_score
 51 | 
 52 | x = dados[["preco", "idade_do_modelo","km_por_ano"]]
 53 | y = dados["vendido"]
 54 | 
 55 | SEED = 5
 56 | np.random.seed(SEED)
 57 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
 58 |                                                          stratify = y)
 59 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
 60 | 
 61 | modelo = LinearSVC()
 62 | modelo.fit(treino_x, treino_y)
 63 | previsoes = modelo.predict(teste_x)
 64 | 
 65 | acuracia = accuracy_score(teste_y, previsoes) * 100
 66 | print("A acurácia foi %.2f%%" % acuracia)
 67 | 
 68 | from sklearn.dummy import DummyClassifier
 69 | 
 70 | dummy_stratified = DummyClassifier()
 71 | dummy_stratified.fit(treino_x, treino_y)
 72 | acuracia = dummy_stratified.score(teste_x, teste_y) * 100
 73 | 
 74 | print("A acurácia do dummy stratified foi %.2f%%" % acuracia)
 75 | 
 76 | from sklearn.dummy import DummyClassifier
 77 | 
 78 | dummy_mostfrequent = DummyClassifier()
 79 | dummy_mostfrequent.fit(treino_x, treino_y)
 80 | acuracia = dummy_mostfrequent.score(teste_x, teste_y) * 100
 81 | 
 82 | print("A acurácia do dummy mostfrequent foi %.2f%%" % acuracia)
 83 | 
 84 | from sklearn.preprocessing import StandardScaler
 85 | from sklearn.model_selection import train_test_split
 86 | from sklearn.svm import SVC
 87 | from sklearn.metrics import accuracy_score
 88 | 
 89 | SEED = 5
 90 | np.random.seed(SEED)
 91 | raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
 92 |                                                          stratify = y)
 93 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
 94 | 
 95 | scaler = StandardScaler()
 96 | scaler.fit(raw_treino_x)
 97 | treino_x = scaler.transform(raw_treino_x)
 98 | teste_x = scaler.transform(raw_teste_x)
 99 | 
100 | modelo = SVC()
101 | modelo.fit(treino_x, treino_y)
102 | previsoes = modelo.predict(teste_x)
103 | 
104 | acuracia = accuracy_score(teste_y, previsoes) * 100
105 | print("A acurácia foi %.2f%%" % acuracia)
106 | 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Re-seed NumPy's global generator so this cell repeats the same stratified
# 75/25 split regardless of what ran before it.
SEED = 5
np.random.seed(SEED)
raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
                                                         stratify = y)
# Report the sizes of the partitions produced just above. The original message
# read ``treino_x``/``teste_x``, which are left over from a previous cell and
# are not the data this model is trained and evaluated on.
print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(raw_treino_x), len(raw_teste_x)))

# Shallow decision tree (at most 3 levels of splits) fitted on the raw,
# unscaled features; no scaler is applied in this cell.
modelo = DecisionTreeClassifier(max_depth=3)
modelo.fit(raw_treino_x, treino_y)
previsoes = modelo.predict(raw_teste_x)

# Accuracy on the held-out rows, expressed as a percentage.
acuracia = accuracy_score(teste_y, previsoes) * 100
print("A acurácia foi %.2f%%" % acuracia)
124 | 
from sklearn.tree import export_graphviz
import graphviz

# Export the fitted tree held in ``modelo`` as Graphviz DOT text and wrap it
# in a ``graphviz.Source`` object for rendering.
features = x.columns
opcoes_de_desenho = dict(
    out_file=None,                # no output file: the DOT text is returned
    filled=True,
    rounded=True,
    feature_names=features,
    class_names=["não", "sim"],   # labels for classes 0 and 1, in that order
)
dot_data = export_graphviz(modelo, **opcoes_de_desenho)
grafico = graphviz.Source(dot_data)
# Bare expression: displays the diagram in a notebook, no effect in a script.
grafico
135 | 
136 | 


--------------------------------------------------------------------------------