├── aula1.3
├── Introdução_a_Machine_Learning_e_Classificação_1.ipynb
└── introdução_a_machine_learning_e_classificação_1.py
├── aula1.4
├── Introdução_a_Machine_Learning_e_Classificação_1.ipynb
└── introdução_a_machine_learning_e_classificação_1.py
├── aula2.1
├── Introdução_a_Machine_Learning_Classificação_2.ipynb
└── introdução_a_machine_learning_classificação_2.py
├── aula2.2
├── Introdução_a_Machine_Learning_Classificação_2.ipynb
└── introdução_a_machine_learning_classificação_2.py
├── aula3.1
├── Introdução_a_Machine_Learning_3.ipynb
└── introdução_a_machine_learning_3 (1).py
├── aula4.1
├── Introdução_a_Machine_Learning_3.ipynb
└── introdução_a_machine_learning_3.py
├── aula5.1
├── Introdução_a_Machine_Learning_4.ipynb
└── introdução_a_machine_learning_4.py
├── aula5.2
├── Introdução_a_Machine_Learning_4.ipynb
└── introdução_a_machine_learning_4.py
└── aula5.4
├── Introdução_a_Machine_Learning_4.ipynb
└── introdução_a_machine_learning_4.py
/aula1.3/Introdução_a_Machine_Learning_e_Classificação_1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Introdução a Machine Learning e Classificação - 1.ipynb",
7 | "version": "0.3.2",
8 | "provenance": []
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | }
14 | },
15 | "cells": [
16 | {
17 | "metadata": {
18 | "id": "iN35zFmNyYIc",
19 | "colab_type": "code",
20 | "colab": {}
21 | },
22 | "cell_type": "code",
23 | "source": [
24 | "# features (1 sim, 0 não)\n",
25 | "# pelo longo?\n",
26 | "# perna curta?\n",
27 | "# faz auau?\n",
28 | "porco1 = [0, 1, 0]\n",
29 | "porco2 = [0, 1, 1]\n",
30 | "porco3 = [1, 1, 0]\n",
31 | "\n",
32 | "cachorro1 = [0, 1, 1]\n",
33 | "cachorro2 = [1, 0, 1]\n",
34 | "cachorro3 = [1, 1, 1]\n",
35 | "\n",
36 | "# 1 => porco, 0 => cachorro\n",
37 | "dados = [porco1, porco2, porco3, cachorro1, cachorro2, cachorro3]\n",
38 | "classes = [1,1,1,0,0,0]"
39 | ],
40 | "execution_count": 0,
41 | "outputs": []
42 | },
43 | {
44 | "metadata": {
45 | "id": "tcWrSPHkzaby",
46 | "colab_type": "code",
47 | "colab": {
48 | "base_uri": "https://localhost:8080/",
49 | "height": 86
50 | },
51 | "outputId": "17cd7227-3924-457c-b41a-2498a22c141e"
52 | },
53 | "cell_type": "code",
54 | "source": [
55 | "from sklearn.svm import LinearSVC\n",
56 | "\n",
57 | "model = LinearSVC()\n",
58 | "model.fit(dados, classes)"
59 | ],
60 | "execution_count": 7,
61 | "outputs": [
62 | {
63 | "output_type": "execute_result",
64 | "data": {
65 | "text/plain": [
66 | "LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n",
67 | " intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n",
68 | " multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n",
69 | " verbose=0)"
70 | ]
71 | },
72 | "metadata": {
73 | "tags": []
74 | },
75 | "execution_count": 7
76 | }
77 | ]
78 | },
79 | {
80 | "metadata": {
81 | "id": "YaUuFWOx0YZF",
82 | "colab_type": "code",
83 | "colab": {
84 | "base_uri": "https://localhost:8080/",
85 | "height": 34
86 | },
87 | "outputId": "43e963d4-9b76-40a4-9896-e513dd23fd8f"
88 | },
89 | "cell_type": "code",
90 | "source": [
91 | "animal_misterioso = [1,1,1]\n",
92 | "model.predict([animal_misterioso])"
93 | ],
94 | "execution_count": 9,
95 | "outputs": [
96 | {
97 | "output_type": "execute_result",
98 | "data": {
99 | "text/plain": [
100 | "array([0])"
101 | ]
102 | },
103 | "metadata": {
104 | "tags": []
105 | },
106 | "execution_count": 9
107 | }
108 | ]
109 | },
110 | {
111 | "metadata": {
112 | "id": "yJEFM8mx0jtR",
113 | "colab_type": "code",
114 | "colab": {}
115 | },
116 | "cell_type": "code",
117 | "source": [
118 | "misterio1 = [1,1,1]\n",
119 | "misterio2 = [1,1,0]\n",
120 | "misterio3 = [0,1,1]\n",
121 | "\n",
122 | "testes = [misterio1, misterio2, misterio3]\n",
123 | "previsoes = model.predict(testes)"
124 | ],
125 | "execution_count": 0,
126 | "outputs": []
127 | },
128 | {
129 | "metadata": {
130 | "id": "3xOA4L4e03GO",
131 | "colab_type": "code",
132 | "colab": {}
133 | },
134 | "cell_type": "code",
135 | "source": [
136 | "testes_classes = [0, 1, 1]"
137 | ],
138 | "execution_count": 0,
139 | "outputs": []
140 | },
141 | {
142 | "metadata": {
143 | "id": "cUaaDQol1b_D",
144 | "colab_type": "code",
145 | "colab": {
146 | "base_uri": "https://localhost:8080/",
147 | "height": 34
148 | },
149 | "outputId": "6b335fa6-9f1b-4e60-de6c-5dd417ff3b08"
150 | },
151 | "cell_type": "code",
152 | "source": [
153 | "corretos = (previsoes == testes_classes).sum()\n",
154 | "total = len(testes)\n",
155 | "taxa_de_acerto = corretos/total\n",
156 | "print(\"Taxa de acerto: \", taxa_de_acerto * 100)"
157 | ],
158 | "execution_count": 24,
159 | "outputs": [
160 | {
161 | "output_type": "stream",
162 | "text": [
163 | "Taxa de acerto: 66.66666666666666\n"
164 | ],
165 | "name": "stdout"
166 | }
167 | ]
168 | },
169 | {
170 | "metadata": {
171 | "id": "T6zhFhQa1c28",
172 | "colab_type": "code",
173 | "colab": {
174 | "base_uri": "https://localhost:8080/",
175 | "height": 34
176 | },
177 | "outputId": "3f8b5c7b-2d17-4233-c8de-e9cc9a3b7476"
178 | },
179 | "cell_type": "code",
180 | "source": [
181 | "from sklearn.metrics import accuracy_score\n",
182 | "\n",
183 | "taxa_de_acerto = accuracy_score(testes_classes, previsoes)\n",
184 | "print(\"Taxa de acerto\", taxa_de_acerto * 100)"
185 | ],
186 | "execution_count": 26,
187 | "outputs": [
188 | {
189 | "output_type": "stream",
190 | "text": [
191 | "Taxa de acerto 66.66666666666666\n"
192 | ],
193 | "name": "stdout"
194 | }
195 | ]
196 | },
197 | {
198 | "metadata": {
199 | "id": "Z9wOaemi2fzE",
200 | "colab_type": "code",
201 | "colab": {}
202 | },
203 | "cell_type": "code",
204 | "source": [
205 | ""
206 | ],
207 | "execution_count": 0,
208 | "outputs": []
209 | }
210 | ]
211 | }
--------------------------------------------------------------------------------
/aula1.3/introdução_a_machine_learning_e_classificação_1.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Introdução a Machine Learning e Classificação - 1.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/1SNvuZmre0mDEJgTBBXBzvptPtth7q_IX
8 | """
9 |
10 | # features (1 sim, 0 não)
11 | # pelo longo?
12 | # perna curta?
13 | # faz auau?
14 | porco1 = [0, 1, 0]
15 | porco2 = [0, 1, 1]
16 | porco3 = [1, 1, 0]
17 |
18 | cachorro1 = [0, 1, 1]
19 | cachorro2 = [1, 0, 1]
20 | cachorro3 = [1, 1, 1]
21 |
22 | # 1 => porco, 0 => cachorro
23 | dados = [porco1, porco2, porco3, cachorro1, cachorro2, cachorro3]
24 | classes = [1,1,1,0,0,0]
25 |
26 | from sklearn.svm import LinearSVC
27 |
28 | model = LinearSVC()
29 | model.fit(dados, classes)
30 |
31 | animal_misterioso = [1,1,1]
32 | model.predict([animal_misterioso])
33 |
34 | misterio1 = [1,1,1]
35 | misterio2 = [1,1,0]
36 | misterio3 = [0,1,1]
37 |
38 | testes = [misterio1, misterio2, misterio3]
39 | previsoes = model.predict(testes)
40 |
41 | testes_classes = [0, 1, 1]
42 |
43 | corretos = (previsoes == testes_classes).sum()
44 | total = len(testes)
45 | taxa_de_acerto = corretos/total
46 | print("Taxa de acerto: ", taxa_de_acerto * 100)
47 |
48 | from sklearn.metrics import accuracy_score
49 |
50 | taxa_de_acerto = accuracy_score(testes_classes, previsoes)
51 | print("Taxa de acerto", taxa_de_acerto * 100)
52 |
53 |
--------------------------------------------------------------------------------
/aula1.4/Introdução_a_Machine_Learning_e_Classificação_1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Introdução a Machine Learning e Classificação - 1.ipynb",
7 | "version": "0.3.2",
8 | "provenance": [],
9 | "collapsed_sections": []
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "metadata": {
19 | "id": "iN35zFmNyYIc",
20 | "colab_type": "code",
21 | "colab": {}
22 | },
23 | "cell_type": "code",
24 | "source": [
25 | "# features (1 sim, 0 não)\n",
26 | "# pelo longo?\n",
27 | "# perna curta?\n",
28 | "# faz auau?\n",
29 | "porco1 = [0, 1, 0]\n",
30 | "porco2 = [0, 1, 1]\n",
31 | "porco3 = [1, 1, 0]\n",
32 | "\n",
33 | "cachorro1 = [0, 1, 1]\n",
34 | "cachorro2 = [1, 0, 1]\n",
35 | "cachorro3 = [1, 1, 1]\n",
36 | "\n",
37 | "# 1 => porco, 0 => cachorro\n",
38 | "treino_x = [porco1, porco2, porco3, cachorro1, cachorro2, cachorro3]\n",
39 | "treino_y = [1,1,1,0,0,0] # labels / etiqueta"
40 | ],
41 | "execution_count": 0,
42 | "outputs": []
43 | },
44 | {
45 | "metadata": {
46 | "id": "tcWrSPHkzaby",
47 | "colab_type": "code",
48 | "colab": {
49 | "base_uri": "https://localhost:8080/",
50 | "height": 87
51 | },
52 | "outputId": "0675eaa5-68bd-4df2-cafe-a3d94a9fcec9"
53 | },
54 | "cell_type": "code",
55 | "source": [
56 | "from sklearn.svm import LinearSVC\n",
57 | "\n",
58 | "model = LinearSVC()\n",
59 | "model.fit(treino_x, treino_y)"
60 | ],
61 | "execution_count": 2,
62 | "outputs": [
63 | {
64 | "output_type": "execute_result",
65 | "data": {
66 | "text/plain": [
67 | "LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n",
68 | " intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n",
69 | " multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n",
70 | " verbose=0)"
71 | ]
72 | },
73 | "metadata": {
74 | "tags": []
75 | },
76 | "execution_count": 2
77 | }
78 | ]
79 | },
80 | {
81 | "metadata": {
82 | "id": "YaUuFWOx0YZF",
83 | "colab_type": "code",
84 | "colab": {
85 | "base_uri": "https://localhost:8080/",
86 | "height": 35
87 | },
88 | "outputId": "defc66aa-ce1b-460c-895c-e422ffe287c5"
89 | },
90 | "cell_type": "code",
91 | "source": [
92 | "animal_misterioso = [1,1,1]\n",
93 | "model.predict([animal_misterioso])"
94 | ],
95 | "execution_count": 3,
96 | "outputs": [
97 | {
98 | "output_type": "execute_result",
99 | "data": {
100 | "text/plain": [
101 | "array([0])"
102 | ]
103 | },
104 | "metadata": {
105 | "tags": []
106 | },
107 | "execution_count": 3
108 | }
109 | ]
110 | },
111 | {
112 | "metadata": {
113 | "id": "yJEFM8mx0jtR",
114 | "colab_type": "code",
115 | "colab": {}
116 | },
117 | "cell_type": "code",
118 | "source": [
119 | "misterio1 = [1,1,1]\n",
120 | "misterio2 = [1,1,0]\n",
121 | "misterio3 = [0,1,1]\n",
122 | "\n",
123 | "teste_x = [misterio1, misterio2, misterio3]\n",
124 | "teste_y = [0, 1, 1]"
125 | ],
126 | "execution_count": 0,
127 | "outputs": []
128 | },
129 | {
130 | "metadata": {
131 | "id": "3xOA4L4e03GO",
132 | "colab_type": "code",
133 | "colab": {}
134 | },
135 | "cell_type": "code",
136 | "source": [
137 | "previsoes = model.predict(teste_x)"
138 | ],
139 | "execution_count": 0,
140 | "outputs": []
141 | },
142 | {
143 | "metadata": {
144 | "id": "cUaaDQol1b_D",
145 | "colab_type": "code",
146 | "colab": {
147 | "base_uri": "https://localhost:8080/",
148 | "height": 35
149 | },
150 | "outputId": "4cd12076-c2e0-433c-b023-b0c3936b5b36"
151 | },
152 | "cell_type": "code",
153 | "source": [
154 | "corretos = (previsoes == teste_y).sum()\n",
155 | "total = len(teste_x)\n",
156 | "taxa_de_acerto = corretos/total\n",
157 | "print(\"Taxa de acerto %.2f\" % (taxa_de_acerto * 100))"
158 | ],
159 | "execution_count": 8,
160 | "outputs": [
161 | {
162 | "output_type": "stream",
163 | "text": [
164 | "Taxa de acerto 66.67\n"
165 | ],
166 | "name": "stdout"
167 | }
168 | ]
169 | },
170 | {
171 | "metadata": {
172 | "id": "T6zhFhQa1c28",
173 | "colab_type": "code",
174 | "colab": {
175 | "base_uri": "https://localhost:8080/",
176 | "height": 34
177 | },
178 | "outputId": "29ed71f9-5a9d-4de3-84f5-825121a92a0b"
179 | },
180 | "cell_type": "code",
181 | "source": [
182 | "from sklearn.metrics import accuracy_score\n",
183 | "\n",
184 | "taxa_de_acerto = accuracy_score(teste_y, previsoes)\n",
185 | "print(\"Taxa de acerto %.2f\" % (taxa_de_acerto * 100))"
186 | ],
187 | "execution_count": 9,
188 | "outputs": [
189 | {
190 | "output_type": "stream",
191 | "text": [
192 | "Taxa de acerto 66.67\n"
193 | ],
194 | "name": "stdout"
195 | }
196 | ]
197 | },
198 | {
199 | "metadata": {
200 | "id": "Z9wOaemi2fzE",
201 | "colab_type": "code",
202 | "colab": {}
203 | },
204 | "cell_type": "code",
205 | "source": [
206 | ""
207 | ],
208 | "execution_count": 0,
209 | "outputs": []
210 | }
211 | ]
212 | }
--------------------------------------------------------------------------------
/aula1.4/introdução_a_machine_learning_e_classificação_1.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Introdução a Machine Learning e Classificação - 1.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/1SNvuZmre0mDEJgTBBXBzvptPtth7q_IX
8 | """
9 |
10 | # features (1 sim, 0 não)
11 | # pelo longo?
12 | # perna curta?
13 | # faz auau?
14 | porco1 = [0, 1, 0]
15 | porco2 = [0, 1, 1]
16 | porco3 = [1, 1, 0]
17 |
18 | cachorro1 = [0, 1, 1]
19 | cachorro2 = [1, 0, 1]
20 | cachorro3 = [1, 1, 1]
21 |
22 | # 1 => porco, 0 => cachorro
23 | treino_x = [porco1, porco2, porco3, cachorro1, cachorro2, cachorro3]
24 | treino_y = [1,1,1,0,0,0] # labels / etiqueta
25 |
26 | from sklearn.svm import LinearSVC
27 |
28 | model = LinearSVC()
29 | model.fit(treino_x, treino_y)
30 |
31 | animal_misterioso = [1,1,1]
32 | model.predict([animal_misterioso])
33 |
34 | misterio1 = [1,1,1]
35 | misterio2 = [1,1,0]
36 | misterio3 = [0,1,1]
37 |
38 | teste_x = [misterio1, misterio2, misterio3]
39 | teste_y = [0, 1, 1]
40 |
41 | previsoes = model.predict(teste_x)
42 |
43 | corretos = (previsoes == teste_y).sum()
44 | total = len(teste_x)
45 | taxa_de_acerto = corretos/total
46 | print("Taxa de acerto %.2f" % (taxa_de_acerto * 100))
47 |
48 | from sklearn.metrics import accuracy_score
49 |
50 | taxa_de_acerto = accuracy_score(teste_y, previsoes)
51 | print("Taxa de acerto %.2f" % (taxa_de_acerto * 100))
52 |
53 |
--------------------------------------------------------------------------------
/aula2.1/Introdução_a_Machine_Learning_Classificação_2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Introdução a Machine Learning Classificação - 2.ipynb",
7 | "version": "0.3.2",
8 | "provenance": [],
9 | "collapsed_sections": []
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "metadata": {
19 | "id": "mEIOC06i7QEJ",
20 | "colab_type": "code",
21 | "colab": {
22 | "base_uri": "https://localhost:8080/",
23 | "height": 202
24 | },
25 | "outputId": "f6dbd4b4-8372-44b6-c3d1-92a4e08d83ff"
26 | },
27 | "cell_type": "code",
28 | "source": [
29 | "import pandas as pd\n",
30 | "\n",
31 | "uri = \"https://gist.githubusercontent.com/guilhermesilveira/2d2efa37d66b6c84a722ea627a897ced/raw/10968b997d885cbded1c92938c7a9912ba41c615/tracking.csv\"\n",
32 | "dados = pd.read_csv(uri)\n",
33 | "dados.head()"
34 | ],
35 | "execution_count": 2,
36 | "outputs": [
37 | {
38 | "output_type": "execute_result",
39 | "data": {
40 | "text/html": [
41 | "
\n",
42 | "\n",
55 | "
\n",
56 | " \n",
57 | " \n",
58 | " | \n",
59 | " home | \n",
60 | " how_it_works | \n",
61 | " contact | \n",
62 | " bought | \n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " 0 | \n",
68 | " 1 | \n",
69 | " 1 | \n",
70 | " 0 | \n",
71 | " 0 | \n",
72 | "
\n",
73 | " \n",
74 | " 1 | \n",
75 | " 1 | \n",
76 | " 1 | \n",
77 | " 0 | \n",
78 | " 0 | \n",
79 | "
\n",
80 | " \n",
81 | " 2 | \n",
82 | " 1 | \n",
83 | " 1 | \n",
84 | " 0 | \n",
85 | " 0 | \n",
86 | "
\n",
87 | " \n",
88 | " 3 | \n",
89 | " 1 | \n",
90 | " 1 | \n",
91 | " 0 | \n",
92 | " 0 | \n",
93 | "
\n",
94 | " \n",
95 | " 4 | \n",
96 | " 1 | \n",
97 | " 1 | \n",
98 | " 0 | \n",
99 | " 0 | \n",
100 | "
\n",
101 | " \n",
102 | "
\n",
103 | "
"
104 | ],
105 | "text/plain": [
106 | " home how_it_works contact bought\n",
107 | "0 1 1 0 0\n",
108 | "1 1 1 0 0\n",
109 | "2 1 1 0 0\n",
110 | "3 1 1 0 0\n",
111 | "4 1 1 0 0"
112 | ]
113 | },
114 | "metadata": {
115 | "tags": []
116 | },
117 | "execution_count": 2
118 | }
119 | ]
120 | },
121 | {
122 | "metadata": {
123 | "id": "uDu0eTJn7x0D",
124 | "colab_type": "code",
125 | "colab": {}
126 | },
127 | "cell_type": "code",
128 | "source": [
129 | "mapa = {\n",
130 | " \"home\" : \"principal\",\n",
131 | " \"how_it_works\" : \"como_funciona\",\n",
132 | " \"contact\" : \"contato\",\n",
133 | " \"bought\" : \"comprou\"\n",
134 | "}\n",
135 | "dados = dados.rename(columns = mapa)"
136 | ],
137 | "execution_count": 0,
138 | "outputs": []
139 | },
140 | {
141 | "metadata": {
142 | "id": "9En1V0PM7e8V",
143 | "colab_type": "code",
144 | "colab": {
145 | "base_uri": "https://localhost:8080/",
146 | "height": 202
147 | },
148 | "outputId": "0f54cba9-4f0c-433e-b7a0-b0fa15d39f5a"
149 | },
150 | "cell_type": "code",
151 | "source": [
152 | "x = dados[[\"principal\",\"como_funciona\",\"contato\"]]\n",
153 | "x.head()"
154 | ],
155 | "execution_count": 10,
156 | "outputs": [
157 | {
158 | "output_type": "execute_result",
159 | "data": {
160 | "text/html": [
161 | "\n",
162 | "\n",
175 | "
\n",
176 | " \n",
177 | " \n",
178 | " | \n",
179 | " principal | \n",
180 | " como_funciona | \n",
181 | " contato | \n",
182 | "
\n",
183 | " \n",
184 | " \n",
185 | " \n",
186 | " 0 | \n",
187 | " 1 | \n",
188 | " 1 | \n",
189 | " 0 | \n",
190 | "
\n",
191 | " \n",
192 | " 1 | \n",
193 | " 1 | \n",
194 | " 1 | \n",
195 | " 0 | \n",
196 | "
\n",
197 | " \n",
198 | " 2 | \n",
199 | " 1 | \n",
200 | " 1 | \n",
201 | " 0 | \n",
202 | "
\n",
203 | " \n",
204 | " 3 | \n",
205 | " 1 | \n",
206 | " 1 | \n",
207 | " 0 | \n",
208 | "
\n",
209 | " \n",
210 | " 4 | \n",
211 | " 1 | \n",
212 | " 1 | \n",
213 | " 0 | \n",
214 | "
\n",
215 | " \n",
216 | "
\n",
217 | "
"
218 | ],
219 | "text/plain": [
220 | " principal como_funciona contato\n",
221 | "0 1 1 0\n",
222 | "1 1 1 0\n",
223 | "2 1 1 0\n",
224 | "3 1 1 0\n",
225 | "4 1 1 0"
226 | ]
227 | },
228 | "metadata": {
229 | "tags": []
230 | },
231 | "execution_count": 10
232 | }
233 | ]
234 | },
235 | {
236 | "metadata": {
237 | "id": "qDr2YoWu8O3O",
238 | "colab_type": "code",
239 | "colab": {
240 | "base_uri": "https://localhost:8080/",
241 | "height": 121
242 | },
243 | "outputId": "29495494-aff4-4b5f-b8ae-95ea280f3bce"
244 | },
245 | "cell_type": "code",
246 | "source": [
247 | "y = dados[\"comprou\"]\n",
248 | "y.head()"
249 | ],
250 | "execution_count": 11,
251 | "outputs": [
252 | {
253 | "output_type": "execute_result",
254 | "data": {
255 | "text/plain": [
256 | "0 0\n",
257 | "1 0\n",
258 | "2 0\n",
259 | "3 0\n",
260 | "4 0\n",
261 | "Name: comprou, dtype: int64"
262 | ]
263 | },
264 | "metadata": {
265 | "tags": []
266 | },
267 | "execution_count": 11
268 | }
269 | ]
270 | },
271 | {
272 | "metadata": {
273 | "id": "X5pZ6xcZ8fYq",
274 | "colab_type": "code",
275 | "colab": {
276 | "base_uri": "https://localhost:8080/",
277 | "height": 35
278 | },
279 | "outputId": "a31766ff-52df-4b74-97a7-605916419c87"
280 | },
281 | "cell_type": "code",
282 | "source": [
283 | "dados.shape"
284 | ],
285 | "execution_count": 12,
286 | "outputs": [
287 | {
288 | "output_type": "execute_result",
289 | "data": {
290 | "text/plain": [
291 | "(99, 4)"
292 | ]
293 | },
294 | "metadata": {
295 | "tags": []
296 | },
297 | "execution_count": 12
298 | }
299 | ]
300 | },
301 | {
302 | "metadata": {
303 | "id": "TLZ9eTvP9U9q",
304 | "colab_type": "code",
305 | "colab": {
306 | "base_uri": "https://localhost:8080/",
307 | "height": 35
308 | },
309 | "outputId": "5c392acb-77ff-496c-d8f9-573a8c6414d2"
310 | },
311 | "cell_type": "code",
312 | "source": [
313 | "treino_x = x[:75]\n",
314 | "treino_y = y[:75]\n",
315 | "teste_x = x[75:]\n",
316 | "teste_y = y[75:]\n",
317 | "\n",
318 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))"
319 | ],
320 | "execution_count": 16,
321 | "outputs": [
322 | {
323 | "output_type": "stream",
324 | "text": [
325 | "Treinaremos com 75 elementos e testaremos com 24 elementos\n"
326 | ],
327 | "name": "stdout"
328 | }
329 | ]
330 | },
331 | {
332 | "metadata": {
333 | "id": "pZZjbQxh9jn8",
334 | "colab_type": "code",
335 | "colab": {
336 | "base_uri": "https://localhost:8080/",
337 | "height": 34
338 | },
339 | "outputId": "b7feb2a2-2694-4e6a-aa6c-9fb33e25917f"
340 | },
341 | "cell_type": "code",
342 | "source": [
343 | "from sklearn.svm import LinearSVC\n",
344 | "from sklearn.metrics import accuracy_score\n",
345 | "\n",
346 | "modelo = LinearSVC()\n",
347 | "modelo.fit(treino_x, treino_y)\n",
348 | "previsoes = modelo.predict(teste_x)\n",
349 | "\n",
350 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
351 | "print(\"A acurácia foi %.2f%%\" % acuracia)"
352 | ],
353 | "execution_count": 20,
354 | "outputs": [
355 | {
356 | "output_type": "stream",
357 | "text": [
358 | "A acurácia foi 95.83%\n"
359 | ],
360 | "name": "stdout"
361 | }
362 | ]
363 | },
364 | {
365 | "metadata": {
366 | "id": "rA-z0_a6-CM1",
367 | "colab_type": "code",
368 | "colab": {}
369 | },
370 | "cell_type": "code",
371 | "source": [
372 | ""
373 | ],
374 | "execution_count": 0,
375 | "outputs": []
376 | }
377 | ]
378 | }
--------------------------------------------------------------------------------
/aula2.1/introdução_a_machine_learning_classificação_2.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Introdução a Machine Learning Classificação - 2.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/1nIhP3F_nGiAQayvsPziHuEOZva-HvzLn
8 | """
9 |
10 | import pandas as pd
11 |
12 | uri = "https://gist.githubusercontent.com/guilhermesilveira/2d2efa37d66b6c84a722ea627a897ced/raw/10968b997d885cbded1c92938c7a9912ba41c615/tracking.csv"
13 | dados = pd.read_csv(uri)
14 | dados.head()
15 |
16 | mapa = {
17 | "home" : "principal",
18 | "how_it_works" : "como_funciona",
19 | "contact" : "contato",
20 | "bought" : "comprou"
21 | }
22 | dados = dados.rename(columns = mapa)
23 |
24 | x = dados[["principal","como_funciona","contato"]]
25 | x.head()
26 |
27 | y = dados["comprou"]
28 | y.head()
29 |
30 | dados.shape
31 |
32 | treino_x = x[:75]
33 | treino_y = y[:75]
34 | teste_x = x[75:]
35 | teste_y = y[75:]
36 |
37 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
38 |
39 | from sklearn.svm import LinearSVC
40 | from sklearn.metrics import accuracy_score
41 |
42 | modelo = LinearSVC()
43 | modelo.fit(treino_x, treino_y)
44 | previsoes = modelo.predict(teste_x)
45 |
46 | acuracia = accuracy_score(teste_y, previsoes) * 100
47 | print("A acurácia foi %.2f%%" % acuracia)
48 |
49 |
--------------------------------------------------------------------------------
/aula2.2/Introdução_a_Machine_Learning_Classificação_2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Introdução a Machine Learning Classificação - 2.ipynb",
7 | "version": "0.3.2",
8 | "provenance": [],
9 | "collapsed_sections": []
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "metadata": {
19 | "id": "mEIOC06i7QEJ",
20 | "colab_type": "code",
21 | "colab": {
22 | "base_uri": "https://localhost:8080/",
23 | "height": 195
24 | },
25 | "outputId": "f6dbd4b4-8372-44b6-c3d1-92a4e08d83ff"
26 | },
27 | "cell_type": "code",
28 | "source": [
29 | "import pandas as pd\n",
30 | "\n",
31 | "uri = \"https://gist.githubusercontent.com/guilhermesilveira/2d2efa37d66b6c84a722ea627a897ced/raw/10968b997d885cbded1c92938c7a9912ba41c615/tracking.csv\"\n",
32 | "dados = pd.read_csv(uri)\n",
33 | "dados.head()"
34 | ],
35 | "execution_count": 2,
36 | "outputs": [
37 | {
38 | "output_type": "execute_result",
39 | "data": {
40 | "text/html": [
41 | "\n",
42 | "\n",
55 | "
\n",
56 | " \n",
57 | " \n",
58 | " | \n",
59 | " home | \n",
60 | " how_it_works | \n",
61 | " contact | \n",
62 | " bought | \n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " 0 | \n",
68 | " 1 | \n",
69 | " 1 | \n",
70 | " 0 | \n",
71 | " 0 | \n",
72 | "
\n",
73 | " \n",
74 | " 1 | \n",
75 | " 1 | \n",
76 | " 1 | \n",
77 | " 0 | \n",
78 | " 0 | \n",
79 | "
\n",
80 | " \n",
81 | " 2 | \n",
82 | " 1 | \n",
83 | " 1 | \n",
84 | " 0 | \n",
85 | " 0 | \n",
86 | "
\n",
87 | " \n",
88 | " 3 | \n",
89 | " 1 | \n",
90 | " 1 | \n",
91 | " 0 | \n",
92 | " 0 | \n",
93 | "
\n",
94 | " \n",
95 | " 4 | \n",
96 | " 1 | \n",
97 | " 1 | \n",
98 | " 0 | \n",
99 | " 0 | \n",
100 | "
\n",
101 | " \n",
102 | "
\n",
103 | "
"
104 | ],
105 | "text/plain": [
106 | " home how_it_works contact bought\n",
107 | "0 1 1 0 0\n",
108 | "1 1 1 0 0\n",
109 | "2 1 1 0 0\n",
110 | "3 1 1 0 0\n",
111 | "4 1 1 0 0"
112 | ]
113 | },
114 | "metadata": {
115 | "tags": []
116 | },
117 | "execution_count": 2
118 | }
119 | ]
120 | },
121 | {
122 | "metadata": {
123 | "id": "uDu0eTJn7x0D",
124 | "colab_type": "code",
125 | "colab": {}
126 | },
127 | "cell_type": "code",
128 | "source": [
129 | "mapa = {\n",
130 | " \"home\" : \"principal\",\n",
131 | " \"how_it_works\" : \"como_funciona\",\n",
132 | " \"contact\" : \"contato\",\n",
133 | " \"bought\" : \"comprou\"\n",
134 | "}\n",
135 | "dados = dados.rename(columns = mapa)"
136 | ],
137 | "execution_count": 0,
138 | "outputs": []
139 | },
140 | {
141 | "metadata": {
142 | "id": "9En1V0PM7e8V",
143 | "colab_type": "code",
144 | "colab": {
145 | "base_uri": "https://localhost:8080/",
146 | "height": 195
147 | },
148 | "outputId": "0f54cba9-4f0c-433e-b7a0-b0fa15d39f5a"
149 | },
150 | "cell_type": "code",
151 | "source": [
152 | "x = dados[[\"principal\",\"como_funciona\",\"contato\"]]\n",
153 | "x.head()"
154 | ],
155 | "execution_count": 10,
156 | "outputs": [
157 | {
158 | "output_type": "execute_result",
159 | "data": {
160 | "text/html": [
161 | "\n",
162 | "\n",
175 | "
\n",
176 | " \n",
177 | " \n",
178 | " | \n",
179 | " principal | \n",
180 | " como_funciona | \n",
181 | " contato | \n",
182 | "
\n",
183 | " \n",
184 | " \n",
185 | " \n",
186 | " 0 | \n",
187 | " 1 | \n",
188 | " 1 | \n",
189 | " 0 | \n",
190 | "
\n",
191 | " \n",
192 | " 1 | \n",
193 | " 1 | \n",
194 | " 1 | \n",
195 | " 0 | \n",
196 | "
\n",
197 | " \n",
198 | " 2 | \n",
199 | " 1 | \n",
200 | " 1 | \n",
201 | " 0 | \n",
202 | "
\n",
203 | " \n",
204 | " 3 | \n",
205 | " 1 | \n",
206 | " 1 | \n",
207 | " 0 | \n",
208 | "
\n",
209 | " \n",
210 | " 4 | \n",
211 | " 1 | \n",
212 | " 1 | \n",
213 | " 0 | \n",
214 | "
\n",
215 | " \n",
216 | "
\n",
217 | "
"
218 | ],
219 | "text/plain": [
220 | " principal como_funciona contato\n",
221 | "0 1 1 0\n",
222 | "1 1 1 0\n",
223 | "2 1 1 0\n",
224 | "3 1 1 0\n",
225 | "4 1 1 0"
226 | ]
227 | },
228 | "metadata": {
229 | "tags": []
230 | },
231 | "execution_count": 10
232 | }
233 | ]
234 | },
235 | {
236 | "metadata": {
237 | "id": "qDr2YoWu8O3O",
238 | "colab_type": "code",
239 | "colab": {
240 | "base_uri": "https://localhost:8080/",
241 | "height": 118
242 | },
243 | "outputId": "29495494-aff4-4b5f-b8ae-95ea280f3bce"
244 | },
245 | "cell_type": "code",
246 | "source": [
247 | "y = dados[\"comprou\"]\n",
248 | "y.head()"
249 | ],
250 | "execution_count": 11,
251 | "outputs": [
252 | {
253 | "output_type": "execute_result",
254 | "data": {
255 | "text/plain": [
256 | "0 0\n",
257 | "1 0\n",
258 | "2 0\n",
259 | "3 0\n",
260 | "4 0\n",
261 | "Name: comprou, dtype: int64"
262 | ]
263 | },
264 | "metadata": {
265 | "tags": []
266 | },
267 | "execution_count": 11
268 | }
269 | ]
270 | },
271 | {
272 | "metadata": {
273 | "id": "X5pZ6xcZ8fYq",
274 | "colab_type": "code",
275 | "colab": {
276 | "base_uri": "https://localhost:8080/",
277 | "height": 34
278 | },
279 | "outputId": "a31766ff-52df-4b74-97a7-605916419c87"
280 | },
281 | "cell_type": "code",
282 | "source": [
283 | "dados.shape"
284 | ],
285 | "execution_count": 12,
286 | "outputs": [
287 | {
288 | "output_type": "execute_result",
289 | "data": {
290 | "text/plain": [
291 | "(99, 4)"
292 | ]
293 | },
294 | "metadata": {
295 | "tags": []
296 | },
297 | "execution_count": 12
298 | }
299 | ]
300 | },
301 | {
302 | "metadata": {
303 | "id": "TLZ9eTvP9U9q",
304 | "colab_type": "code",
305 | "colab": {
306 | "base_uri": "https://localhost:8080/",
307 | "height": 34
308 | },
309 | "outputId": "5c392acb-77ff-496c-d8f9-573a8c6414d2"
310 | },
311 | "cell_type": "code",
312 | "source": [
313 | "treino_x = x[:75]\n",
314 | "treino_y = y[:75]\n",
315 | "teste_x = x[75:]\n",
316 | "teste_y = y[75:]\n",
317 | "\n",
318 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))"
319 | ],
320 | "execution_count": 16,
321 | "outputs": [
322 | {
323 | "output_type": "stream",
324 | "text": [
325 | "Treinaremos com 75 elementos e testaremos com 24 elementos\n"
326 | ],
327 | "name": "stdout"
328 | }
329 | ]
330 | },
331 | {
332 | "metadata": {
333 | "id": "pZZjbQxh9jn8",
334 | "colab_type": "code",
335 | "colab": {
336 | "base_uri": "https://localhost:8080/",
337 | "height": 34
338 | },
339 | "outputId": "b7feb2a2-2694-4e6a-aa6c-9fb33e25917f"
340 | },
341 | "cell_type": "code",
342 | "source": [
343 | "from sklearn.svm import LinearSVC\n",
344 | "from sklearn.metrics import accuracy_score\n",
345 | "\n",
346 | "modelo = LinearSVC()\n",
347 | "modelo.fit(treino_x, treino_y)\n",
348 | "previsoes = modelo.predict(teste_x)\n",
349 | "\n",
350 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
351 | "print(\"A acurácia foi %.2f%%\" % acuracia)"
352 | ],
353 | "execution_count": 20,
354 | "outputs": [
355 | {
356 | "output_type": "stream",
357 | "text": [
358 | "A acurácia foi 95.83%\n"
359 | ],
360 | "name": "stdout"
361 | }
362 | ]
363 | },
364 | {
365 | "metadata": {
366 | "id": "2iVcuGkyA5tK",
367 | "colab_type": "text"
368 | },
369 | "cell_type": "markdown",
370 | "source": [
371 | "# Usando a biblioteca para separar treino e teste"
372 | ]
373 | },
374 | {
375 | "metadata": {
376 | "id": "rA-z0_a6-CM1",
377 | "colab_type": "code",
378 | "colab": {
379 | "base_uri": "https://localhost:8080/",
380 | "height": 50
381 | },
382 | "outputId": "f3287dba-50a8-4cd8-9001-ce41278c8bb1"
383 | },
384 | "cell_type": "code",
385 | "source": [
386 | "from sklearn.model_selection import train_test_split\n",
387 | "from sklearn.svm import LinearSVC\n",
388 | "from sklearn.metrics import accuracy_score\n",
389 | "\n",
390 | "SEED = 20\n",
391 | "\n",
392 | "treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, random_state = SEED, test_size = 0.25)\n",
393 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n",
394 | "\n",
395 | "modelo = LinearSVC()\n",
396 | "modelo.fit(treino_x, treino_y)\n",
397 | "previsoes = modelo.predict(teste_x)\n",
398 | "\n",
399 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
400 | "print(\"A acurácia foi %.2f%%\" % acuracia)"
401 | ],
402 | "execution_count": 35,
403 | "outputs": [
404 | {
405 | "output_type": "stream",
406 | "text": [
407 | "Treinaremos com 74 elementos e testaremos com 25 elementos\n",
408 | "A acurácia foi 96.00%\n"
409 | ],
410 | "name": "stdout"
411 | }
412 | ]
413 | },
414 | {
415 | "metadata": {
416 | "id": "JWFKlQccAk1F",
417 | "colab_type": "code",
418 | "colab": {
419 | "base_uri": "https://localhost:8080/",
420 | "height": 68
421 | },
422 | "outputId": "749d52d7-a4b3-488b-a7ad-01ab66793ef7"
423 | },
424 | "cell_type": "code",
425 | "source": [
426 | "treino_y.value_counts()"
427 | ],
428 | "execution_count": 37,
429 | "outputs": [
430 | {
431 | "output_type": "execute_result",
432 | "data": {
433 | "text/plain": [
434 | "0 47\n",
435 | "1 27\n",
436 | "Name: comprou, dtype: int64"
437 | ]
438 | },
439 | "metadata": {
440 | "tags": []
441 | },
442 | "execution_count": 37
443 | }
444 | ]
445 | },
446 | {
447 | "metadata": {
448 | "id": "fdORezxWBcwX",
449 | "colab_type": "code",
450 | "colab": {
451 | "base_uri": "https://localhost:8080/",
452 | "height": 68
453 | },
454 | "outputId": "7ce6be19-5f39-498a-cf61-e76af3990271"
455 | },
456 | "cell_type": "code",
457 | "source": [
458 | "teste_y.value_counts()"
459 | ],
460 | "execution_count": 38,
461 | "outputs": [
462 | {
463 | "output_type": "execute_result",
464 | "data": {
465 | "text/plain": [
466 | "0 19\n",
467 | "1 6\n",
468 | "Name: comprou, dtype: int64"
469 | ]
470 | },
471 | "metadata": {
472 | "tags": []
473 | },
474 | "execution_count": 38
475 | }
476 | ]
477 | },
478 | {
479 | "metadata": {
480 | "id": "k_kFDoBABh4B",
481 | "colab_type": "code",
482 | "colab": {
483 | "base_uri": "https://localhost:8080/",
484 | "height": 51
485 | },
486 | "outputId": "ba8b4464-305a-4e94-9422-445600888353"
487 | },
488 | "cell_type": "code",
489 | "source": [
490 | "from sklearn.model_selection import train_test_split\n",
491 | "from sklearn.svm import LinearSVC\n",
492 | "from sklearn.metrics import accuracy_score\n",
493 | "\n",
494 | "SEED = 20\n",
495 | "\n",
496 | "treino_x, teste_x, treino_y, teste_y = train_test_split(x, y,\n",
497 | " random_state = SEED, test_size = 0.25,\n",
498 | " stratify = y)\n",
499 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n",
500 | "\n",
501 | "modelo = LinearSVC()\n",
502 | "modelo.fit(treino_x, treino_y)\n",
503 | "previsoes = modelo.predict(teste_x)\n",
504 | "\n",
505 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
506 | "print(\"A acurácia foi %.2f%%\" % acuracia)"
507 | ],
508 | "execution_count": 41,
509 | "outputs": [
510 | {
511 | "output_type": "stream",
512 | "text": [
513 | "Treinaremos com 74 elementos e testaremos com 25 elementos\n",
514 | "A acurácia foi 96.00%\n"
515 | ],
516 | "name": "stdout"
517 | }
518 | ]
519 | },
520 | {
521 | "metadata": {
522 | "id": "SS4n0CVXB6Fo",
523 | "colab_type": "code",
524 | "colab": {
525 | "base_uri": "https://localhost:8080/",
526 | "height": 67
527 | },
528 | "outputId": "60d3e7e9-4a79-4d96-f3f5-6ef1a843519a"
529 | },
530 | "cell_type": "code",
531 | "source": [
532 | "treino_y.value_counts()"
533 | ],
534 | "execution_count": 42,
535 | "outputs": [
536 | {
537 | "output_type": "execute_result",
538 | "data": {
539 | "text/plain": [
540 | "0 49\n",
541 | "1 25\n",
542 | "Name: comprou, dtype: int64"
543 | ]
544 | },
545 | "metadata": {
546 | "tags": []
547 | },
548 | "execution_count": 42
549 | }
550 | ]
551 | },
552 | {
553 | "metadata": {
554 | "id": "hgvSvos6CHIk",
555 | "colab_type": "code",
556 | "colab": {
557 | "base_uri": "https://localhost:8080/",
558 | "height": 67
559 | },
560 | "outputId": "8ec62784-d5a3-4197-81fb-accc0632bdf8"
561 | },
562 | "cell_type": "code",
563 | "source": [
564 | "teste_y.value_counts()"
565 | ],
566 | "execution_count": 43,
567 | "outputs": [
568 | {
569 | "output_type": "execute_result",
570 | "data": {
571 | "text/plain": [
572 | "0 17\n",
573 | "1 8\n",
574 | "Name: comprou, dtype: int64"
575 | ]
576 | },
577 | "metadata": {
578 | "tags": []
579 | },
580 | "execution_count": 43
581 | }
582 | ]
583 | },
584 | {
585 | "metadata": {
586 | "id": "bqhIUWBsCH8w",
587 | "colab_type": "code",
588 | "colab": {}
589 | },
590 | "cell_type": "code",
591 | "source": [
592 | ""
593 | ],
594 | "execution_count": 0,
595 | "outputs": []
596 | }
597 | ]
598 | }
--------------------------------------------------------------------------------
/aula2.2/introdução_a_machine_learning_classificação_2.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Introdução a Machine Learning Classificação - 2.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/1nIhP3F_nGiAQayvsPziHuEOZva-HvzLn
8 | """
9 |
10 | import pandas as pd
11 | # Read the tracking CSV from the remote gist into a DataFrame (requires network access).
12 | uri = "https://gist.githubusercontent.com/guilhermesilveira/2d2efa37d66b6c84a722ea627a897ced/raw/10968b997d885cbded1c92938c7a9912ba41c615/tracking.csv"
13 | dados = pd.read_csv(uri)
14 | dados.head()  # bare expression kept from the notebook export; its value is discarded in a script
15 | # Map the CSV's English column names to the Portuguese names used from here on.
16 | mapa = {
17 | "home" : "principal",
18 | "how_it_works" : "como_funciona",
19 | "contact" : "contato",
20 | "bought" : "comprou"
21 | }
22 | dados = dados.rename(columns = mapa)
23 | # x holds the three renamed feature columns; y (below) is the "comprou" column used as the label.
24 | x = dados[["principal","como_funciona","contato"]]
25 | x.head()
26 |
27 | y = dados["comprou"]
28 | y.head()
29 |
30 | dados.shape
31 |
32 | treino_x = x[:75]  # manual positional split: first 75 rows train, remaining rows test
33 | treino_y = y[:75]
34 | teste_x = x[75:]
35 | teste_y = y[75:]
36 |
37 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
38 | # Imports appear mid-file because the notebook cells were exported in order.
39 | from sklearn.svm import LinearSVC
40 | from sklearn.metrics import accuracy_score
41 | # Fit a linear SVM on the training slice and predict the held-out slice.
42 | modelo = LinearSVC()
43 | modelo.fit(treino_x, treino_y)
44 | previsoes = modelo.predict(teste_x)
45 | # accuracy_score returns a fraction; scale it to a percentage for printing.
46 | acuracia = accuracy_score(teste_y, previsoes) * 100
47 | print("A acurácia foi %.2f%%" % acuracia)
48 |
49 | """# Usando a biblioteca para separar treino e teste"""
50 | # The string above is the exported notebook heading ("Using the library to split train and test").
51 | from sklearn.model_selection import train_test_split
52 | from sklearn.svm import LinearSVC
53 | from sklearn.metrics import accuracy_score
54 |
55 | SEED = 20
56 | # Split with a fixed random_state, holding out 25% of the rows for testing.
57 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, random_state = SEED, test_size = 0.25)
58 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
59 | # Train and evaluate exactly as before, now on the library-made split.
60 | modelo = LinearSVC()
61 | modelo.fit(treino_x, treino_y)
62 | previsoes = modelo.predict(teste_x)
63 |
64 | acuracia = accuracy_score(teste_y, previsoes) * 100
65 | print("A acurácia foi %.2f%%" % acuracia)
66 | # Count the label values in each split (bare expressions: only shown when run in a notebook).
67 | treino_y.value_counts()
68 |
69 | teste_y.value_counts()
70 |
71 | from sklearn.model_selection import train_test_split
72 | from sklearn.svm import LinearSVC
73 | from sklearn.metrics import accuracy_score
74 |
75 | SEED = 20
76 | # Same split as above, plus stratify = y so the split is stratified on the label.
77 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y,
78 | random_state = SEED, test_size = 0.25,
79 | stratify = y)
80 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
81 | # Retrain the linear SVM on the stratified split and report accuracy as a percentage.
82 | modelo = LinearSVC()
83 | modelo.fit(treino_x, treino_y)
84 | previsoes = modelo.predict(teste_x)
85 |
86 | acuracia = accuracy_score(teste_y, previsoes) * 100
87 | print("A acurácia foi %.2f%%" % acuracia)
88 | # Count the label values in each split again, now for the stratified split.
89 | treino_y.value_counts()
90 |
91 | teste_y.value_counts()
92 |
93 |
--------------------------------------------------------------------------------
/aula3.1/introdução_a_machine_learning_3 (1).py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Introdução a Machine Learning 3.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/1r4UlftWbCZA3w-glDhPlo1TUK-Pf6-Sa
8 | """
9 |
10 | !pip install seaborn==0.9.0
11 |
12 | import pandas as pd
13 |
14 | uri = "https://gist.githubusercontent.com/guilhermesilveira/1b7d5475863c15f484ac495bd70975cf/raw/16aff7a0aee67e7c100a2a48b676a2d2d142f646/projects.csv"
15 | dados = pd.read_csv(uri)
16 | dados.head()
17 |
18 | a_renomear = {
19 | 'expected_hours' : 'horas_esperadas',
20 | 'price' : 'preco',
21 | 'unfinished' : 'nao_finalizado'
22 | }
23 | dados = dados.rename(columns = a_renomear)
24 | dados.head()
25 |
26 | troca = {
27 | 0 : 1,
28 | 1 : 0
29 | }
30 | dados['finalizado'] = dados.nao_finalizado.map(troca)
31 | dados.head()
32 |
33 | dados.tail()
34 |
35 | import seaborn as sns
36 |
37 | sns.scatterplot(x="horas_esperadas", y="preco", data=dados)
38 |
39 | sns.scatterplot(x="horas_esperadas", y="preco", hue="finalizado", data=dados)
40 |
41 | sns.relplot(x="horas_esperadas", y="preco", hue="finalizado", col="finalizado", data=dados)
42 |
43 | x = dados[['horas_esperadas', 'preco']]
44 | y = dados['finalizado']
45 |
46 | from sklearn.model_selection import train_test_split
47 | from sklearn.svm import LinearSVC
48 | from sklearn.metrics import accuracy_score
49 |
50 | SEED = 20
51 |
52 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y,
53 | random_state = SEED, test_size = 0.25,
54 | stratify = y)
55 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
56 |
57 | modelo = LinearSVC()
58 | modelo.fit(treino_x, treino_y)
59 | previsoes = modelo.predict(teste_x)
60 |
61 | acuracia = accuracy_score(teste_y, previsoes) * 100
62 | print("A acurácia foi %.2f%%" % acuracia)
63 |
64 | import numpy as np
65 | previsoes_de_base = np.ones(540)
66 | acuracia = accuracy_score(teste_y, previsoes_de_base) * 100
67 | print("A acurácia do algoritmo de baseline foi %.2f%%" % acuracia)
68 |
69 |
--------------------------------------------------------------------------------
/aula4.1/introdução_a_machine_learning_3.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Introdução a Machine Learning 3.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/1r4UlftWbCZA3w-glDhPlo1TUK-Pf6-Sa
8 | """
9 |
10 | !pip install seaborn==0.9.0
11 |
12 | import pandas as pd
13 |
14 | uri = "https://gist.githubusercontent.com/guilhermesilveira/1b7d5475863c15f484ac495bd70975cf/raw/16aff7a0aee67e7c100a2a48b676a2d2d142f646/projects.csv"
15 | dados = pd.read_csv(uri)
16 | dados.head()
17 |
18 | a_renomear = {
19 | 'expected_hours' : 'horas_esperadas',
20 | 'price' : 'preco',
21 | 'unfinished' : 'nao_finalizado'
22 | }
23 | dados = dados.rename(columns = a_renomear)
24 | dados.head()
25 |
26 | troca = {
27 | 0 : 1,
28 | 1 : 0
29 | }
30 | dados['finalizado'] = dados.nao_finalizado.map(troca)
31 | dados.head()
32 |
33 | dados.tail()
34 |
35 | import seaborn as sns
36 |
37 | sns.scatterplot(x="horas_esperadas", y="preco", data=dados)
38 |
39 | sns.scatterplot(x="horas_esperadas", y="preco", hue="finalizado", data=dados)
40 |
41 | sns.relplot(x="horas_esperadas", y="preco", hue="finalizado", col="finalizado", data=dados)
42 |
43 | x = dados[['horas_esperadas', 'preco']]
44 | y = dados['finalizado']
45 |
46 | from sklearn.model_selection import train_test_split
47 | from sklearn.svm import LinearSVC
48 | from sklearn.metrics import accuracy_score
49 |
50 | SEED = 5
51 | np.random.seed(SEED)
52 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
53 | stratify = y)
54 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
55 |
56 | modelo = LinearSVC()
57 | modelo.fit(treino_x, treino_y)
58 | previsoes = modelo.predict(teste_x)
59 |
60 | acuracia = accuracy_score(teste_y, previsoes) * 100
61 | print("A acurácia foi %.2f%%" % acuracia)
62 |
63 | import numpy as np
64 | previsoes_de_base = np.ones(540)
65 | acuracia = accuracy_score(teste_y, previsoes_de_base) * 100
66 | print("A acurácia do algoritmo de baseline foi %.2f%%" % acuracia)
67 |
68 | sns.scatterplot(x="horas_esperadas", y="preco", hue=teste_y, data=teste_x)
69 | # Bounding box of the test points on both feature axes.
70 | x_min = teste_x.horas_esperadas.min()
71 | x_max = teste_x.horas_esperadas.max()
72 | y_min = teste_x.preco.min()
73 | y_max = teste_x.preco.max()
74 | print(x_min, x_max,y_min,y_max)
75 | # Sample each axis in steps of 1/pixels of its range.
76 | pixels = 100
77 | eixo_x = np.arange(x_min, x_max, (x_max - x_min) / pixels)
78 | eixo_y = np.arange(y_min, y_max, (y_max - y_min) / pixels)
79 | # Build the grid and flatten it into two columns: (horas_esperadas, preco) per grid point.
80 | xx, yy = np.meshgrid(eixo_x, eixo_y)
81 | pontos = np.c_[xx.ravel(), yy.ravel()]
82 | pontos
83 | # Predict a class for every grid point, then restore the grid shape for contour plotting.
84 | Z = modelo.predict(pontos)
85 | Z = Z.reshape(xx.shape)
86 | Z
87 |
88 | import matplotlib.pyplot as plt
89 | # Filled contour of the predicted classes with the test points drawn on top, coloured by teste_y.
90 | plt.contourf(xx, yy, Z, alpha=0.3)
91 | plt.scatter(teste_x.horas_esperadas, teste_x.preco, c=teste_y, s=1)
92 |
93 | # DECISION BOUNDARY
94 |
95 | from sklearn.model_selection import train_test_split
96 | from sklearn.svm import SVC
97 | from sklearn.metrics import accuracy_score
98 | # Same experiment as before with SVC in place of LinearSVC; np comes from the earlier numpy import.
99 | SEED = 5
100 | np.random.seed(SEED)
101 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
102 | stratify = y)
103 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
104 | # Fit on the raw (unscaled) features and predict the test split.
105 | modelo = SVC()
106 | modelo.fit(treino_x, treino_y)
107 | previsoes = modelo.predict(teste_x)
108 |
109 | acuracia = accuracy_score(teste_y, previsoes) * 100
110 | print("A acurácia foi %.2f%%" % acuracia)
111 | # Bounding box of the test points on both feature axes.
112 | x_min = teste_x.horas_esperadas.min()
113 | x_max = teste_x.horas_esperadas.max()
114 | y_min = teste_x.preco.min()
115 | y_max = teste_x.preco.max()
116 | # Sample each axis in steps of 1/pixels of its range.
117 | pixels = 100
118 | eixo_x = np.arange(x_min, x_max, (x_max - x_min) / pixels)
119 | eixo_y = np.arange(y_min, y_max, (y_max - y_min) / pixels)
120 | # Build the grid and flatten it into two columns: (horas_esperadas, preco) per grid point.
121 | xx, yy = np.meshgrid(eixo_x, eixo_y)
122 | pontos = np.c_[xx.ravel(), yy.ravel()]
123 | # Predict a class for every grid point, then restore the grid shape for contour plotting.
124 | Z = modelo.predict(pontos)
125 | Z = Z.reshape(xx.shape)
126 |
127 | import matplotlib.pyplot as plt
128 | # Filled contour of the predicted classes with the test points drawn on top, coloured by teste_y.
129 | plt.contourf(xx, yy, Z, alpha=0.3)
130 | plt.scatter(teste_x.horas_esperadas, teste_x.preco, c=teste_y, s=1)
131 |
132 | # DECISION BOUNDARY
133 |
134 | from sklearn.preprocessing import StandardScaler
135 | from sklearn.model_selection import train_test_split
136 | from sklearn.svm import SVC
137 | from sklearn.metrics import accuracy_score
138 |
139 | SEED = 5
140 | np.random.seed(SEED)
141 | raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
142 | stratify = y)
143 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
144 |
145 | scaler = StandardScaler()
146 | scaler.fit(raw_treino_x)
147 | treino_x = scaler.transform(raw_treino_x)
148 | teste_x = scaler.transform(raw_teste_x)
149 |
150 | modelo = SVC()
151 | modelo.fit(treino_x, treino_y)
152 | previsoes = modelo.predict(teste_x)
153 |
154 | acuracia = accuracy_score(teste_y, previsoes) * 100
155 | print("A acurácia foi %.2f%%" % acuracia)
156 |
157 | treino_x
158 |
159 | data_x = teste_x[:,0]
160 | data_y = teste_x[:,1]
161 |
162 | x_min = data_x.min()
163 | x_max = data_x.max()
164 | y_min = data_y.min()
165 | y_max = data_y.max()
166 |
167 | pixels = 100
168 | eixo_x = np.arange(x_min, x_max, (x_max - x_min) / pixels)
169 | eixo_y = np.arange(y_min, y_max, (y_max - y_min) / pixels)
170 |
171 | xx, yy = np.meshgrid(eixo_x, eixo_y)
172 | pontos = np.c_[xx.ravel(), yy.ravel()]
173 |
174 | Z = modelo.predict(pontos)
175 | Z = Z.reshape(xx.shape)
176 |
177 | import matplotlib.pyplot as plt
178 |
179 | plt.contourf(xx, yy, Z, alpha=0.3)
180 | plt.scatter(data_x, data_y, c=teste_y, s=1)
181 |
182 | # DECISION BOUNDARY
183 |
184 |
--------------------------------------------------------------------------------
/aula5.1/Introdução_a_Machine_Learning_4.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Introdução a Machine Learning - 4.ipynb",
7 | "version": "0.3.2",
8 | "provenance": [],
9 | "collapsed_sections": []
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "metadata": {
19 | "id": "y7ik04NlDZMA",
20 | "colab_type": "code",
21 | "colab": {
22 | "base_uri": "https://localhost:8080/",
23 | "height": 195
24 | },
25 | "outputId": "0bdc8cf3-ab12-4a1c-83e4-3691f646e930"
26 | },
27 | "cell_type": "code",
28 | "source": [
29 | "import pandas as pd\n",
30 | "\n",
31 | "uri = \"https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv\"\n",
32 | "dados = pd.read_csv(uri)\n",
33 | "dados.head()"
34 | ],
35 | "execution_count": 4,
36 | "outputs": [
37 | {
38 | "output_type": "execute_result",
39 | "data": {
40 | "text/html": [
41 | "\n",
42 | "\n",
55 | "
\n",
56 | " \n",
57 | " \n",
58 | " | \n",
59 | " Unnamed: 0 | \n",
60 | " mileage_per_year | \n",
61 | " model_year | \n",
62 | " price | \n",
63 | " sold | \n",
64 | "
\n",
65 | " \n",
66 | " \n",
67 | " \n",
68 | " 0 | \n",
69 | " 0 | \n",
70 | " 21801 | \n",
71 | " 2000 | \n",
72 | " 30941.02 | \n",
73 | " yes | \n",
74 | "
\n",
75 | " \n",
76 | " 1 | \n",
77 | " 1 | \n",
78 | " 7843 | \n",
79 | " 1998 | \n",
80 | " 40557.96 | \n",
81 | " yes | \n",
82 | "
\n",
83 | " \n",
84 | " 2 | \n",
85 | " 2 | \n",
86 | " 7109 | \n",
87 | " 2006 | \n",
88 | " 89627.50 | \n",
89 | " no | \n",
90 | "
\n",
91 | " \n",
92 | " 3 | \n",
93 | " 3 | \n",
94 | " 26823 | \n",
95 | " 2015 | \n",
96 | " 95276.14 | \n",
97 | " no | \n",
98 | "
\n",
99 | " \n",
100 | " 4 | \n",
101 | " 4 | \n",
102 | " 7935 | \n",
103 | " 2014 | \n",
104 | " 117384.68 | \n",
105 | " yes | \n",
106 | "
\n",
107 | " \n",
108 | "
\n",
109 | "
"
110 | ],
111 | "text/plain": [
112 | " Unnamed: 0 mileage_per_year model_year price sold\n",
113 | "0 0 21801 2000 30941.02 yes\n",
114 | "1 1 7843 1998 40557.96 yes\n",
115 | "2 2 7109 2006 89627.50 no\n",
116 | "3 3 26823 2015 95276.14 no\n",
117 | "4 4 7935 2014 117384.68 yes"
118 | ]
119 | },
120 | "metadata": {
121 | "tags": []
122 | },
123 | "execution_count": 4
124 | }
125 | ]
126 | },
127 | {
128 | "metadata": {
129 | "id": "FqFrPmgJDhvM",
130 | "colab_type": "code",
131 | "colab": {
132 | "base_uri": "https://localhost:8080/",
133 | "height": 195
134 | },
135 | "outputId": "2c6b015c-4c70-461a-a5be-a1a5f225a5f7"
136 | },
137 | "cell_type": "code",
138 | "source": [
139 | "a_renomear = {\n",
140 | " 'mileage_per_year' : 'milhas_por_ano',\n",
141 | " 'model_year' : 'ano_do_modelo',\n",
142 | " 'price' : 'preco',\n",
143 | " 'sold' : 'vendido'\n",
144 | "}\n",
145 | "dados = dados.rename(columns=a_renomear)\n",
146 | "dados.head()"
147 | ],
148 | "execution_count": 6,
149 | "outputs": [
150 | {
151 | "output_type": "execute_result",
152 | "data": {
153 | "text/html": [
154 | "\n",
155 | "\n",
168 | "
\n",
169 | " \n",
170 | " \n",
171 | " | \n",
172 | " Unnamed: 0 | \n",
173 | " milhas_por_ano | \n",
174 | " ano_do_modelo | \n",
175 | " preco | \n",
176 | " vendido | \n",
177 | "
\n",
178 | " \n",
179 | " \n",
180 | " \n",
181 | " 0 | \n",
182 | " 0 | \n",
183 | " 21801 | \n",
184 | " 2000 | \n",
185 | " 30941.02 | \n",
186 | " yes | \n",
187 | "
\n",
188 | " \n",
189 | " 1 | \n",
190 | " 1 | \n",
191 | " 7843 | \n",
192 | " 1998 | \n",
193 | " 40557.96 | \n",
194 | " yes | \n",
195 | "
\n",
196 | " \n",
197 | " 2 | \n",
198 | " 2 | \n",
199 | " 7109 | \n",
200 | " 2006 | \n",
201 | " 89627.50 | \n",
202 | " no | \n",
203 | "
\n",
204 | " \n",
205 | " 3 | \n",
206 | " 3 | \n",
207 | " 26823 | \n",
208 | " 2015 | \n",
209 | " 95276.14 | \n",
210 | " no | \n",
211 | "
\n",
212 | " \n",
213 | " 4 | \n",
214 | " 4 | \n",
215 | " 7935 | \n",
216 | " 2014 | \n",
217 | " 117384.68 | \n",
218 | " yes | \n",
219 | "
\n",
220 | " \n",
221 | "
\n",
222 | "
"
223 | ],
224 | "text/plain": [
225 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido\n",
226 | "0 0 21801 2000 30941.02 yes\n",
227 | "1 1 7843 1998 40557.96 yes\n",
228 | "2 2 7109 2006 89627.50 no\n",
229 | "3 3 26823 2015 95276.14 no\n",
230 | "4 4 7935 2014 117384.68 yes"
231 | ]
232 | },
233 | "metadata": {
234 | "tags": []
235 | },
236 | "execution_count": 6
237 | }
238 | ]
239 | },
240 | {
241 | "metadata": {
242 | "id": "J31fUGbVEKpW",
243 | "colab_type": "code",
244 | "colab": {
245 | "base_uri": "https://localhost:8080/",
246 | "height": 195
247 | },
248 | "outputId": "f25f070a-ee26-40ce-9b02-7e37aecc7b2f"
249 | },
250 | "cell_type": "code",
251 | "source": [
252 | "a_trocar = {\n",
253 | " 'no' : 0,\n",
254 | " 'yes' : 1\n",
255 | "}\n",
256 | "dados.vendido = dados.vendido.map(a_trocar)\n",
257 | "dados.head()"
258 | ],
259 | "execution_count": 9,
260 | "outputs": [
261 | {
262 | "output_type": "execute_result",
263 | "data": {
264 | "text/html": [
265 | "\n",
266 | "\n",
279 | "
\n",
280 | " \n",
281 | " \n",
282 | " | \n",
283 | " Unnamed: 0 | \n",
284 | " milhas_por_ano | \n",
285 | " ano_do_modelo | \n",
286 | " preco | \n",
287 | " vendido | \n",
288 | "
\n",
289 | " \n",
290 | " \n",
291 | " \n",
292 | " 0 | \n",
293 | " 0 | \n",
294 | " 21801 | \n",
295 | " 2000 | \n",
296 | " 30941.02 | \n",
297 | " 1 | \n",
298 | "
\n",
299 | " \n",
300 | " 1 | \n",
301 | " 1 | \n",
302 | " 7843 | \n",
303 | " 1998 | \n",
304 | " 40557.96 | \n",
305 | " 1 | \n",
306 | "
\n",
307 | " \n",
308 | " 2 | \n",
309 | " 2 | \n",
310 | " 7109 | \n",
311 | " 2006 | \n",
312 | " 89627.50 | \n",
313 | " 0 | \n",
314 | "
\n",
315 | " \n",
316 | " 3 | \n",
317 | " 3 | \n",
318 | " 26823 | \n",
319 | " 2015 | \n",
320 | " 95276.14 | \n",
321 | " 0 | \n",
322 | "
\n",
323 | " \n",
324 | " 4 | \n",
325 | " 4 | \n",
326 | " 7935 | \n",
327 | " 2014 | \n",
328 | " 117384.68 | \n",
329 | " 1 | \n",
330 | "
\n",
331 | " \n",
332 | "
\n",
333 | "
"
334 | ],
335 | "text/plain": [
336 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido\n",
337 | "0 0 21801 2000 30941.02 1\n",
338 | "1 1 7843 1998 40557.96 1\n",
339 | "2 2 7109 2006 89627.50 0\n",
340 | "3 3 26823 2015 95276.14 0\n",
341 | "4 4 7935 2014 117384.68 1"
342 | ]
343 | },
344 | "metadata": {
345 | "tags": []
346 | },
347 | "execution_count": 9
348 | }
349 | ]
350 | },
351 | {
352 | "metadata": {
353 | "id": "tZFog8O9EXYD",
354 | "colab_type": "code",
355 | "colab": {
356 | "base_uri": "https://localhost:8080/",
357 | "height": 195
358 | },
359 | "outputId": "09240207-0e20-4c07-822c-3a23186b99fe"
360 | },
361 | "cell_type": "code",
362 | "source": [
363 | "from datetime import datetime\n",
364 | "\n",
365 | "ano_atual = datetime.today().year\n",
366 | "dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo\n",
367 | "dados.head()"
368 | ],
369 | "execution_count": 12,
370 | "outputs": [
371 | {
372 | "output_type": "execute_result",
373 | "data": {
374 | "text/html": [
375 | "\n",
376 | "\n",
389 | "
\n",
390 | " \n",
391 | " \n",
392 | " | \n",
393 | " Unnamed: 0 | \n",
394 | " milhas_por_ano | \n",
395 | " ano_do_modelo | \n",
396 | " preco | \n",
397 | " vendido | \n",
398 | " idade_do_modelo | \n",
399 | "
\n",
400 | " \n",
401 | " \n",
402 | " \n",
403 | " 0 | \n",
404 | " 0 | \n",
405 | " 21801 | \n",
406 | " 2000 | \n",
407 | " 30941.02 | \n",
408 | " 1 | \n",
409 | " 18 | \n",
410 | "
\n",
411 | " \n",
412 | " 1 | \n",
413 | " 1 | \n",
414 | " 7843 | \n",
415 | " 1998 | \n",
416 | " 40557.96 | \n",
417 | " 1 | \n",
418 | " 20 | \n",
419 | "
\n",
420 | " \n",
421 | " 2 | \n",
422 | " 2 | \n",
423 | " 7109 | \n",
424 | " 2006 | \n",
425 | " 89627.50 | \n",
426 | " 0 | \n",
427 | " 12 | \n",
428 | "
\n",
429 | " \n",
430 | " 3 | \n",
431 | " 3 | \n",
432 | " 26823 | \n",
433 | " 2015 | \n",
434 | " 95276.14 | \n",
435 | " 0 | \n",
436 | " 3 | \n",
437 | "
\n",
438 | " \n",
439 | " 4 | \n",
440 | " 4 | \n",
441 | " 7935 | \n",
442 | " 2014 | \n",
443 | " 117384.68 | \n",
444 | " 1 | \n",
445 | " 4 | \n",
446 | "
\n",
447 | " \n",
448 | "
\n",
449 | "
"
450 | ],
451 | "text/plain": [
452 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido \\\n",
453 | "0 0 21801 2000 30941.02 1 \n",
454 | "1 1 7843 1998 40557.96 1 \n",
455 | "2 2 7109 2006 89627.50 0 \n",
456 | "3 3 26823 2015 95276.14 0 \n",
457 | "4 4 7935 2014 117384.68 1 \n",
458 | "\n",
459 | " idade_do_modelo \n",
460 | "0 18 \n",
461 | "1 20 \n",
462 | "2 12 \n",
463 | "3 3 \n",
464 | "4 4 "
465 | ]
466 | },
467 | "metadata": {
468 | "tags": []
469 | },
470 | "execution_count": 12
471 | }
472 | ]
473 | },
474 | {
475 | "metadata": {
476 | "id": "3wWWgxhcFbR9",
477 | "colab_type": "code",
478 | "colab": {
479 | "base_uri": "https://localhost:8080/",
480 | "height": 195
481 | },
482 | "outputId": "5c1a4a30-6d60-44b7-d232-0c8d47bb4d22"
483 | },
484 | "cell_type": "code",
485 | "source": [
486 | "dados['km_por_ano'] = dados.milhas_por_ano * 1.60934\n",
487 | "dados.head()"
488 | ],
489 | "execution_count": 15,
490 | "outputs": [
491 | {
492 | "output_type": "execute_result",
493 | "data": {
494 | "text/html": [
495 | "\n",
496 | "\n",
509 | "
\n",
510 | " \n",
511 | " \n",
512 | " | \n",
513 | " Unnamed: 0 | \n",
514 | " milhas_por_ano | \n",
515 | " ano_do_modelo | \n",
516 | " preco | \n",
517 | " vendido | \n",
518 | " idade_do_modelo | \n",
519 | " km_por_ano | \n",
520 | "
\n",
521 | " \n",
522 | " \n",
523 | " \n",
524 | " 0 | \n",
525 | " 0 | \n",
526 | " 21801 | \n",
527 | " 2000 | \n",
528 | " 30941.02 | \n",
529 | " 1 | \n",
530 | " 18 | \n",
531 | " 35085.22134 | \n",
532 | "
\n",
533 | " \n",
534 | " 1 | \n",
535 | " 1 | \n",
536 | " 7843 | \n",
537 | " 1998 | \n",
538 | " 40557.96 | \n",
539 | " 1 | \n",
540 | " 20 | \n",
541 | " 12622.05362 | \n",
542 | "
\n",
543 | " \n",
544 | " 2 | \n",
545 | " 2 | \n",
546 | " 7109 | \n",
547 | " 2006 | \n",
548 | " 89627.50 | \n",
549 | " 0 | \n",
550 | " 12 | \n",
551 | " 11440.79806 | \n",
552 | "
\n",
553 | " \n",
554 | " 3 | \n",
555 | " 3 | \n",
556 | " 26823 | \n",
557 | " 2015 | \n",
558 | " 95276.14 | \n",
559 | " 0 | \n",
560 | " 3 | \n",
561 | " 43167.32682 | \n",
562 | "
\n",
563 | " \n",
564 | " 4 | \n",
565 | " 4 | \n",
566 | " 7935 | \n",
567 | " 2014 | \n",
568 | " 117384.68 | \n",
569 | " 1 | \n",
570 | " 4 | \n",
571 | " 12770.11290 | \n",
572 | "
\n",
573 | " \n",
574 | "
\n",
575 | "
"
576 | ],
577 | "text/plain": [
578 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido \\\n",
579 | "0 0 21801 2000 30941.02 1 \n",
580 | "1 1 7843 1998 40557.96 1 \n",
581 | "2 2 7109 2006 89627.50 0 \n",
582 | "3 3 26823 2015 95276.14 0 \n",
583 | "4 4 7935 2014 117384.68 1 \n",
584 | "\n",
585 | " idade_do_modelo km_por_ano \n",
586 | "0 18 35085.22134 \n",
587 | "1 20 12622.05362 \n",
588 | "2 12 11440.79806 \n",
589 | "3 3 43167.32682 \n",
590 | "4 4 12770.11290 "
591 | ]
592 | },
593 | "metadata": {
594 | "tags": []
595 | },
596 | "execution_count": 15
597 | }
598 | ]
599 | },
600 | {
601 | "metadata": {
602 | "id": "MfjQNKlyFo2S",
603 | "colab_type": "code",
604 | "colab": {
605 | "base_uri": "https://localhost:8080/",
606 | "height": 195
607 | },
608 | "outputId": "908e17b5-9b48-48d0-f9e3-fbe69343a121"
609 | },
610 | "cell_type": "code",
611 | "source": [
612 | "dados = dados.drop(columns = [\"Unnamed: 0\", \"milhas_por_ano\",\"ano_do_modelo\"], axis=1)\n",
613 | "dados.head()"
614 | ],
615 | "execution_count": 16,
616 | "outputs": [
617 | {
618 | "output_type": "execute_result",
619 | "data": {
620 | "text/html": [
621 | "\n",
622 | "\n",
635 | "
\n",
636 | " \n",
637 | " \n",
638 | " | \n",
639 | " preco | \n",
640 | " vendido | \n",
641 | " idade_do_modelo | \n",
642 | " km_por_ano | \n",
643 | "
\n",
644 | " \n",
645 | " \n",
646 | " \n",
647 | " 0 | \n",
648 | " 30941.02 | \n",
649 | " 1 | \n",
650 | " 18 | \n",
651 | " 35085.22134 | \n",
652 | "
\n",
653 | " \n",
654 | " 1 | \n",
655 | " 40557.96 | \n",
656 | " 1 | \n",
657 | " 20 | \n",
658 | " 12622.05362 | \n",
659 | "
\n",
660 | " \n",
661 | " 2 | \n",
662 | " 89627.50 | \n",
663 | " 0 | \n",
664 | " 12 | \n",
665 | " 11440.79806 | \n",
666 | "
\n",
667 | " \n",
668 | " 3 | \n",
669 | " 95276.14 | \n",
670 | " 0 | \n",
671 | " 3 | \n",
672 | " 43167.32682 | \n",
673 | "
\n",
674 | " \n",
675 | " 4 | \n",
676 | " 117384.68 | \n",
677 | " 1 | \n",
678 | " 4 | \n",
679 | " 12770.11290 | \n",
680 | "
\n",
681 | " \n",
682 | "
\n",
683 | "
"
684 | ],
685 | "text/plain": [
686 | " preco vendido idade_do_modelo km_por_ano\n",
687 | "0 30941.02 1 18 35085.22134\n",
688 | "1 40557.96 1 20 12622.05362\n",
689 | "2 89627.50 0 12 11440.79806\n",
690 | "3 95276.14 0 3 43167.32682\n",
691 | "4 117384.68 1 4 12770.11290"
692 | ]
693 | },
694 | "metadata": {
695 | "tags": []
696 | },
697 | "execution_count": 16
698 | }
699 | ]
700 | },
701 | {
702 | "metadata": {
703 | "id": "E3xebM4FF0Tc",
704 | "colab_type": "code",
705 | "colab": {
706 | "base_uri": "https://localhost:8080/",
707 | "height": 51
708 | },
709 | "outputId": "51eb40fc-4bb1-4183-85d5-de0fd699c5d7"
710 | },
711 | "cell_type": "code",
712 | "source": [
713 | "import numpy as np\n",
714 | "from sklearn.model_selection import train_test_split\n",
715 | "from sklearn.svm import LinearSVC\n",
716 | "from sklearn.metrics import accuracy_score\n",
717 | "\n",
718 | "x = dados[[\"preco\", \"idade_do_modelo\",\"km_por_ano\"]]\n",
719 | "y = dados[\"vendido\"]\n",
720 | "\n",
721 | "SEED = 5\n",
722 | "np.random.seed(SEED)\n",
723 | "treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n",
724 | " stratify = y)\n",
725 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n",
726 | "\n",
727 | "modelo = LinearSVC()\n",
728 | "modelo.fit(treino_x, treino_y)\n",
729 | "previsoes = modelo.predict(teste_x)\n",
730 | "\n",
731 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
732 | "print(\"A acurácia foi %.2f%%\" % acuracia)"
733 | ],
734 | "execution_count": 18,
735 | "outputs": [
736 | {
737 | "output_type": "stream",
738 | "text": [
739 | "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n",
740 | "A acurácia foi 57.88%\n"
741 | ],
742 | "name": "stdout"
743 | }
744 | ]
745 | },
746 | {
747 | "metadata": {
748 | "id": "G2ZFWoPkGONL",
749 | "colab_type": "code",
750 | "colab": {}
751 | },
752 | "cell_type": "code",
753 | "source": [
754 | ""
755 | ],
756 | "execution_count": 0,
757 | "outputs": []
758 | }
759 | ]
760 | }
--------------------------------------------------------------------------------
/aula5.1/introdução_a_machine_learning_4.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Introdução a Machine Learning - 4.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/1RpYAAROMa4C86iZscVUzaWIeVYSJapyE
8 | """
9 |
10 | import pandas as pd
11 | # Read the car-prices CSV from the remote gist into a DataFrame (requires network access).
12 | uri = "https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv"
13 | dados = pd.read_csv(uri)
14 | dados.head()  # bare expression kept from the notebook export; its value is discarded in a script
15 | # Map the CSV's English column names to the Portuguese names used from here on.
16 | a_renomear = {
17 | 'mileage_per_year' : 'milhas_por_ano',
18 | 'model_year' : 'ano_do_modelo',
19 | 'price' : 'preco',
20 | 'sold' : 'vendido'
21 | }
22 | dados = dados.rename(columns=a_renomear)
23 | dados.head()
24 | # Encode the "no"/"yes" strings of the "vendido" column as 0/1.
25 | a_trocar = {
26 | 'no' : 0,
27 | 'yes' : 1
28 | }
29 | dados.vendido = dados.vendido.map(a_trocar)
30 | dados.head()
31 |
32 | from datetime import datetime
33 | # NOTE(review): the age is computed from the year the script runs, so this feature shifts from year to year.
34 | ano_atual = datetime.today().year
35 | dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo
36 | dados.head()
37 | # Convert miles per year to kilometres per year (1 mile = 1.60934 km).
38 | dados['km_por_ano'] = dados.milhas_por_ano * 1.60934
39 | dados.head()
40 | # Drop the CSV's unnamed column and the two source columns replaced by derived features. NOTE(review): axis=1 looks redundant alongside columns= - confirm.
41 | dados = dados.drop(columns = ["Unnamed: 0", "milhas_por_ano","ano_do_modelo"], axis=1)
42 | dados.head()
43 |
44 | import numpy as np
45 | from sklearn.model_selection import train_test_split
46 | from sklearn.svm import LinearSVC
47 | from sklearn.metrics import accuracy_score
48 | # Features are price, model age and km per year; the label is the 0/1 "vendido" column.
49 | x = dados[["preco", "idade_do_modelo","km_por_ano"]]
50 | y = dados["vendido"]
51 | # Seed NumPy's global RNG; no random_state is passed to the split or the model below.
52 | SEED = 5
53 | np.random.seed(SEED)
54 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
55 | stratify = y)
56 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))
57 | # Fit a linear SVM on the training split and predict the test split.
58 | modelo = LinearSVC()
59 | modelo.fit(treino_x, treino_y)
60 | previsoes = modelo.predict(teste_x)
61 | # accuracy_score returns a fraction; scale it to a percentage for printing.
62 | acuracia = accuracy_score(teste_y, previsoes) * 100
63 | print("A acurácia foi %.2f%%" % acuracia)
64 |
65 |
--------------------------------------------------------------------------------
/aula5.2/Introdução_a_Machine_Learning_4.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Introdução a Machine Learning - 4.ipynb",
7 | "version": "0.3.2",
8 | "provenance": [],
9 | "collapsed_sections": []
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "metadata": {
19 | "id": "y7ik04NlDZMA",
20 | "colab_type": "code",
21 | "colab": {
22 | "base_uri": "https://localhost:8080/",
23 | "height": 204
24 | },
25 | "outputId": "73c13868-60e4-4a25-9338-02f324cabb18"
26 | },
27 | "cell_type": "code",
28 | "source": [
29 | "import pandas as pd\n",
30 | "\n",
31 | "uri = \"https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv\"\n",
32 | "dados = pd.read_csv(uri)\n",
33 | "dados.head()"
34 | ],
35 | "execution_count": 1,
36 | "outputs": [
37 | {
38 | "output_type": "execute_result",
39 | "data": {
40 | "text/html": [
41 | "\n",
42 | "\n",
55 | "
\n",
56 | " \n",
57 | " \n",
58 | " | \n",
59 | " Unnamed: 0 | \n",
60 | " mileage_per_year | \n",
61 | " model_year | \n",
62 | " price | \n",
63 | " sold | \n",
64 | "
\n",
65 | " \n",
66 | " \n",
67 | " \n",
68 | " 0 | \n",
69 | " 0 | \n",
70 | " 21801 | \n",
71 | " 2000 | \n",
72 | " 30941.02 | \n",
73 | " yes | \n",
74 | "
\n",
75 | " \n",
76 | " 1 | \n",
77 | " 1 | \n",
78 | " 7843 | \n",
79 | " 1998 | \n",
80 | " 40557.96 | \n",
81 | " yes | \n",
82 | "
\n",
83 | " \n",
84 | " 2 | \n",
85 | " 2 | \n",
86 | " 7109 | \n",
87 | " 2006 | \n",
88 | " 89627.50 | \n",
89 | " no | \n",
90 | "
\n",
91 | " \n",
92 | " 3 | \n",
93 | " 3 | \n",
94 | " 26823 | \n",
95 | " 2015 | \n",
96 | " 95276.14 | \n",
97 | " no | \n",
98 | "
\n",
99 | " \n",
100 | " 4 | \n",
101 | " 4 | \n",
102 | " 7935 | \n",
103 | " 2014 | \n",
104 | " 117384.68 | \n",
105 | " yes | \n",
106 | "
\n",
107 | " \n",
108 | "
\n",
109 | "
"
110 | ],
111 | "text/plain": [
112 | " Unnamed: 0 mileage_per_year model_year price sold\n",
113 | "0 0 21801 2000 30941.02 yes\n",
114 | "1 1 7843 1998 40557.96 yes\n",
115 | "2 2 7109 2006 89627.50 no\n",
116 | "3 3 26823 2015 95276.14 no\n",
117 | "4 4 7935 2014 117384.68 yes"
118 | ]
119 | },
120 | "metadata": {
121 | "tags": []
122 | },
123 | "execution_count": 1
124 | }
125 | ]
126 | },
127 | {
128 | "metadata": {
129 | "id": "FqFrPmgJDhvM",
130 | "colab_type": "code",
131 | "colab": {
132 | "base_uri": "https://localhost:8080/",
133 | "height": 195
134 | },
135 | "outputId": "46b16944-2a4a-4b53-b970-5a96a9b7d867"
136 | },
137 | "cell_type": "code",
138 | "source": [
139 | "a_renomear = {\n",
140 | " 'mileage_per_year' : 'milhas_por_ano',\n",
141 | " 'model_year' : 'ano_do_modelo',\n",
142 | " 'price' : 'preco',\n",
143 | " 'sold' : 'vendido'\n",
144 | "}\n",
145 | "dados = dados.rename(columns=a_renomear)\n",
146 | "dados.head()"
147 | ],
148 | "execution_count": 2,
149 | "outputs": [
150 | {
151 | "output_type": "execute_result",
152 | "data": {
153 | "text/html": [
154 | "\n",
155 | "\n",
168 | "
\n",
169 | " \n",
170 | " \n",
171 | " | \n",
172 | " Unnamed: 0 | \n",
173 | " milhas_por_ano | \n",
174 | " ano_do_modelo | \n",
175 | " preco | \n",
176 | " vendido | \n",
177 | "
\n",
178 | " \n",
179 | " \n",
180 | " \n",
181 | " 0 | \n",
182 | " 0 | \n",
183 | " 21801 | \n",
184 | " 2000 | \n",
185 | " 30941.02 | \n",
186 | " yes | \n",
187 | "
\n",
188 | " \n",
189 | " 1 | \n",
190 | " 1 | \n",
191 | " 7843 | \n",
192 | " 1998 | \n",
193 | " 40557.96 | \n",
194 | " yes | \n",
195 | "
\n",
196 | " \n",
197 | " 2 | \n",
198 | " 2 | \n",
199 | " 7109 | \n",
200 | " 2006 | \n",
201 | " 89627.50 | \n",
202 | " no | \n",
203 | "
\n",
204 | " \n",
205 | " 3 | \n",
206 | " 3 | \n",
207 | " 26823 | \n",
208 | " 2015 | \n",
209 | " 95276.14 | \n",
210 | " no | \n",
211 | "
\n",
212 | " \n",
213 | " 4 | \n",
214 | " 4 | \n",
215 | " 7935 | \n",
216 | " 2014 | \n",
217 | " 117384.68 | \n",
218 | " yes | \n",
219 | "
\n",
220 | " \n",
221 | "
\n",
222 | "
"
223 | ],
224 | "text/plain": [
225 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido\n",
226 | "0 0 21801 2000 30941.02 yes\n",
227 | "1 1 7843 1998 40557.96 yes\n",
228 | "2 2 7109 2006 89627.50 no\n",
229 | "3 3 26823 2015 95276.14 no\n",
230 | "4 4 7935 2014 117384.68 yes"
231 | ]
232 | },
233 | "metadata": {
234 | "tags": []
235 | },
236 | "execution_count": 2
237 | }
238 | ]
239 | },
240 | {
241 | "metadata": {
242 | "id": "J31fUGbVEKpW",
243 | "colab_type": "code",
244 | "colab": {
245 | "base_uri": "https://localhost:8080/",
246 | "height": 195
247 | },
248 | "outputId": "ad0a651f-320a-4f5e-8e2c-706bf6937fbb"
249 | },
250 | "cell_type": "code",
251 | "source": [
252 | "a_trocar = {\n",
253 | " 'no' : 0,\n",
254 | " 'yes' : 1\n",
255 | "}\n",
256 | "dados.vendido = dados.vendido.map(a_trocar)\n",
257 | "dados.head()"
258 | ],
259 | "execution_count": 3,
260 | "outputs": [
261 | {
262 | "output_type": "execute_result",
263 | "data": {
264 | "text/html": [
265 | "\n",
266 | "\n",
279 | "
\n",
280 | " \n",
281 | " \n",
282 | " | \n",
283 | " Unnamed: 0 | \n",
284 | " milhas_por_ano | \n",
285 | " ano_do_modelo | \n",
286 | " preco | \n",
287 | " vendido | \n",
288 | "
\n",
289 | " \n",
290 | " \n",
291 | " \n",
292 | " 0 | \n",
293 | " 0 | \n",
294 | " 21801 | \n",
295 | " 2000 | \n",
296 | " 30941.02 | \n",
297 | " 1 | \n",
298 | "
\n",
299 | " \n",
300 | " 1 | \n",
301 | " 1 | \n",
302 | " 7843 | \n",
303 | " 1998 | \n",
304 | " 40557.96 | \n",
305 | " 1 | \n",
306 | "
\n",
307 | " \n",
308 | " 2 | \n",
309 | " 2 | \n",
310 | " 7109 | \n",
311 | " 2006 | \n",
312 | " 89627.50 | \n",
313 | " 0 | \n",
314 | "
\n",
315 | " \n",
316 | " 3 | \n",
317 | " 3 | \n",
318 | " 26823 | \n",
319 | " 2015 | \n",
320 | " 95276.14 | \n",
321 | " 0 | \n",
322 | "
\n",
323 | " \n",
324 | " 4 | \n",
325 | " 4 | \n",
326 | " 7935 | \n",
327 | " 2014 | \n",
328 | " 117384.68 | \n",
329 | " 1 | \n",
330 | "
\n",
331 | " \n",
332 | "
\n",
333 | "
"
334 | ],
335 | "text/plain": [
336 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido\n",
337 | "0 0 21801 2000 30941.02 1\n",
338 | "1 1 7843 1998 40557.96 1\n",
339 | "2 2 7109 2006 89627.50 0\n",
340 | "3 3 26823 2015 95276.14 0\n",
341 | "4 4 7935 2014 117384.68 1"
342 | ]
343 | },
344 | "metadata": {
345 | "tags": []
346 | },
347 | "execution_count": 3
348 | }
349 | ]
350 | },
351 | {
352 | "metadata": {
353 | "id": "tZFog8O9EXYD",
354 | "colab_type": "code",
355 | "colab": {
356 | "base_uri": "https://localhost:8080/",
357 | "height": 195
358 | },
359 | "outputId": "5792a2f8-f777-4237-e813-697f60309951"
360 | },
361 | "cell_type": "code",
362 | "source": [
363 | "from datetime import datetime\n",
364 | "\n",
365 | "ano_atual = datetime.today().year\n",
366 | "dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo\n",
367 | "dados.head()"
368 | ],
369 | "execution_count": 4,
370 | "outputs": [
371 | {
372 | "output_type": "execute_result",
373 | "data": {
374 | "text/html": [
375 | "\n",
376 | "\n",
389 | "
\n",
390 | " \n",
391 | " \n",
392 | " | \n",
393 | " Unnamed: 0 | \n",
394 | " milhas_por_ano | \n",
395 | " ano_do_modelo | \n",
396 | " preco | \n",
397 | " vendido | \n",
398 | " idade_do_modelo | \n",
399 | "
\n",
400 | " \n",
401 | " \n",
402 | " \n",
403 | " 0 | \n",
404 | " 0 | \n",
405 | " 21801 | \n",
406 | " 2000 | \n",
407 | " 30941.02 | \n",
408 | " 1 | \n",
409 | " 18 | \n",
410 | "
\n",
411 | " \n",
412 | " 1 | \n",
413 | " 1 | \n",
414 | " 7843 | \n",
415 | " 1998 | \n",
416 | " 40557.96 | \n",
417 | " 1 | \n",
418 | " 20 | \n",
419 | "
\n",
420 | " \n",
421 | " 2 | \n",
422 | " 2 | \n",
423 | " 7109 | \n",
424 | " 2006 | \n",
425 | " 89627.50 | \n",
426 | " 0 | \n",
427 | " 12 | \n",
428 | "
\n",
429 | " \n",
430 | " 3 | \n",
431 | " 3 | \n",
432 | " 26823 | \n",
433 | " 2015 | \n",
434 | " 95276.14 | \n",
435 | " 0 | \n",
436 | " 3 | \n",
437 | "
\n",
438 | " \n",
439 | " 4 | \n",
440 | " 4 | \n",
441 | " 7935 | \n",
442 | " 2014 | \n",
443 | " 117384.68 | \n",
444 | " 1 | \n",
445 | " 4 | \n",
446 | "
\n",
447 | " \n",
448 | "
\n",
449 | "
"
450 | ],
451 | "text/plain": [
452 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido \\\n",
453 | "0 0 21801 2000 30941.02 1 \n",
454 | "1 1 7843 1998 40557.96 1 \n",
455 | "2 2 7109 2006 89627.50 0 \n",
456 | "3 3 26823 2015 95276.14 0 \n",
457 | "4 4 7935 2014 117384.68 1 \n",
458 | "\n",
459 | " idade_do_modelo \n",
460 | "0 18 \n",
461 | "1 20 \n",
462 | "2 12 \n",
463 | "3 3 \n",
464 | "4 4 "
465 | ]
466 | },
467 | "metadata": {
468 | "tags": []
469 | },
470 | "execution_count": 4
471 | }
472 | ]
473 | },
474 | {
475 | "metadata": {
476 | "id": "3wWWgxhcFbR9",
477 | "colab_type": "code",
478 | "colab": {
479 | "base_uri": "https://localhost:8080/",
480 | "height": 204
481 | },
482 | "outputId": "b4c147dc-c2e5-4bfd-e78f-5a34f72770d6"
483 | },
484 | "cell_type": "code",
485 | "source": [
486 | "dados['km_por_ano'] = dados.milhas_por_ano * 1.60934\n",
487 | "dados.head()"
488 | ],
489 | "execution_count": 5,
490 | "outputs": [
491 | {
492 | "output_type": "execute_result",
493 | "data": {
494 | "text/html": [
495 | "\n",
496 | "\n",
509 | "
\n",
510 | " \n",
511 | " \n",
512 | " | \n",
513 | " Unnamed: 0 | \n",
514 | " milhas_por_ano | \n",
515 | " ano_do_modelo | \n",
516 | " preco | \n",
517 | " vendido | \n",
518 | " idade_do_modelo | \n",
519 | " km_por_ano | \n",
520 | "
\n",
521 | " \n",
522 | " \n",
523 | " \n",
524 | " 0 | \n",
525 | " 0 | \n",
526 | " 21801 | \n",
527 | " 2000 | \n",
528 | " 30941.02 | \n",
529 | " 1 | \n",
530 | " 18 | \n",
531 | " 35085.22134 | \n",
532 | "
\n",
533 | " \n",
534 | " 1 | \n",
535 | " 1 | \n",
536 | " 7843 | \n",
537 | " 1998 | \n",
538 | " 40557.96 | \n",
539 | " 1 | \n",
540 | " 20 | \n",
541 | " 12622.05362 | \n",
542 | "
\n",
543 | " \n",
544 | " 2 | \n",
545 | " 2 | \n",
546 | " 7109 | \n",
547 | " 2006 | \n",
548 | " 89627.50 | \n",
549 | " 0 | \n",
550 | " 12 | \n",
551 | " 11440.79806 | \n",
552 | "
\n",
553 | " \n",
554 | " 3 | \n",
555 | " 3 | \n",
556 | " 26823 | \n",
557 | " 2015 | \n",
558 | " 95276.14 | \n",
559 | " 0 | \n",
560 | " 3 | \n",
561 | " 43167.32682 | \n",
562 | "
\n",
563 | " \n",
564 | " 4 | \n",
565 | " 4 | \n",
566 | " 7935 | \n",
567 | " 2014 | \n",
568 | " 117384.68 | \n",
569 | " 1 | \n",
570 | " 4 | \n",
571 | " 12770.11290 | \n",
572 | "
\n",
573 | " \n",
574 | "
\n",
575 | "
"
576 | ],
577 | "text/plain": [
578 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido \\\n",
579 | "0 0 21801 2000 30941.02 1 \n",
580 | "1 1 7843 1998 40557.96 1 \n",
581 | "2 2 7109 2006 89627.50 0 \n",
582 | "3 3 26823 2015 95276.14 0 \n",
583 | "4 4 7935 2014 117384.68 1 \n",
584 | "\n",
585 | " idade_do_modelo km_por_ano \n",
586 | "0 18 35085.22134 \n",
587 | "1 20 12622.05362 \n",
588 | "2 12 11440.79806 \n",
589 | "3 3 43167.32682 \n",
590 | "4 4 12770.11290 "
591 | ]
592 | },
593 | "metadata": {
594 | "tags": []
595 | },
596 | "execution_count": 5
597 | }
598 | ]
599 | },
600 | {
601 | "metadata": {
602 | "id": "MfjQNKlyFo2S",
603 | "colab_type": "code",
604 | "colab": {
605 | "base_uri": "https://localhost:8080/",
606 | "height": 204
607 | },
608 | "outputId": "7588f6a5-ff37-4996-91a5-d9ee4b5543d7"
609 | },
610 | "cell_type": "code",
611 | "source": [
612 | "dados = dados.drop(columns = [\"Unnamed: 0\", \"milhas_por_ano\",\"ano_do_modelo\"], axis=1)\n",
613 | "dados.head()"
614 | ],
615 | "execution_count": 6,
616 | "outputs": [
617 | {
618 | "output_type": "execute_result",
619 | "data": {
620 | "text/html": [
621 | "\n",
622 | "\n",
635 | "
\n",
636 | " \n",
637 | " \n",
638 | " | \n",
639 | " preco | \n",
640 | " vendido | \n",
641 | " idade_do_modelo | \n",
642 | " km_por_ano | \n",
643 | "
\n",
644 | " \n",
645 | " \n",
646 | " \n",
647 | " 0 | \n",
648 | " 30941.02 | \n",
649 | " 1 | \n",
650 | " 18 | \n",
651 | " 35085.22134 | \n",
652 | "
\n",
653 | " \n",
654 | " 1 | \n",
655 | " 40557.96 | \n",
656 | " 1 | \n",
657 | " 20 | \n",
658 | " 12622.05362 | \n",
659 | "
\n",
660 | " \n",
661 | " 2 | \n",
662 | " 89627.50 | \n",
663 | " 0 | \n",
664 | " 12 | \n",
665 | " 11440.79806 | \n",
666 | "
\n",
667 | " \n",
668 | " 3 | \n",
669 | " 95276.14 | \n",
670 | " 0 | \n",
671 | " 3 | \n",
672 | " 43167.32682 | \n",
673 | "
\n",
674 | " \n",
675 | " 4 | \n",
676 | " 117384.68 | \n",
677 | " 1 | \n",
678 | " 4 | \n",
679 | " 12770.11290 | \n",
680 | "
\n",
681 | " \n",
682 | "
\n",
683 | "
"
684 | ],
685 | "text/plain": [
686 | " preco vendido idade_do_modelo km_por_ano\n",
687 | "0 30941.02 1 18 35085.22134\n",
688 | "1 40557.96 1 20 12622.05362\n",
689 | "2 89627.50 0 12 11440.79806\n",
690 | "3 95276.14 0 3 43167.32682\n",
691 | "4 117384.68 1 4 12770.11290"
692 | ]
693 | },
694 | "metadata": {
695 | "tags": []
696 | },
697 | "execution_count": 6
698 | }
699 | ]
700 | },
701 | {
702 | "metadata": {
703 | "id": "E3xebM4FF0Tc",
704 | "colab_type": "code",
705 | "colab": {
706 | "base_uri": "https://localhost:8080/",
707 | "height": 50
708 | },
709 | "outputId": "7d48ae66-beb0-4884-cbfa-c81c9ce95eb1"
710 | },
711 | "cell_type": "code",
712 | "source": [
713 | "import numpy as np\n",
714 | "from sklearn.model_selection import train_test_split\n",
715 | "from sklearn.svm import LinearSVC\n",
716 | "from sklearn.metrics import accuracy_score\n",
717 | "\n",
718 | "x = dados[[\"preco\", \"idade_do_modelo\",\"km_por_ano\"]]\n",
719 | "y = dados[\"vendido\"]\n",
720 | "\n",
721 | "SEED = 5\n",
722 | "np.random.seed(SEED)\n",
723 | "treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n",
724 | " stratify = y)\n",
725 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n",
726 | "\n",
727 | "modelo = LinearSVC()\n",
728 | "modelo.fit(treino_x, treino_y)\n",
729 | "previsoes = modelo.predict(teste_x)\n",
730 | "\n",
731 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
732 | "print(\"A acurácia foi %.2f%%\" % acuracia)"
733 | ],
734 | "execution_count": 17,
735 | "outputs": [
736 | {
737 | "output_type": "stream",
738 | "text": [
739 | "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n",
740 | "A acurácia foi 57.88%\n"
741 | ],
742 | "name": "stdout"
743 | }
744 | ]
745 | },
746 | {
747 | "metadata": {
748 | "id": "G2ZFWoPkGONL",
749 | "colab_type": "code",
750 | "colab": {
751 | "base_uri": "https://localhost:8080/",
752 | "height": 34
753 | },
754 | "outputId": "f86fecd4-0528-420d-face-b3ab72af6a23"
755 | },
756 | "cell_type": "code",
757 | "source": [
758 | "from sklearn.dummy import DummyClassifier\n",
759 | "\n",
760 | "dummy_stratified = DummyClassifier()\n",
761 | "dummy_stratified.fit(treino_x, treino_y)\n",
762 | "acuracia = dummy_stratified.score(teste_x, teste_y) * 100\n",
763 | "\n",
764 | "print(\"A acurácia do dummy stratified foi %.2f%%\" % acuracia)"
765 | ],
766 | "execution_count": 18,
767 | "outputs": [
768 | {
769 | "output_type": "stream",
770 | "text": [
771 | "A acurácia do dummy stratified foi 52.44%\n"
772 | ],
773 | "name": "stdout"
774 | }
775 | ]
776 | },
777 | {
778 | "metadata": {
779 | "id": "469ZKbHJIHL0",
780 | "colab_type": "code",
781 | "colab": {
782 | "base_uri": "https://localhost:8080/",
783 | "height": 34
784 | },
785 | "outputId": "7348330f-4752-4e1e-8f27-2420acf2ad14"
786 | },
787 | "cell_type": "code",
788 | "source": [
789 | "from sklearn.dummy import DummyClassifier\n",
790 | "\n",
791 | "dummy_mostfrequent = DummyClassifier()\n",
792 | "dummy_mostfrequent.fit(treino_x, treino_y)\n",
793 | "acuracia = dummy_mostfrequent.score(teste_x, teste_y) * 100\n",
794 | "\n",
795 | "print(\"A acurácia do dummy mostfrequent foi %.2f%%\" % acuracia)"
796 | ],
797 | "execution_count": 19,
798 | "outputs": [
799 | {
800 | "output_type": "stream",
801 | "text": [
802 | "A acurácia do dummy mostfrequent foi 50.68%\n"
803 | ],
804 | "name": "stdout"
805 | }
806 | ]
807 | },
808 | {
809 | "metadata": {
810 | "id": "S_5AfFerIsH_",
811 | "colab_type": "code",
812 | "colab": {
813 | "base_uri": "https://localhost:8080/",
814 | "height": 50
815 | },
816 | "outputId": "569a686d-9b86-459f-d30d-5ce738bb6897"
817 | },
818 | "cell_type": "code",
819 | "source": [
820 | "from sklearn.preprocessing import StandardScaler\n",
821 | "from sklearn.model_selection import train_test_split\n",
822 | "from sklearn.svm import SVC\n",
823 | "from sklearn.metrics import accuracy_score\n",
824 | "\n",
825 | "SEED = 5\n",
826 | "np.random.seed(SEED)\n",
827 | "raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n",
828 | " stratify = y)\n",
829 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n",
830 | "\n",
831 | "scaler = StandardScaler()\n",
832 | "scaler.fit(raw_treino_x)\n",
833 | "treino_x = scaler.transform(raw_treino_x)\n",
834 | "teste_x = scaler.transform(raw_teste_x)\n",
835 | "\n",
836 | "modelo = SVC()\n",
837 | "modelo.fit(treino_x, treino_y)\n",
838 | "previsoes = modelo.predict(teste_x)\n",
839 | "\n",
840 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
841 | "print(\"A acurácia foi %.2f%%\" % acuracia)\n"
842 | ],
843 | "execution_count": 20,
844 | "outputs": [
845 | {
846 | "output_type": "stream",
847 | "text": [
848 | "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n",
849 | "A acurácia foi 77.48%\n"
850 | ],
851 | "name": "stdout"
852 | }
853 | ]
854 | },
855 | {
856 | "metadata": {
857 | "id": "-1YsD-whJuGR",
858 | "colab_type": "code",
859 | "colab": {}
860 | },
861 | "cell_type": "code",
862 | "source": [
863 | ""
864 | ],
865 | "execution_count": 0,
866 | "outputs": []
867 | }
868 | ]
869 | }
--------------------------------------------------------------------------------
/aula5.2/introdução_a_machine_learning_4.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Introdução a Machine Learning - 4.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/1RpYAAROMa4C86iZscVUzaWIeVYSJapyE
8 | """
9 |
# Car-sale classification walkthrough: load the data set, derive the model
# features, then compare a linear SVM, two dummy baselines and an RBF SVM
# trained on standardized features.
from datetime import datetime

import numpy as np
import pandas as pd
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.svm import LinearSVC

# --- Load -------------------------------------------------------------------
uri = "https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv"
dados = pd.read_csv(uri)
dados.head()

# --- Rename the English column headers to the names used below --------------
a_renomear = {
    'mileage_per_year' : 'milhas_por_ano',
    'model_year' : 'ano_do_modelo',
    'price' : 'preco',
    'sold' : 'vendido'
}
dados = dados.rename(columns=a_renomear)
dados.head()

# --- Encode the target: 'no' -> 0, 'yes' -> 1 -------------------------------
a_trocar = {
    'no' : 0,
    'yes' : 1
}
dados.vendido = dados.vendido.map(a_trocar)
dados.head()

# --- Derived features -------------------------------------------------------
# The model age is measured against the year in which the script is executed.
ano_atual = datetime.today().year
dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo
dados.head()

# Multiply miles per year by 1.60934 to express the distance in kilometres.
dados['km_por_ano'] = dados.milhas_por_ano * 1.60934
dados.head()

# `columns=` already selects the axis, so no separate `axis` argument is needed.
dados = dados.drop(columns=["Unnamed: 0", "milhas_por_ano", "ano_do_modelo"])
dados.head()

# --- Baseline model: linear SVM on the raw features -------------------------
x = dados[["preco", "idade_do_modelo", "km_por_ano"]]
y = dados["vendido"]

# Seeding NumPy's global generator fixes the split and the estimators below,
# because none of them receives an explicit random_state.
SEED = 5
np.random.seed(SEED)
treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
                                                         stratify = y)
print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x)))

modelo = LinearSVC()
modelo.fit(treino_x, treino_y)
previsoes = modelo.predict(teste_x)

acuracia = accuracy_score(teste_y, previsoes) * 100
print("A acurácia foi %.2f%%" % acuracia)

# --- Dummy baselines --------------------------------------------------------
# The strategy is passed explicitly: a bare DummyClassifier() uses whatever the
# installed scikit-learn defaults to, which made both baselines the same model
# regardless of their names.
dummy_stratified = DummyClassifier(strategy="stratified")
dummy_stratified.fit(treino_x, treino_y)
acuracia = dummy_stratified.score(teste_x, teste_y) * 100

print("A acurácia do dummy stratified foi %.2f%%" % acuracia)

dummy_mostfrequent = DummyClassifier(strategy="most_frequent")
dummy_mostfrequent.fit(treino_x, treino_y)
acuracia = dummy_mostfrequent.score(teste_x, teste_y) * 100

print("A acurácia do dummy mostfrequent foi %.2f%%" % acuracia)

# --- RBF SVM on standardized features ---------------------------------------
SEED = 5
np.random.seed(SEED)
raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
                                                                 stratify = y)
# Report the sizes of the split that was just created, not of the earlier one.
print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(raw_treino_x), len(raw_teste_x)))

# Fit the scaler on the training portion only, then apply it to both portions.
scaler = StandardScaler()
scaler.fit(raw_treino_x)
treino_x = scaler.transform(raw_treino_x)
teste_x = scaler.transform(raw_teste_x)

modelo = SVC()
modelo.fit(treino_x, treino_y)
previsoes = modelo.predict(teste_x)

acuracia = accuracy_score(teste_y, previsoes) * 100
print("A acurácia foi %.2f%%" % acuracia)
103 |
104 |
--------------------------------------------------------------------------------
/aula5.4/Introdução_a_Machine_Learning_4.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Introdução a Machine Learning - 4.ipynb",
7 | "version": "0.3.2",
8 | "provenance": [],
9 | "collapsed_sections": []
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "metadata": {
19 | "id": "T3ywJhRBMGvY",
20 | "colab_type": "code",
21 | "colab": {
22 | "base_uri": "https://localhost:8080/",
23 | "height": 3057
24 | },
25 | "outputId": "fdc1e620-4be1-4b38-f525-a075971a2572"
26 | },
27 | "cell_type": "code",
28 | "source": [
29 | "!pip install graphviz==0.10\n",
30 | "!apt-get install graphviz"
31 | ],
32 | "execution_count": 28,
33 | "outputs": [
34 | {
35 | "output_type": "stream",
36 | "text": [
37 | "Requirement already satisfied: graphviz==0.10 in /usr/local/lib/python3.6/dist-packages (0.10)\n",
38 | "Reading package lists... Done\n",
39 | "Building dependency tree \n",
40 | "Reading state information... Done\n",
41 | "The following additional packages will be installed:\n",
42 | " fontconfig libann0 libcairo2 libcdt5 libcgraph6 libdatrie1 libgd3\n",
43 | " libgts-0.7-5 libgts-bin libgvc6 libgvpr2 libjbig0 liblab-gamut1 libltdl7\n",
44 | " libpango-1.0-0 libpangocairo-1.0-0 libpangoft2-1.0-0 libpathplan4\n",
45 | " libpixman-1-0 libthai-data libthai0 libtiff5 libwebp6 libxaw7 libxcb-render0\n",
46 | " libxcb-shm0 libxmu6 libxpm4 libxt6\n",
47 | "Suggested packages:\n",
48 | " gsfonts graphviz-doc libgd-tools\n",
49 | "The following NEW packages will be installed:\n",
50 | " fontconfig graphviz libann0 libcairo2 libcdt5 libcgraph6 libdatrie1 libgd3\n",
51 | " libgts-0.7-5 libgts-bin libgvc6 libgvpr2 libjbig0 liblab-gamut1 libltdl7\n",
52 | " libpango-1.0-0 libpangocairo-1.0-0 libpangoft2-1.0-0 libpathplan4\n",
53 | " libpixman-1-0 libthai-data libthai0 libtiff5 libwebp6 libxaw7 libxcb-render0\n",
54 | " libxcb-shm0 libxmu6 libxpm4 libxt6\n",
55 | "0 upgraded, 30 newly installed, 0 to remove and 12 not upgraded.\n",
56 | "Need to get 4,154 kB of archives.\n",
57 | "After this operation, 16.1 MB of additional disk space will be used.\n",
58 | "Get:1 http://archive.ubuntu.com/ubuntu bionic/main amd64 fontconfig amd64 2.12.6-0ubuntu2 [169 kB]\n",
59 | "Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libann0 amd64 1.1.2+doc-6 [24.8 kB]\n",
60 | "Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libcdt5 amd64 2.40.1-2 [19.6 kB]\n",
61 | "Get:4 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libcgraph6 amd64 2.40.1-2 [40.8 kB]\n",
62 | "Get:5 http://archive.ubuntu.com/ubuntu bionic/main amd64 libjbig0 amd64 2.1-3.1build1 [26.7 kB]\n",
63 | "Get:6 http://archive.ubuntu.com/ubuntu bionic/main amd64 libtiff5 amd64 4.0.9-5 [152 kB]\n",
64 | "Get:7 http://archive.ubuntu.com/ubuntu bionic/main amd64 libwebp6 amd64 0.6.1-2 [185 kB]\n",
65 | "Get:8 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxpm4 amd64 1:3.5.12-1 [34.0 kB]\n",
66 | "Get:9 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libgd3 amd64 2.2.5-4ubuntu0.2 [119 kB]\n",
67 | "Get:10 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libgts-0.7-5 amd64 0.7.6+darcs121130-4 [150 kB]\n",
68 | "Get:11 http://archive.ubuntu.com/ubuntu bionic/main amd64 libpixman-1-0 amd64 0.34.0-2 [229 kB]\n",
69 | "Get:12 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxcb-render0 amd64 1.13-1 [14.7 kB]\n",
70 | "Get:13 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxcb-shm0 amd64 1.13-1 [5,572 B]\n",
71 | "Get:14 http://archive.ubuntu.com/ubuntu bionic/main amd64 libcairo2 amd64 1.15.10-2 [580 kB]\n",
72 | "Get:15 http://archive.ubuntu.com/ubuntu bionic/main amd64 libltdl7 amd64 2.4.6-2 [38.8 kB]\n",
73 | "Get:16 http://archive.ubuntu.com/ubuntu bionic/main amd64 libthai-data all 0.1.27-2 [133 kB]\n",
74 | "Get:17 http://archive.ubuntu.com/ubuntu bionic/main amd64 libdatrie1 amd64 0.2.10-7 [17.8 kB]\n",
75 | "Get:18 http://archive.ubuntu.com/ubuntu bionic/main amd64 libthai0 amd64 0.1.27-2 [18.0 kB]\n",
76 | "Get:19 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libpango-1.0-0 amd64 1.40.14-1ubuntu0.1 [153 kB]\n",
77 | "Get:20 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libpangoft2-1.0-0 amd64 1.40.14-1ubuntu0.1 [33.2 kB]\n",
78 | "Get:21 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libpangocairo-1.0-0 amd64 1.40.14-1ubuntu0.1 [20.8 kB]\n",
79 | "Get:22 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libpathplan4 amd64 2.40.1-2 [22.6 kB]\n",
80 | "Get:23 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libgvc6 amd64 2.40.1-2 [601 kB]\n",
81 | "Get:24 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libgvpr2 amd64 2.40.1-2 [169 kB]\n",
82 | "Get:25 http://archive.ubuntu.com/ubuntu bionic/universe amd64 liblab-gamut1 amd64 2.40.1-2 [178 kB]\n",
83 | "Get:26 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxt6 amd64 1:1.1.5-1 [160 kB]\n",
84 | "Get:27 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxmu6 amd64 2:1.1.2-2 [46.0 kB]\n",
85 | "Get:28 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxaw7 amd64 2:1.0.13-1 [173 kB]\n",
86 | "Get:29 http://archive.ubuntu.com/ubuntu bionic/universe amd64 graphviz amd64 2.40.1-2 [601 kB]\n",
87 | "Get:30 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libgts-bin amd64 0.7.6+darcs121130-4 [41.3 kB]\n",
88 | "Fetched 4,154 kB in 2s (2,256 kB/s)\n",
89 | "Selecting previously unselected package fontconfig.\n",
90 | "(Reading database ... 22278 files and directories currently installed.)\n",
91 | "Preparing to unpack .../00-fontconfig_2.12.6-0ubuntu2_amd64.deb ...\n",
92 | "Unpacking fontconfig (2.12.6-0ubuntu2) ...\n",
93 | "Selecting previously unselected package libann0.\n",
94 | "Preparing to unpack .../01-libann0_1.1.2+doc-6_amd64.deb ...\n",
95 | "Unpacking libann0 (1.1.2+doc-6) ...\n",
96 | "Selecting previously unselected package libcdt5.\n",
97 | "Preparing to unpack .../02-libcdt5_2.40.1-2_amd64.deb ...\n",
98 | "Unpacking libcdt5 (2.40.1-2) ...\n",
99 | "Selecting previously unselected package libcgraph6.\n",
100 | "Preparing to unpack .../03-libcgraph6_2.40.1-2_amd64.deb ...\n",
101 | "Unpacking libcgraph6 (2.40.1-2) ...\n",
102 | "Selecting previously unselected package libjbig0:amd64.\n",
103 | "Preparing to unpack .../04-libjbig0_2.1-3.1build1_amd64.deb ...\n",
104 | "Unpacking libjbig0:amd64 (2.1-3.1build1) ...\n",
105 | "Selecting previously unselected package libtiff5:amd64.\n",
106 | "Preparing to unpack .../05-libtiff5_4.0.9-5_amd64.deb ...\n",
107 | "Unpacking libtiff5:amd64 (4.0.9-5) ...\n",
108 | "Selecting previously unselected package libwebp6:amd64.\n",
109 | "Preparing to unpack .../06-libwebp6_0.6.1-2_amd64.deb ...\n",
110 | "Unpacking libwebp6:amd64 (0.6.1-2) ...\n",
111 | "Selecting previously unselected package libxpm4:amd64.\n",
112 | "Preparing to unpack .../07-libxpm4_1%3a3.5.12-1_amd64.deb ...\n",
113 | "Unpacking libxpm4:amd64 (1:3.5.12-1) ...\n",
114 | "Selecting previously unselected package libgd3:amd64.\n",
115 | "Preparing to unpack .../08-libgd3_2.2.5-4ubuntu0.2_amd64.deb ...\n",
116 | "Unpacking libgd3:amd64 (2.2.5-4ubuntu0.2) ...\n",
117 | "Selecting previously unselected package libgts-0.7-5:amd64.\n",
118 | "Preparing to unpack .../09-libgts-0.7-5_0.7.6+darcs121130-4_amd64.deb ...\n",
119 | "Unpacking libgts-0.7-5:amd64 (0.7.6+darcs121130-4) ...\n",
120 | "Selecting previously unselected package libpixman-1-0:amd64.\n",
121 | "Preparing to unpack .../10-libpixman-1-0_0.34.0-2_amd64.deb ...\n",
122 | "Unpacking libpixman-1-0:amd64 (0.34.0-2) ...\n",
123 | "Selecting previously unselected package libxcb-render0:amd64.\n",
124 | "Preparing to unpack .../11-libxcb-render0_1.13-1_amd64.deb ...\n",
125 | "Unpacking libxcb-render0:amd64 (1.13-1) ...\n",
126 | "Selecting previously unselected package libxcb-shm0:amd64.\n",
127 | "Preparing to unpack .../12-libxcb-shm0_1.13-1_amd64.deb ...\n",
128 | "Unpacking libxcb-shm0:amd64 (1.13-1) ...\n",
129 | "Selecting previously unselected package libcairo2:amd64.\n",
130 | "Preparing to unpack .../13-libcairo2_1.15.10-2_amd64.deb ...\n",
131 | "Unpacking libcairo2:amd64 (1.15.10-2) ...\n",
132 | "Selecting previously unselected package libltdl7:amd64.\n",
133 | "Preparing to unpack .../14-libltdl7_2.4.6-2_amd64.deb ...\n",
134 | "Unpacking libltdl7:amd64 (2.4.6-2) ...\n",
135 | "Selecting previously unselected package libthai-data.\n",
136 | "Preparing to unpack .../15-libthai-data_0.1.27-2_all.deb ...\n",
137 | "Unpacking libthai-data (0.1.27-2) ...\n",
138 | "Selecting previously unselected package libdatrie1:amd64.\n",
139 | "Preparing to unpack .../16-libdatrie1_0.2.10-7_amd64.deb ...\n",
140 | "Unpacking libdatrie1:amd64 (0.2.10-7) ...\n",
141 | "Selecting previously unselected package libthai0:amd64.\n",
142 | "Preparing to unpack .../17-libthai0_0.1.27-2_amd64.deb ...\n",
143 | "Unpacking libthai0:amd64 (0.1.27-2) ...\n",
144 | "Selecting previously unselected package libpango-1.0-0:amd64.\n",
145 | "Preparing to unpack .../18-libpango-1.0-0_1.40.14-1ubuntu0.1_amd64.deb ...\n",
146 | "Unpacking libpango-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n",
147 | "Selecting previously unselected package libpangoft2-1.0-0:amd64.\n",
148 | "Preparing to unpack .../19-libpangoft2-1.0-0_1.40.14-1ubuntu0.1_amd64.deb ...\n",
149 | "Unpacking libpangoft2-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n",
150 | "Selecting previously unselected package libpangocairo-1.0-0:amd64.\n",
151 | "Preparing to unpack .../20-libpangocairo-1.0-0_1.40.14-1ubuntu0.1_amd64.deb ...\n",
152 | "Unpacking libpangocairo-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n",
153 | "Selecting previously unselected package libpathplan4.\n",
154 | "Preparing to unpack .../21-libpathplan4_2.40.1-2_amd64.deb ...\n",
155 | "Unpacking libpathplan4 (2.40.1-2) ...\n",
156 | "Selecting previously unselected package libgvc6.\n",
157 | "Preparing to unpack .../22-libgvc6_2.40.1-2_amd64.deb ...\n",
158 | "Unpacking libgvc6 (2.40.1-2) ...\n",
159 | "Selecting previously unselected package libgvpr2.\n",
160 | "Preparing to unpack .../23-libgvpr2_2.40.1-2_amd64.deb ...\n",
161 | "Unpacking libgvpr2 (2.40.1-2) ...\n",
162 | "Selecting previously unselected package liblab-gamut1.\n",
163 | "Preparing to unpack .../24-liblab-gamut1_2.40.1-2_amd64.deb ...\n",
164 | "Unpacking liblab-gamut1 (2.40.1-2) ...\n",
165 | "Selecting previously unselected package libxt6:amd64.\n",
166 | "Preparing to unpack .../25-libxt6_1%3a1.1.5-1_amd64.deb ...\n",
167 | "Unpacking libxt6:amd64 (1:1.1.5-1) ...\n",
168 | "Selecting previously unselected package libxmu6:amd64.\n",
169 | "Preparing to unpack .../26-libxmu6_2%3a1.1.2-2_amd64.deb ...\n",
170 | "Unpacking libxmu6:amd64 (2:1.1.2-2) ...\n",
171 | "Selecting previously unselected package libxaw7:amd64.\n",
172 | "Preparing to unpack .../27-libxaw7_2%3a1.0.13-1_amd64.deb ...\n",
173 | "Unpacking libxaw7:amd64 (2:1.0.13-1) ...\n",
174 | "Selecting previously unselected package graphviz.\n",
175 | "Preparing to unpack .../28-graphviz_2.40.1-2_amd64.deb ...\n",
176 | "Unpacking graphviz (2.40.1-2) ...\n",
177 | "Selecting previously unselected package libgts-bin.\n",
178 | "Preparing to unpack .../29-libgts-bin_0.7.6+darcs121130-4_amd64.deb ...\n",
179 | "Unpacking libgts-bin (0.7.6+darcs121130-4) ...\n",
180 | "Setting up libgts-0.7-5:amd64 (0.7.6+darcs121130-4) ...\n",
181 | "Setting up libpathplan4 (2.40.1-2) ...\n",
182 | "Setting up liblab-gamut1 (2.40.1-2) ...\n",
183 | "Setting up libxcb-render0:amd64 (1.13-1) ...\n",
184 | "Setting up libjbig0:amd64 (2.1-3.1build1) ...\n",
185 | "Setting up libdatrie1:amd64 (0.2.10-7) ...\n",
186 | "Setting up libtiff5:amd64 (4.0.9-5) ...\n",
187 | "Setting up libpixman-1-0:amd64 (0.34.0-2) ...\n",
188 | "Processing triggers for libc-bin (2.27-3ubuntu1) ...\n",
189 | "Setting up libltdl7:amd64 (2.4.6-2) ...\n",
190 | "Setting up libann0 (1.1.2+doc-6) ...\n",
191 | "Setting up libxcb-shm0:amd64 (1.13-1) ...\n",
192 | "Setting up libxpm4:amd64 (1:3.5.12-1) ...\n",
193 | "Setting up libxt6:amd64 (1:1.1.5-1) ...\n",
194 | "Setting up libgts-bin (0.7.6+darcs121130-4) ...\n",
195 | "Setting up libthai-data (0.1.27-2) ...\n",
196 | "Setting up libcdt5 (2.40.1-2) ...\n",
197 | "Setting up fontconfig (2.12.6-0ubuntu2) ...\n",
198 | "Regenerating fonts cache... done.\n",
199 | "Setting up libcgraph6 (2.40.1-2) ...\n",
200 | "Setting up libwebp6:amd64 (0.6.1-2) ...\n",
201 | "Setting up libcairo2:amd64 (1.15.10-2) ...\n",
202 | "Setting up libgvpr2 (2.40.1-2) ...\n",
203 | "Setting up libgd3:amd64 (2.2.5-4ubuntu0.2) ...\n",
204 | "Setting up libthai0:amd64 (0.1.27-2) ...\n",
205 | "Setting up libxmu6:amd64 (2:1.1.2-2) ...\n",
206 | "Setting up libpango-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n",
207 | "Setting up libxaw7:amd64 (2:1.0.13-1) ...\n",
208 | "Setting up libpangoft2-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n",
209 | "Setting up libpangocairo-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n",
210 | "Setting up libgvc6 (2.40.1-2) ...\n",
211 | "Setting up graphviz (2.40.1-2) ...\n",
212 | "Processing triggers for libc-bin (2.27-3ubuntu1) ...\n"
213 | ],
214 | "name": "stdout"
215 | }
216 | ]
217 | },
218 | {
219 | "metadata": {
220 | "id": "y7ik04NlDZMA",
221 | "colab_type": "code",
222 | "colab": {
223 | "base_uri": "https://localhost:8080/",
224 | "height": 198
225 | },
226 | "outputId": "73c13868-60e4-4a25-9338-02f324cabb18"
227 | },
228 | "cell_type": "code",
229 | "source": [
230 | "import pandas as pd\n",
231 | "\n",
232 | "uri = \"https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv\"\n",
233 | "dados = pd.read_csv(uri)\n",
234 | "dados.head()"
235 | ],
236 | "execution_count": 1,
237 | "outputs": [
238 | {
239 | "output_type": "execute_result",
240 | "data": {
241 | "text/html": [
242 | "\n",
243 | "\n",
256 | "
\n",
257 | " \n",
258 | " \n",
259 | " | \n",
260 | " Unnamed: 0 | \n",
261 | " mileage_per_year | \n",
262 | " model_year | \n",
263 | " price | \n",
264 | " sold | \n",
265 | "
\n",
266 | " \n",
267 | " \n",
268 | " \n",
269 | " 0 | \n",
270 | " 0 | \n",
271 | " 21801 | \n",
272 | " 2000 | \n",
273 | " 30941.02 | \n",
274 | " yes | \n",
275 | "
\n",
276 | " \n",
277 | " 1 | \n",
278 | " 1 | \n",
279 | " 7843 | \n",
280 | " 1998 | \n",
281 | " 40557.96 | \n",
282 | " yes | \n",
283 | "
\n",
284 | " \n",
285 | " 2 | \n",
286 | " 2 | \n",
287 | " 7109 | \n",
288 | " 2006 | \n",
289 | " 89627.50 | \n",
290 | " no | \n",
291 | "
\n",
292 | " \n",
293 | " 3 | \n",
294 | " 3 | \n",
295 | " 26823 | \n",
296 | " 2015 | \n",
297 | " 95276.14 | \n",
298 | " no | \n",
299 | "
\n",
300 | " \n",
301 | " 4 | \n",
302 | " 4 | \n",
303 | " 7935 | \n",
304 | " 2014 | \n",
305 | " 117384.68 | \n",
306 | " yes | \n",
307 | "
\n",
308 | " \n",
309 | "
\n",
310 | "
"
311 | ],
312 | "text/plain": [
313 | " Unnamed: 0 mileage_per_year model_year price sold\n",
314 | "0 0 21801 2000 30941.02 yes\n",
315 | "1 1 7843 1998 40557.96 yes\n",
316 | "2 2 7109 2006 89627.50 no\n",
317 | "3 3 26823 2015 95276.14 no\n",
318 | "4 4 7935 2014 117384.68 yes"
319 | ]
320 | },
321 | "metadata": {
322 | "tags": []
323 | },
324 | "execution_count": 1
325 | }
326 | ]
327 | },
328 | {
329 | "metadata": {
330 | "id": "FqFrPmgJDhvM",
331 | "colab_type": "code",
332 | "colab": {
333 | "base_uri": "https://localhost:8080/",
334 | "height": 198
335 | },
336 | "outputId": "46b16944-2a4a-4b53-b970-5a96a9b7d867"
337 | },
338 | "cell_type": "code",
339 | "source": [
340 | "a_renomear = {\n",
341 | " 'mileage_per_year' : 'milhas_por_ano',\n",
342 | " 'model_year' : 'ano_do_modelo',\n",
343 | " 'price' : 'preco',\n",
344 | " 'sold' : 'vendido'\n",
345 | "}\n",
346 | "dados = dados.rename(columns=a_renomear)\n",
347 | "dados.head()"
348 | ],
349 | "execution_count": 2,
350 | "outputs": [
351 | {
352 | "output_type": "execute_result",
353 | "data": {
354 | "text/html": [
355 | "\n",
356 | "\n",
369 | "
\n",
370 | " \n",
371 | " \n",
372 | " | \n",
373 | " Unnamed: 0 | \n",
374 | " milhas_por_ano | \n",
375 | " ano_do_modelo | \n",
376 | " preco | \n",
377 | " vendido | \n",
378 | "
\n",
379 | " \n",
380 | " \n",
381 | " \n",
382 | " 0 | \n",
383 | " 0 | \n",
384 | " 21801 | \n",
385 | " 2000 | \n",
386 | " 30941.02 | \n",
387 | " yes | \n",
388 | "
\n",
389 | " \n",
390 | " 1 | \n",
391 | " 1 | \n",
392 | " 7843 | \n",
393 | " 1998 | \n",
394 | " 40557.96 | \n",
395 | " yes | \n",
396 | "
\n",
397 | " \n",
398 | " 2 | \n",
399 | " 2 | \n",
400 | " 7109 | \n",
401 | " 2006 | \n",
402 | " 89627.50 | \n",
403 | " no | \n",
404 | "
\n",
405 | " \n",
406 | " 3 | \n",
407 | " 3 | \n",
408 | " 26823 | \n",
409 | " 2015 | \n",
410 | " 95276.14 | \n",
411 | " no | \n",
412 | "
\n",
413 | " \n",
414 | " 4 | \n",
415 | " 4 | \n",
416 | " 7935 | \n",
417 | " 2014 | \n",
418 | " 117384.68 | \n",
419 | " yes | \n",
420 | "
\n",
421 | " \n",
422 | "
\n",
423 | "
"
424 | ],
425 | "text/plain": [
426 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido\n",
427 | "0 0 21801 2000 30941.02 yes\n",
428 | "1 1 7843 1998 40557.96 yes\n",
429 | "2 2 7109 2006 89627.50 no\n",
430 | "3 3 26823 2015 95276.14 no\n",
431 | "4 4 7935 2014 117384.68 yes"
432 | ]
433 | },
434 | "metadata": {
435 | "tags": []
436 | },
437 | "execution_count": 2
438 | }
439 | ]
440 | },
441 | {
442 | "metadata": {
443 | "id": "J31fUGbVEKpW",
444 | "colab_type": "code",
445 | "colab": {
446 | "base_uri": "https://localhost:8080/",
447 | "height": 198
448 | },
449 | "outputId": "ad0a651f-320a-4f5e-8e2c-706bf6937fbb"
450 | },
451 | "cell_type": "code",
452 | "source": [
453 | "a_trocar = {\n",
454 | " 'no' : 0,\n",
455 | " 'yes' : 1\n",
456 | "}\n",
457 | "dados.vendido = dados.vendido.map(a_trocar)\n",
458 | "dados.head()"
459 | ],
460 | "execution_count": 3,
461 | "outputs": [
462 | {
463 | "output_type": "execute_result",
464 | "data": {
465 | "text/html": [
466 | "\n",
467 | "\n",
480 | "
\n",
481 | " \n",
482 | " \n",
483 | " | \n",
484 | " Unnamed: 0 | \n",
485 | " milhas_por_ano | \n",
486 | " ano_do_modelo | \n",
487 | " preco | \n",
488 | " vendido | \n",
489 | "
\n",
490 | " \n",
491 | " \n",
492 | " \n",
493 | " 0 | \n",
494 | " 0 | \n",
495 | " 21801 | \n",
496 | " 2000 | \n",
497 | " 30941.02 | \n",
498 | " 1 | \n",
499 | "
\n",
500 | " \n",
501 | " 1 | \n",
502 | " 1 | \n",
503 | " 7843 | \n",
504 | " 1998 | \n",
505 | " 40557.96 | \n",
506 | " 1 | \n",
507 | "
\n",
508 | " \n",
509 | " 2 | \n",
510 | " 2 | \n",
511 | " 7109 | \n",
512 | " 2006 | \n",
513 | " 89627.50 | \n",
514 | " 0 | \n",
515 | "
\n",
516 | " \n",
517 | " 3 | \n",
518 | " 3 | \n",
519 | " 26823 | \n",
520 | " 2015 | \n",
521 | " 95276.14 | \n",
522 | " 0 | \n",
523 | "
\n",
524 | " \n",
525 | " 4 | \n",
526 | " 4 | \n",
527 | " 7935 | \n",
528 | " 2014 | \n",
529 | " 117384.68 | \n",
530 | " 1 | \n",
531 | "
\n",
532 | " \n",
533 | "
\n",
534 | "
"
535 | ],
536 | "text/plain": [
537 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido\n",
538 | "0 0 21801 2000 30941.02 1\n",
539 | "1 1 7843 1998 40557.96 1\n",
540 | "2 2 7109 2006 89627.50 0\n",
541 | "3 3 26823 2015 95276.14 0\n",
542 | "4 4 7935 2014 117384.68 1"
543 | ]
544 | },
545 | "metadata": {
546 | "tags": []
547 | },
548 | "execution_count": 3
549 | }
550 | ]
551 | },
552 | {
553 | "metadata": {
554 | "id": "tZFog8O9EXYD",
555 | "colab_type": "code",
556 | "colab": {
557 | "base_uri": "https://localhost:8080/",
558 | "height": 198
559 | },
560 | "outputId": "5792a2f8-f777-4237-e813-697f60309951"
561 | },
562 | "cell_type": "code",
563 | "source": [
564 | "from datetime import datetime\n",
565 | "\n",
566 | "ano_atual = datetime.today().year\n",
567 | "dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo\n",
568 | "dados.head()"
569 | ],
570 | "execution_count": 4,
571 | "outputs": [
572 | {
573 | "output_type": "execute_result",
574 | "data": {
575 | "text/html": [
576 | "\n",
577 | "\n",
590 | "
\n",
591 | " \n",
592 | " \n",
593 | " | \n",
594 | " Unnamed: 0 | \n",
595 | " milhas_por_ano | \n",
596 | " ano_do_modelo | \n",
597 | " preco | \n",
598 | " vendido | \n",
599 | " idade_do_modelo | \n",
600 | "
\n",
601 | " \n",
602 | " \n",
603 | " \n",
604 | " 0 | \n",
605 | " 0 | \n",
606 | " 21801 | \n",
607 | " 2000 | \n",
608 | " 30941.02 | \n",
609 | " 1 | \n",
610 | " 18 | \n",
611 | "
\n",
612 | " \n",
613 | " 1 | \n",
614 | " 1 | \n",
615 | " 7843 | \n",
616 | " 1998 | \n",
617 | " 40557.96 | \n",
618 | " 1 | \n",
619 | " 20 | \n",
620 | "
\n",
621 | " \n",
622 | " 2 | \n",
623 | " 2 | \n",
624 | " 7109 | \n",
625 | " 2006 | \n",
626 | " 89627.50 | \n",
627 | " 0 | \n",
628 | " 12 | \n",
629 | "
\n",
630 | " \n",
631 | " 3 | \n",
632 | " 3 | \n",
633 | " 26823 | \n",
634 | " 2015 | \n",
635 | " 95276.14 | \n",
636 | " 0 | \n",
637 | " 3 | \n",
638 | "
\n",
639 | " \n",
640 | " 4 | \n",
641 | " 4 | \n",
642 | " 7935 | \n",
643 | " 2014 | \n",
644 | " 117384.68 | \n",
645 | " 1 | \n",
646 | " 4 | \n",
647 | "
\n",
648 | " \n",
649 | "
\n",
650 | "
"
651 | ],
652 | "text/plain": [
653 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido \\\n",
654 | "0 0 21801 2000 30941.02 1 \n",
655 | "1 1 7843 1998 40557.96 1 \n",
656 | "2 2 7109 2006 89627.50 0 \n",
657 | "3 3 26823 2015 95276.14 0 \n",
658 | "4 4 7935 2014 117384.68 1 \n",
659 | "\n",
660 | " idade_do_modelo \n",
661 | "0 18 \n",
662 | "1 20 \n",
663 | "2 12 \n",
664 | "3 3 \n",
665 | "4 4 "
666 | ]
667 | },
668 | "metadata": {
669 | "tags": []
670 | },
671 | "execution_count": 4
672 | }
673 | ]
674 | },
675 | {
676 | "metadata": {
677 | "id": "3wWWgxhcFbR9",
678 | "colab_type": "code",
679 | "colab": {
680 | "base_uri": "https://localhost:8080/",
681 | "height": 198
682 | },
683 | "outputId": "b4c147dc-c2e5-4bfd-e78f-5a34f72770d6"
684 | },
685 | "cell_type": "code",
686 | "source": [
687 | "dados['km_por_ano'] = dados.milhas_por_ano * 1.60934\n",
688 | "dados.head()"
689 | ],
690 | "execution_count": 5,
691 | "outputs": [
692 | {
693 | "output_type": "execute_result",
694 | "data": {
695 | "text/html": [
696 | "\n",
697 | "\n",
710 | "
\n",
711 | " \n",
712 | " \n",
713 | " | \n",
714 | " Unnamed: 0 | \n",
715 | " milhas_por_ano | \n",
716 | " ano_do_modelo | \n",
717 | " preco | \n",
718 | " vendido | \n",
719 | " idade_do_modelo | \n",
720 | " km_por_ano | \n",
721 | "
\n",
722 | " \n",
723 | " \n",
724 | " \n",
725 | " 0 | \n",
726 | " 0 | \n",
727 | " 21801 | \n",
728 | " 2000 | \n",
729 | " 30941.02 | \n",
730 | " 1 | \n",
731 | " 18 | \n",
732 | " 35085.22134 | \n",
733 | "
\n",
734 | " \n",
735 | " 1 | \n",
736 | " 1 | \n",
737 | " 7843 | \n",
738 | " 1998 | \n",
739 | " 40557.96 | \n",
740 | " 1 | \n",
741 | " 20 | \n",
742 | " 12622.05362 | \n",
743 | "
\n",
744 | " \n",
745 | " 2 | \n",
746 | " 2 | \n",
747 | " 7109 | \n",
748 | " 2006 | \n",
749 | " 89627.50 | \n",
750 | " 0 | \n",
751 | " 12 | \n",
752 | " 11440.79806 | \n",
753 | "
\n",
754 | " \n",
755 | " 3 | \n",
756 | " 3 | \n",
757 | " 26823 | \n",
758 | " 2015 | \n",
759 | " 95276.14 | \n",
760 | " 0 | \n",
761 | " 3 | \n",
762 | " 43167.32682 | \n",
763 | "
\n",
764 | " \n",
765 | " 4 | \n",
766 | " 4 | \n",
767 | " 7935 | \n",
768 | " 2014 | \n",
769 | " 117384.68 | \n",
770 | " 1 | \n",
771 | " 4 | \n",
772 | " 12770.11290 | \n",
773 | "
\n",
774 | " \n",
775 | "
\n",
776 | "
"
777 | ],
778 | "text/plain": [
779 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido \\\n",
780 | "0 0 21801 2000 30941.02 1 \n",
781 | "1 1 7843 1998 40557.96 1 \n",
782 | "2 2 7109 2006 89627.50 0 \n",
783 | "3 3 26823 2015 95276.14 0 \n",
784 | "4 4 7935 2014 117384.68 1 \n",
785 | "\n",
786 | " idade_do_modelo km_por_ano \n",
787 | "0 18 35085.22134 \n",
788 | "1 20 12622.05362 \n",
789 | "2 12 11440.79806 \n",
790 | "3 3 43167.32682 \n",
791 | "4 4 12770.11290 "
792 | ]
793 | },
794 | "metadata": {
795 | "tags": []
796 | },
797 | "execution_count": 5
798 | }
799 | ]
800 | },
801 | {
802 | "metadata": {
803 | "id": "MfjQNKlyFo2S",
804 | "colab_type": "code",
805 | "colab": {
806 | "base_uri": "https://localhost:8080/",
807 | "height": 198
808 | },
809 | "outputId": "7588f6a5-ff37-4996-91a5-d9ee4b5543d7"
810 | },
811 | "cell_type": "code",
812 | "source": [
813 | "dados = dados.drop(columns = [\"Unnamed: 0\", \"milhas_por_ano\",\"ano_do_modelo\"], axis=1)\n",
814 | "dados.head()"
815 | ],
816 | "execution_count": 6,
817 | "outputs": [
818 | {
819 | "output_type": "execute_result",
820 | "data": {
821 | "text/html": [
822 | "\n",
823 | "\n",
836 | "
\n",
837 | " \n",
838 | " \n",
839 | " | \n",
840 | " preco | \n",
841 | " vendido | \n",
842 | " idade_do_modelo | \n",
843 | " km_por_ano | \n",
844 | "
\n",
845 | " \n",
846 | " \n",
847 | " \n",
848 | " 0 | \n",
849 | " 30941.02 | \n",
850 | " 1 | \n",
851 | " 18 | \n",
852 | " 35085.22134 | \n",
853 | "
\n",
854 | " \n",
855 | " 1 | \n",
856 | " 40557.96 | \n",
857 | " 1 | \n",
858 | " 20 | \n",
859 | " 12622.05362 | \n",
860 | "
\n",
861 | " \n",
862 | " 2 | \n",
863 | " 89627.50 | \n",
864 | " 0 | \n",
865 | " 12 | \n",
866 | " 11440.79806 | \n",
867 | "
\n",
868 | " \n",
869 | " 3 | \n",
870 | " 95276.14 | \n",
871 | " 0 | \n",
872 | " 3 | \n",
873 | " 43167.32682 | \n",
874 | "
\n",
875 | " \n",
876 | " 4 | \n",
877 | " 117384.68 | \n",
878 | " 1 | \n",
879 | " 4 | \n",
880 | " 12770.11290 | \n",
881 | "
\n",
882 | " \n",
883 | "
\n",
884 | "
"
885 | ],
886 | "text/plain": [
887 | " preco vendido idade_do_modelo km_por_ano\n",
888 | "0 30941.02 1 18 35085.22134\n",
889 | "1 40557.96 1 20 12622.05362\n",
890 | "2 89627.50 0 12 11440.79806\n",
891 | "3 95276.14 0 3 43167.32682\n",
892 | "4 117384.68 1 4 12770.11290"
893 | ]
894 | },
895 | "metadata": {
896 | "tags": []
897 | },
898 | "execution_count": 6
899 | }
900 | ]
901 | },
902 | {
903 | "metadata": {
904 | "id": "E3xebM4FF0Tc",
905 | "colab_type": "code",
906 | "colab": {
907 | "base_uri": "https://localhost:8080/",
908 | "height": 52
909 | },
910 | "outputId": "7d48ae66-beb0-4884-cbfa-c81c9ce95eb1"
911 | },
912 | "cell_type": "code",
913 | "source": [
914 | "import numpy as np\n",
915 | "from sklearn.model_selection import train_test_split\n",
916 | "from sklearn.svm import LinearSVC\n",
917 | "from sklearn.metrics import accuracy_score\n",
918 | "\n",
919 | "x = dados[[\"preco\", \"idade_do_modelo\",\"km_por_ano\"]]\n",
920 | "y = dados[\"vendido\"]\n",
921 | "\n",
922 | "SEED = 5\n",
923 | "np.random.seed(SEED)\n",
924 | "treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n",
925 | " stratify = y)\n",
926 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n",
927 | "\n",
928 | "modelo = LinearSVC()\n",
929 | "modelo.fit(treino_x, treino_y)\n",
930 | "previsoes = modelo.predict(teste_x)\n",
931 | "\n",
932 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
933 | "print(\"A acurácia foi %.2f%%\" % acuracia)"
934 | ],
935 | "execution_count": 17,
936 | "outputs": [
937 | {
938 | "output_type": "stream",
939 | "text": [
940 | "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n",
941 | "A acurácia foi 57.88%\n"
942 | ],
943 | "name": "stdout"
944 | }
945 | ]
946 | },
947 | {
948 | "metadata": {
949 | "id": "G2ZFWoPkGONL",
950 | "colab_type": "code",
951 | "colab": {
952 | "base_uri": "https://localhost:8080/",
953 | "height": 35
954 | },
955 | "outputId": "f86fecd4-0528-420d-face-b3ab72af6a23"
956 | },
957 | "cell_type": "code",
958 | "source": [
959 | "from sklearn.dummy import DummyClassifier\n",
960 | "\n",
961 | "dummy_stratified = DummyClassifier()\n",
962 | "dummy_stratified.fit(treino_x, treino_y)\n",
963 | "acuracia = dummy_stratified.score(teste_x, teste_y) * 100\n",
964 | "\n",
965 | "print(\"A acurácia do dummy stratified foi %.2f%%\" % acuracia)"
966 | ],
967 | "execution_count": 18,
968 | "outputs": [
969 | {
970 | "output_type": "stream",
971 | "text": [
972 | "A acurácia do dummy stratified foi 52.44%\n"
973 | ],
974 | "name": "stdout"
975 | }
976 | ]
977 | },
978 | {
979 | "metadata": {
980 | "id": "469ZKbHJIHL0",
981 | "colab_type": "code",
982 | "colab": {
983 | "base_uri": "https://localhost:8080/",
984 | "height": 35
985 | },
986 | "outputId": "7348330f-4752-4e1e-8f27-2420acf2ad14"
987 | },
988 | "cell_type": "code",
989 | "source": [
990 | "from sklearn.dummy import DummyClassifier\n",
991 | "\n",
992 | "dummy_mostfrequent = DummyClassifier()\n",
993 | "dummy_mostfrequent.fit(treino_x, treino_y)\n",
994 | "acuracia = dummy_mostfrequent.score(teste_x, teste_y) * 100\n",
995 | "\n",
996 | "print(\"A acurácia do dummy mostfrequent foi %.2f%%\" % acuracia)"
997 | ],
998 | "execution_count": 19,
999 | "outputs": [
1000 | {
1001 | "output_type": "stream",
1002 | "text": [
1003 | "A acurácia do dummy mostfrequent foi 50.68%\n"
1004 | ],
1005 | "name": "stdout"
1006 | }
1007 | ]
1008 | },
1009 | {
1010 | "metadata": {
1011 | "id": "S_5AfFerIsH_",
1012 | "colab_type": "code",
1013 | "colab": {
1014 | "base_uri": "https://localhost:8080/",
1015 | "height": 52
1016 | },
1017 | "outputId": "569a686d-9b86-459f-d30d-5ce738bb6897"
1018 | },
1019 | "cell_type": "code",
1020 | "source": [
1021 | "from sklearn.preprocessing import StandardScaler\n",
1022 | "from sklearn.model_selection import train_test_split\n",
1023 | "from sklearn.svm import SVC\n",
1024 | "from sklearn.metrics import accuracy_score\n",
1025 | "\n",
1026 | "SEED = 5\n",
1027 | "np.random.seed(SEED)\n",
1028 | "raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n",
1029 | " stratify = y)\n",
1030 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n",
1031 | "\n",
1032 | "scaler = StandardScaler()\n",
1033 | "scaler.fit(raw_treino_x)\n",
1034 | "treino_x = scaler.transform(raw_treino_x)\n",
1035 | "teste_x = scaler.transform(raw_teste_x)\n",
1036 | "\n",
1037 | "modelo = SVC()\n",
1038 | "modelo.fit(treino_x, treino_y)\n",
1039 | "previsoes = modelo.predict(teste_x)\n",
1040 | "\n",
1041 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
1042 | "print(\"A acurácia foi %.2f%%\" % acuracia)\n"
1043 | ],
1044 | "execution_count": 20,
1045 | "outputs": [
1046 | {
1047 | "output_type": "stream",
1048 | "text": [
1049 | "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n",
1050 | "A acurácia foi 77.48%\n"
1051 | ],
1052 | "name": "stdout"
1053 | }
1054 | ]
1055 | },
1056 | {
1057 | "metadata": {
1058 | "id": "-1YsD-whJuGR",
1059 | "colab_type": "code",
1060 | "colab": {
1061 | "base_uri": "https://localhost:8080/",
1062 | "height": 52
1063 | },
1064 | "outputId": "5e0c7f00-98ce-4b32-bbc6-908dde4cc2f8"
1065 | },
1066 | "cell_type": "code",
1067 | "source": [
1068 | "from sklearn.preprocessing import StandardScaler\n",
1069 | "from sklearn.model_selection import train_test_split\n",
1070 | "from sklearn.tree import DecisionTreeClassifier\n",
1071 | "from sklearn.metrics import accuracy_score\n",
1072 | "\n",
1073 | "SEED = 5\n",
1074 | "np.random.seed(SEED)\n",
1075 | "raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n",
1076 | " stratify = y)\n",
1077 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n",
1078 | "\n",
1079 | "modelo = DecisionTreeClassifier(max_depth=3)\n",
1080 | "modelo.fit(raw_treino_x, treino_y)\n",
1081 | "previsoes = modelo.predict(raw_teste_x)\n",
1082 | "\n",
1083 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n",
1084 | "print(\"A acurácia foi %.2f%%\" % acuracia)\n"
1085 | ],
1086 | "execution_count": 37,
1087 | "outputs": [
1088 | {
1089 | "output_type": "stream",
1090 | "text": [
1091 | "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n",
1092 | "A acurácia foi 79.28%\n"
1093 | ],
1094 | "name": "stdout"
1095 | }
1096 | ]
1097 | },
1098 | {
1099 | "metadata": {
1100 | "id": "-8O83jbOLctO",
1101 | "colab_type": "code",
1102 | "colab": {
1103 | "base_uri": "https://localhost:8080/",
1104 | "height": 619
1105 | },
1106 | "outputId": "8ddc6972-e0a7-40d3-adba-0877358ab355"
1107 | },
1108 | "cell_type": "code",
1109 | "source": [
1110 | "from sklearn.tree import export_graphviz\n",
1111 | "import graphviz\n",
1112 | "\n",
1113 | "features = x.columns\n",
1114 | "dot_data = export_graphviz(modelo, out_file=None,\n",
1115 | " filled = True, rounded = True,\n",
1116 | " feature_names = features,\n",
1117 | " class_names = [\"não\", \"sim\"])\n",
1118 | "grafico = graphviz.Source(dot_data)\n",
1119 | "grafico"
1120 | ],
1121 | "execution_count": 38,
1122 | "outputs": [
1123 | {
1124 | "output_type": "execute_result",
1125 | "data": {
1126 | "text/plain": [
1127 | ""
1128 | ],
1129 | "image/svg+xml": "\n\n\n\n\n"
1130 | },
1131 | "metadata": {
1132 | "tags": []
1133 | },
1134 | "execution_count": 38
1135 | }
1136 | ]
1137 | },
1138 | {
1139 | "metadata": {
1140 | "id": "TLIr9EPALvM4",
1141 | "colab_type": "code",
1142 | "colab": {}
1143 | },
1144 | "cell_type": "code",
1145 | "source": [
1146 | ""
1147 | ],
1148 | "execution_count": 0,
1149 | "outputs": []
1150 | }
1151 | ]
1152 | }
--------------------------------------------------------------------------------
/aula5.4/introdução_a_machine_learning_4.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Introdução a Machine Learning - 4.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/1RpYAAROMa4C86iZscVUzaWIeVYSJapyE
8 | """
9 |
# Environment setup, originally written as IPython shell escapes in the Colab
# notebook. A leading "!" is notebook-only syntax and is a SyntaxError when
# this exported file is run with plain Python, so the commands are kept here
# as instructions. Run them once in a shell (or a notebook cell) beforehand:
#
#   pip install graphviz==0.10
#   apt-get install graphviz
12 |
import pandas as pd
from datetime import datetime

# Load the car-prices CSV from the gist and peek at the first rows.
uri = "https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv"
dados = pd.read_csv(uri)
dados.head()

# Rename the English column names to the Portuguese names used from here on.
a_renomear = dict(
    mileage_per_year='milhas_por_ano',
    model_year='ano_do_modelo',
    price='preco',
    sold='vendido',
)
dados = dados.rename(columns=a_renomear)
dados.head()

# Encode the yes/no target column as 1/0.
a_trocar = dict(no=0, yes=1)
dados['vendido'] = dados['vendido'].map(a_trocar)
dados.head()

# Model age is measured against the year in which the script is executed.
ano_atual = datetime.today().year
dados['idade_do_modelo'] = ano_atual - dados['ano_do_modelo']
dados.head()

# Convert the yearly mileage from miles to kilometres.
dados['km_por_ano'] = 1.60934 * dados['milhas_por_ano']
dados.head()

# Drop the CSV's unnamed index column and the two columns that were replaced
# by the derived features above.
colunas_descartadas = ["Unnamed: 0", "milhas_por_ano", "ano_do_modelo"]
dados = dados.drop(columns=colunas_descartadas)
dados.head()
46 |
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

# Feature matrix and target column reused by the experiments that follow.
colunas_de_entrada = ["preco", "idade_do_modelo", "km_por_ano"]
x = dados[colunas_de_entrada]
y = dados["vendido"]

# Seed NumPy's global RNG before splitting and fitting.
SEED = 5
np.random.seed(SEED)

# Hold out 25% of the rows for testing, stratified on the target.
divisao = train_test_split(x, y, test_size=0.25, stratify=y)
treino_x, teste_x, treino_y, teste_y = divisao
tamanhos = (len(treino_x), len(teste_x))
print("Treinaremos com %d elementos e testaremos com %d elementos" % tamanhos)

# Linear SVM trained on the raw (unscaled) features.
modelo = LinearSVC()
modelo.fit(treino_x, treino_y)
previsoes = modelo.predict(teste_x)

# Accuracy on the held-out rows, expressed as a percentage.
acuracia = 100 * accuracy_score(teste_y, previsoes)
print("A acurácia foi %.2f%%" % acuracia)
67 |
from sklearn.dummy import DummyClassifier

# Baseline that guesses labels at random following the class proportions seen
# during training. The strategy is spelled out because DummyClassifier's
# default is not "stratified" in every scikit-learn release, and the variable
# name and printed message both promise the stratified baseline.
dummy_stratified = DummyClassifier(strategy="stratified")
dummy_stratified.fit(treino_x, treino_y)
acuracia = dummy_stratified.score(teste_x, teste_y) * 100

print("A acurácia do dummy stratified foi %.2f%%" % acuracia)
75 |
from sklearn.dummy import DummyClassifier

# Baseline that always predicts the most frequent training class. The strategy
# must be passed explicitly: with no argument this classifier was built exactly
# like the stratified baseline and never measured what its name and message
# claim.
dummy_mostfrequent = DummyClassifier(strategy="most_frequent")
dummy_mostfrequent.fit(treino_x, treino_y)
acuracia = dummy_mostfrequent.score(teste_x, teste_y) * 100

print("A acurácia do dummy mostfrequent foi %.2f%%" % acuracia)
83 |
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Re-seed and redo the stratified 75/25 split, keeping the unscaled frames
# under the raw_* names.
SEED = 5
np.random.seed(SEED)
raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
                                                         stratify = y)
# Report the sizes of the split just produced (raw_*), not the stale
# treino_x/teste_x left over from the earlier experiment.
print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(raw_treino_x), len(raw_teste_x)))

# Fit the scaler on the training rows only, then apply the same transform to
# both partitions.
scaler = StandardScaler()
scaler.fit(raw_treino_x)
treino_x = scaler.transform(raw_treino_x)
teste_x = scaler.transform(raw_teste_x)

# SVC with default settings, trained on the standardized features.
modelo = SVC()
modelo.fit(treino_x, treino_y)
previsoes = modelo.predict(teste_x)

# Accuracy on the held-out rows, expressed as a percentage.
acuracia = accuracy_score(teste_y, previsoes) * 100
print("A acurácia foi %.2f%%" % acuracia)
106 |
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Re-seed and redo the stratified 75/25 split; the tree is trained directly on
# the unscaled features.
SEED = 5
np.random.seed(SEED)
raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
                                                         stratify = y)
# Report the sizes of the split just produced (raw_*), not the scaled arrays
# left in treino_x/teste_x by the SVC experiment.
print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(raw_treino_x), len(raw_teste_x)))

# Depth is capped at 3, which keeps the tree small for the plot drawn later.
modelo = DecisionTreeClassifier(max_depth=3)
modelo.fit(raw_treino_x, treino_y)
previsoes = modelo.predict(raw_teste_x)

# Accuracy on the held-out rows, expressed as a percentage.
acuracia = accuracy_score(teste_y, previsoes) * 100
print("A acurácia foi %.2f%%" % acuracia)
124 |
from sklearn.tree import export_graphviz
import graphviz

# Export the fitted tree and wrap it in a graphviz Source for display.
# Node labels use the feature column names; the class labels are listed in
# the order of the encoded target (0 = "não", 1 = "sim").
features = x.columns
opcoes_de_exportacao = dict(
    out_file=None,
    filled=True,
    rounded=True,
    feature_names=features,
    class_names=["não", "sim"],
)
dot_data = export_graphviz(modelo, **opcoes_de_exportacao)
grafico = graphviz.Source(dot_data)
grafico
135 |
136 |
--------------------------------------------------------------------------------