├── aula1.3 ├── Introdução_a_Machine_Learning_e_Classificação_1.ipynb └── introdução_a_machine_learning_e_classificação_1.py ├── aula1.4 ├── Introdução_a_Machine_Learning_e_Classificação_1.ipynb └── introdução_a_machine_learning_e_classificação_1.py ├── aula2.1 ├── Introdução_a_Machine_Learning_Classificação_2.ipynb └── introdução_a_machine_learning_classificação_2.py ├── aula2.2 ├── Introdução_a_Machine_Learning_Classificação_2.ipynb └── introdução_a_machine_learning_classificação_2.py ├── aula3.1 ├── Introdução_a_Machine_Learning_3.ipynb └── introdução_a_machine_learning_3 (1).py ├── aula4.1 ├── Introdução_a_Machine_Learning_3.ipynb └── introdução_a_machine_learning_3.py ├── aula5.1 ├── Introdução_a_Machine_Learning_4.ipynb └── introdução_a_machine_learning_4.py ├── aula5.2 ├── Introdução_a_Machine_Learning_4.ipynb └── introdução_a_machine_learning_4.py └── aula5.4 ├── Introdução_a_Machine_Learning_4.ipynb └── introdução_a_machine_learning_4.py /aula1.3/Introdução_a_Machine_Learning_e_Classificação_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Introdução a Machine Learning e Classificação - 1.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "metadata": { 18 | "id": "iN35zFmNyYIc", 19 | "colab_type": "code", 20 | "colab": {} 21 | }, 22 | "cell_type": "code", 23 | "source": [ 24 | "# features (1 sim, 0 não)\n", 25 | "# pelo longo?\n", 26 | "# perna curta?\n", 27 | "# faz auau?\n", 28 | "porco1 = [0, 1, 0]\n", 29 | "porco2 = [0, 1, 1]\n", 30 | "porco3 = [1, 1, 0]\n", 31 | "\n", 32 | "cachorro1 = [0, 1, 1]\n", 33 | "cachorro2 = [1, 0, 1]\n", 34 | "cachorro3 = [1, 1, 1]\n", 35 | "\n", 36 | "# 1 => porco, 0 => cachorro\n", 37 | "dados = [porco1, porco2, porco3, cachorro1, cachorro2, cachorro3]\n", 38 | 
"classes = [1,1,1,0,0,0]" 39 | ], 40 | "execution_count": 0, 41 | "outputs": [] 42 | }, 43 | { 44 | "metadata": { 45 | "id": "tcWrSPHkzaby", 46 | "colab_type": "code", 47 | "colab": { 48 | "base_uri": "https://localhost:8080/", 49 | "height": 86 50 | }, 51 | "outputId": "17cd7227-3924-457c-b41a-2498a22c141e" 52 | }, 53 | "cell_type": "code", 54 | "source": [ 55 | "from sklearn.svm import LinearSVC\n", 56 | "\n", 57 | "model = LinearSVC()\n", 58 | "model.fit(dados, classes)" 59 | ], 60 | "execution_count": 7, 61 | "outputs": [ 62 | { 63 | "output_type": "execute_result", 64 | "data": { 65 | "text/plain": [ 66 | "LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n", 67 | " intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n", 68 | " multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n", 69 | " verbose=0)" 70 | ] 71 | }, 72 | "metadata": { 73 | "tags": [] 74 | }, 75 | "execution_count": 7 76 | } 77 | ] 78 | }, 79 | { 80 | "metadata": { 81 | "id": "YaUuFWOx0YZF", 82 | "colab_type": "code", 83 | "colab": { 84 | "base_uri": "https://localhost:8080/", 85 | "height": 34 86 | }, 87 | "outputId": "43e963d4-9b76-40a4-9896-e513dd23fd8f" 88 | }, 89 | "cell_type": "code", 90 | "source": [ 91 | "animal_misterioso = [1,1,1]\n", 92 | "model.predict([animal_misterioso])" 93 | ], 94 | "execution_count": 9, 95 | "outputs": [ 96 | { 97 | "output_type": "execute_result", 98 | "data": { 99 | "text/plain": [ 100 | "array([0])" 101 | ] 102 | }, 103 | "metadata": { 104 | "tags": [] 105 | }, 106 | "execution_count": 9 107 | } 108 | ] 109 | }, 110 | { 111 | "metadata": { 112 | "id": "yJEFM8mx0jtR", 113 | "colab_type": "code", 114 | "colab": {} 115 | }, 116 | "cell_type": "code", 117 | "source": [ 118 | "misterio1 = [1,1,1]\n", 119 | "misterio2 = [1,1,0]\n", 120 | "misterio3 = [0,1,1]\n", 121 | "\n", 122 | "testes = [misterio1, misterio2, misterio3]\n", 123 | "previsoes = model.predict(testes)" 124 | ], 125 | "execution_count": 0, 126 | "outputs": [] 127 | 
}, 128 | { 129 | "metadata": { 130 | "id": "3xOA4L4e03GO", 131 | "colab_type": "code", 132 | "colab": {} 133 | }, 134 | "cell_type": "code", 135 | "source": [ 136 | "testes_classes = [0, 1, 1]" 137 | ], 138 | "execution_count": 0, 139 | "outputs": [] 140 | }, 141 | { 142 | "metadata": { 143 | "id": "cUaaDQol1b_D", 144 | "colab_type": "code", 145 | "colab": { 146 | "base_uri": "https://localhost:8080/", 147 | "height": 34 148 | }, 149 | "outputId": "6b335fa6-9f1b-4e60-de6c-5dd417ff3b08" 150 | }, 151 | "cell_type": "code", 152 | "source": [ 153 | "corretos = (previsoes == testes_classes).sum()\n", 154 | "total = len(testes)\n", 155 | "taxa_de_acerto = corretos/total\n", 156 | "print(\"Taxa de acerto: \", taxa_de_acerto * 100)" 157 | ], 158 | "execution_count": 24, 159 | "outputs": [ 160 | { 161 | "output_type": "stream", 162 | "text": [ 163 | "Taxa de acerto: 66.66666666666666\n" 164 | ], 165 | "name": "stdout" 166 | } 167 | ] 168 | }, 169 | { 170 | "metadata": { 171 | "id": "T6zhFhQa1c28", 172 | "colab_type": "code", 173 | "colab": { 174 | "base_uri": "https://localhost:8080/", 175 | "height": 34 176 | }, 177 | "outputId": "3f8b5c7b-2d17-4233-c8de-e9cc9a3b7476" 178 | }, 179 | "cell_type": "code", 180 | "source": [ 181 | "from sklearn.metrics import accuracy_score\n", 182 | "\n", 183 | "taxa_de_acerto = accuracy_score(testes_classes, previsoes)\n", 184 | "print(\"Taxa de acerto\", taxa_de_acerto * 100)" 185 | ], 186 | "execution_count": 26, 187 | "outputs": [ 188 | { 189 | "output_type": "stream", 190 | "text": [ 191 | "Taxa de acerto 66.66666666666666\n" 192 | ], 193 | "name": "stdout" 194 | } 195 | ] 196 | }, 197 | { 198 | "metadata": { 199 | "id": "Z9wOaemi2fzE", 200 | "colab_type": "code", 201 | "colab": {} 202 | }, 203 | "cell_type": "code", 204 | "source": [ 205 | "" 206 | ], 207 | "execution_count": 0, 208 | "outputs": [] 209 | } 210 | ] 211 | } -------------------------------------------------------------------------------- 
/aula1.3/introdução_a_machine_learning_e_classificação_1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Introdução a Machine Learning e Classificação - 1.ipynb 3 | 4 | Automatically generated by Colaboratory. 5 | 6 | Original file is located at 7 | https://colab.research.google.com/drive/1SNvuZmre0mDEJgTBBXBzvptPtth7q_IX 8 | """ 9 | 10 | # features (1 sim, 0 não) 11 | # pelo longo? 12 | # perna curta? 13 | # faz auau? 14 | porco1 = [0, 1, 0] 15 | porco2 = [0, 1, 1] 16 | porco3 = [1, 1, 0] 17 | 18 | cachorro1 = [0, 1, 1] 19 | cachorro2 = [1, 0, 1] 20 | cachorro3 = [1, 1, 1] 21 | 22 | # 1 => porco, 0 => cachorro 23 | dados = [porco1, porco2, porco3, cachorro1, cachorro2, cachorro3] 24 | classes = [1,1,1,0,0,0] 25 | 26 | from sklearn.svm import LinearSVC 27 | 28 | model = LinearSVC() 29 | model.fit(dados, classes) 30 | 31 | animal_misterioso = [1,1,1] 32 | model.predict([animal_misterioso]) 33 | 34 | misterio1 = [1,1,1] 35 | misterio2 = [1,1,0] 36 | misterio3 = [0,1,1] 37 | 38 | testes = [misterio1, misterio2, misterio3] 39 | previsoes = model.predict(testes) 40 | 41 | testes_classes = [0, 1, 1] 42 | 43 | corretos = (previsoes == testes_classes).sum() 44 | total = len(testes) 45 | taxa_de_acerto = corretos/total 46 | print("Taxa de acerto: ", taxa_de_acerto * 100) 47 | 48 | from sklearn.metrics import accuracy_score 49 | 50 | taxa_de_acerto = accuracy_score(testes_classes, previsoes) 51 | print("Taxa de acerto", taxa_de_acerto * 100) 52 | 53 | -------------------------------------------------------------------------------- /aula1.4/Introdução_a_Machine_Learning_e_Classificação_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Introdução a Machine Learning e Classificação - 1.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | 
}, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "metadata": { 19 | "id": "iN35zFmNyYIc", 20 | "colab_type": "code", 21 | "colab": {} 22 | }, 23 | "cell_type": "code", 24 | "source": [ 25 | "# features (1 sim, 0 não)\n", 26 | "# pelo longo?\n", 27 | "# perna curta?\n", 28 | "# faz auau?\n", 29 | "porco1 = [0, 1, 0]\n", 30 | "porco2 = [0, 1, 1]\n", 31 | "porco3 = [1, 1, 0]\n", 32 | "\n", 33 | "cachorro1 = [0, 1, 1]\n", 34 | "cachorro2 = [1, 0, 1]\n", 35 | "cachorro3 = [1, 1, 1]\n", 36 | "\n", 37 | "# 1 => porco, 0 => cachorro\n", 38 | "treino_x = [porco1, porco2, porco3, cachorro1, cachorro2, cachorro3]\n", 39 | "treino_y = [1,1,1,0,0,0] # labels / etiqueta" 40 | ], 41 | "execution_count": 0, 42 | "outputs": [] 43 | }, 44 | { 45 | "metadata": { 46 | "id": "tcWrSPHkzaby", 47 | "colab_type": "code", 48 | "colab": { 49 | "base_uri": "https://localhost:8080/", 50 | "height": 87 51 | }, 52 | "outputId": "0675eaa5-68bd-4df2-cafe-a3d94a9fcec9" 53 | }, 54 | "cell_type": "code", 55 | "source": [ 56 | "from sklearn.svm import LinearSVC\n", 57 | "\n", 58 | "model = LinearSVC()\n", 59 | "model.fit(treino_x, treino_y)" 60 | ], 61 | "execution_count": 2, 62 | "outputs": [ 63 | { 64 | "output_type": "execute_result", 65 | "data": { 66 | "text/plain": [ 67 | "LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n", 68 | " intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n", 69 | " multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n", 70 | " verbose=0)" 71 | ] 72 | }, 73 | "metadata": { 74 | "tags": [] 75 | }, 76 | "execution_count": 2 77 | } 78 | ] 79 | }, 80 | { 81 | "metadata": { 82 | "id": "YaUuFWOx0YZF", 83 | "colab_type": "code", 84 | "colab": { 85 | "base_uri": "https://localhost:8080/", 86 | "height": 35 87 | }, 88 | "outputId": "defc66aa-ce1b-460c-895c-e422ffe287c5" 89 | }, 90 | "cell_type": "code", 91 | "source": [ 92 | "animal_misterioso = [1,1,1]\n", 93 | 
"model.predict([animal_misterioso])" 94 | ], 95 | "execution_count": 3, 96 | "outputs": [ 97 | { 98 | "output_type": "execute_result", 99 | "data": { 100 | "text/plain": [ 101 | "array([0])" 102 | ] 103 | }, 104 | "metadata": { 105 | "tags": [] 106 | }, 107 | "execution_count": 3 108 | } 109 | ] 110 | }, 111 | { 112 | "metadata": { 113 | "id": "yJEFM8mx0jtR", 114 | "colab_type": "code", 115 | "colab": {} 116 | }, 117 | "cell_type": "code", 118 | "source": [ 119 | "misterio1 = [1,1,1]\n", 120 | "misterio2 = [1,1,0]\n", 121 | "misterio3 = [0,1,1]\n", 122 | "\n", 123 | "teste_x = [misterio1, misterio2, misterio3]\n", 124 | "teste_y = [0, 1, 1]" 125 | ], 126 | "execution_count": 0, 127 | "outputs": [] 128 | }, 129 | { 130 | "metadata": { 131 | "id": "3xOA4L4e03GO", 132 | "colab_type": "code", 133 | "colab": {} 134 | }, 135 | "cell_type": "code", 136 | "source": [ 137 | "previsoes = model.predict(teste_x)" 138 | ], 139 | "execution_count": 0, 140 | "outputs": [] 141 | }, 142 | { 143 | "metadata": { 144 | "id": "cUaaDQol1b_D", 145 | "colab_type": "code", 146 | "colab": { 147 | "base_uri": "https://localhost:8080/", 148 | "height": 35 149 | }, 150 | "outputId": "4cd12076-c2e0-433c-b023-b0c3936b5b36" 151 | }, 152 | "cell_type": "code", 153 | "source": [ 154 | "corretos = (previsoes == teste_y).sum()\n", 155 | "total = len(teste_x)\n", 156 | "taxa_de_acerto = corretos/total\n", 157 | "print(\"Taxa de acerto %.2f\" % (taxa_de_acerto * 100))" 158 | ], 159 | "execution_count": 8, 160 | "outputs": [ 161 | { 162 | "output_type": "stream", 163 | "text": [ 164 | "Taxa de acerto 66.67\n" 165 | ], 166 | "name": "stdout" 167 | } 168 | ] 169 | }, 170 | { 171 | "metadata": { 172 | "id": "T6zhFhQa1c28", 173 | "colab_type": "code", 174 | "colab": { 175 | "base_uri": "https://localhost:8080/", 176 | "height": 34 177 | }, 178 | "outputId": "29ed71f9-5a9d-4de3-84f5-825121a92a0b" 179 | }, 180 | "cell_type": "code", 181 | "source": [ 182 | "from sklearn.metrics import accuracy_score\n", 183 | 
"\n", 184 | "taxa_de_acerto = accuracy_score(teste_y, previsoes)\n", 185 | "print(\"Taxa de acerto %.2f\" % (taxa_de_acerto * 100))" 186 | ], 187 | "execution_count": 9, 188 | "outputs": [ 189 | { 190 | "output_type": "stream", 191 | "text": [ 192 | "Taxa de acerto 66.67\n" 193 | ], 194 | "name": "stdout" 195 | } 196 | ] 197 | }, 198 | { 199 | "metadata": { 200 | "id": "Z9wOaemi2fzE", 201 | "colab_type": "code", 202 | "colab": {} 203 | }, 204 | "cell_type": "code", 205 | "source": [ 206 | "" 207 | ], 208 | "execution_count": 0, 209 | "outputs": [] 210 | } 211 | ] 212 | } -------------------------------------------------------------------------------- /aula1.4/introdução_a_machine_learning_e_classificação_1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Introdução a Machine Learning e Classificação - 1.ipynb 3 | 4 | Automatically generated by Colaboratory. 5 | 6 | Original file is located at 7 | https://colab.research.google.com/drive/1SNvuZmre0mDEJgTBBXBzvptPtth7q_IX 8 | """ 9 | 10 | # features (1 sim, 0 não) 11 | # pelo longo? 12 | # perna curta? 13 | # faz auau? 
14 | porco1 = [0, 1, 0] 15 | porco2 = [0, 1, 1] 16 | porco3 = [1, 1, 0] 17 | 18 | cachorro1 = [0, 1, 1] 19 | cachorro2 = [1, 0, 1] 20 | cachorro3 = [1, 1, 1] 21 | 22 | # 1 => porco, 0 => cachorro 23 | treino_x = [porco1, porco2, porco3, cachorro1, cachorro2, cachorro3] 24 | treino_y = [1,1,1,0,0,0] # labels / etiqueta 25 | 26 | from sklearn.svm import LinearSVC 27 | 28 | model = LinearSVC() 29 | model.fit(treino_x, treino_y) 30 | 31 | animal_misterioso = [1,1,1] 32 | model.predict([animal_misterioso]) 33 | 34 | misterio1 = [1,1,1] 35 | misterio2 = [1,1,0] 36 | misterio3 = [0,1,1] 37 | 38 | teste_x = [misterio1, misterio2, misterio3] 39 | teste_y = [0, 1, 1] 40 | 41 | previsoes = model.predict(teste_x) 42 | 43 | corretos = (previsoes == teste_y).sum() 44 | total = len(teste_x) 45 | taxa_de_acerto = corretos/total 46 | print("Taxa de acerto %.2f" % (taxa_de_acerto * 100)) 47 | 48 | from sklearn.metrics import accuracy_score 49 | 50 | taxa_de_acerto = accuracy_score(teste_y, previsoes) 51 | print("Taxa de acerto %.2f" % (taxa_de_acerto * 100)) 52 | 53 | -------------------------------------------------------------------------------- /aula2.1/Introdução_a_Machine_Learning_Classificação_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Introdução a Machine Learning Classificação - 2.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "metadata": { 19 | "id": "mEIOC06i7QEJ", 20 | "colab_type": "code", 21 | "colab": { 22 | "base_uri": "https://localhost:8080/", 23 | "height": 202 24 | }, 25 | "outputId": "f6dbd4b4-8372-44b6-c3d1-92a4e08d83ff" 26 | }, 27 | "cell_type": "code", 28 | "source": [ 29 | "import pandas as pd\n", 30 | "\n", 31 | "uri = 
\"https://gist.githubusercontent.com/guilhermesilveira/2d2efa37d66b6c84a722ea627a897ced/raw/10968b997d885cbded1c92938c7a9912ba41c615/tracking.csv\"\n", 32 | "dados = pd.read_csv(uri)\n", 33 | "dados.head()" 34 | ], 35 | "execution_count": 2, 36 | "outputs": [ 37 | { 38 | "output_type": "execute_result", 39 | "data": { 40 | "text/html": [ 41 | "
\n", 42 | "\n", 55 | "\n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | "
homehow_it_workscontactbought
01100
11100
21100
31100
41100
\n", 103 | "
" 104 | ], 105 | "text/plain": [ 106 | " home how_it_works contact bought\n", 107 | "0 1 1 0 0\n", 108 | "1 1 1 0 0\n", 109 | "2 1 1 0 0\n", 110 | "3 1 1 0 0\n", 111 | "4 1 1 0 0" 112 | ] 113 | }, 114 | "metadata": { 115 | "tags": [] 116 | }, 117 | "execution_count": 2 118 | } 119 | ] 120 | }, 121 | { 122 | "metadata": { 123 | "id": "uDu0eTJn7x0D", 124 | "colab_type": "code", 125 | "colab": {} 126 | }, 127 | "cell_type": "code", 128 | "source": [ 129 | "mapa = {\n", 130 | " \"home\" : \"principal\",\n", 131 | " \"how_it_works\" : \"como_funciona\",\n", 132 | " \"contact\" : \"contato\",\n", 133 | " \"bought\" : \"comprou\"\n", 134 | "}\n", 135 | "dados = dados.rename(columns = mapa)" 136 | ], 137 | "execution_count": 0, 138 | "outputs": [] 139 | }, 140 | { 141 | "metadata": { 142 | "id": "9En1V0PM7e8V", 143 | "colab_type": "code", 144 | "colab": { 145 | "base_uri": "https://localhost:8080/", 146 | "height": 202 147 | }, 148 | "outputId": "0f54cba9-4f0c-433e-b7a0-b0fa15d39f5a" 149 | }, 150 | "cell_type": "code", 151 | "source": [ 152 | "x = dados[[\"principal\",\"como_funciona\",\"contato\"]]\n", 153 | "x.head()" 154 | ], 155 | "execution_count": 10, 156 | "outputs": [ 157 | { 158 | "output_type": "execute_result", 159 | "data": { 160 | "text/html": [ 161 | "
\n", 162 | "\n", 175 | "\n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | "
principalcomo_funcionacontato
0110
1110
2110
3110
4110
\n", 217 | "
" 218 | ], 219 | "text/plain": [ 220 | " principal como_funciona contato\n", 221 | "0 1 1 0\n", 222 | "1 1 1 0\n", 223 | "2 1 1 0\n", 224 | "3 1 1 0\n", 225 | "4 1 1 0" 226 | ] 227 | }, 228 | "metadata": { 229 | "tags": [] 230 | }, 231 | "execution_count": 10 232 | } 233 | ] 234 | }, 235 | { 236 | "metadata": { 237 | "id": "qDr2YoWu8O3O", 238 | "colab_type": "code", 239 | "colab": { 240 | "base_uri": "https://localhost:8080/", 241 | "height": 121 242 | }, 243 | "outputId": "29495494-aff4-4b5f-b8ae-95ea280f3bce" 244 | }, 245 | "cell_type": "code", 246 | "source": [ 247 | "y = dados[\"comprou\"]\n", 248 | "y.head()" 249 | ], 250 | "execution_count": 11, 251 | "outputs": [ 252 | { 253 | "output_type": "execute_result", 254 | "data": { 255 | "text/plain": [ 256 | "0 0\n", 257 | "1 0\n", 258 | "2 0\n", 259 | "3 0\n", 260 | "4 0\n", 261 | "Name: comprou, dtype: int64" 262 | ] 263 | }, 264 | "metadata": { 265 | "tags": [] 266 | }, 267 | "execution_count": 11 268 | } 269 | ] 270 | }, 271 | { 272 | "metadata": { 273 | "id": "X5pZ6xcZ8fYq", 274 | "colab_type": "code", 275 | "colab": { 276 | "base_uri": "https://localhost:8080/", 277 | "height": 35 278 | }, 279 | "outputId": "a31766ff-52df-4b74-97a7-605916419c87" 280 | }, 281 | "cell_type": "code", 282 | "source": [ 283 | "dados.shape" 284 | ], 285 | "execution_count": 12, 286 | "outputs": [ 287 | { 288 | "output_type": "execute_result", 289 | "data": { 290 | "text/plain": [ 291 | "(99, 4)" 292 | ] 293 | }, 294 | "metadata": { 295 | "tags": [] 296 | }, 297 | "execution_count": 12 298 | } 299 | ] 300 | }, 301 | { 302 | "metadata": { 303 | "id": "TLZ9eTvP9U9q", 304 | "colab_type": "code", 305 | "colab": { 306 | "base_uri": "https://localhost:8080/", 307 | "height": 35 308 | }, 309 | "outputId": "5c392acb-77ff-496c-d8f9-573a8c6414d2" 310 | }, 311 | "cell_type": "code", 312 | "source": [ 313 | "treino_x = x[:75]\n", 314 | "treino_y = y[:75]\n", 315 | "teste_x = x[75:]\n", 316 | "teste_y = y[75:]\n", 317 | "\n", 318 | 
"print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))" 319 | ], 320 | "execution_count": 16, 321 | "outputs": [ 322 | { 323 | "output_type": "stream", 324 | "text": [ 325 | "Treinaremos com 75 elementos e testaremos com 24 elementos\n" 326 | ], 327 | "name": "stdout" 328 | } 329 | ] 330 | }, 331 | { 332 | "metadata": { 333 | "id": "pZZjbQxh9jn8", 334 | "colab_type": "code", 335 | "colab": { 336 | "base_uri": "https://localhost:8080/", 337 | "height": 34 338 | }, 339 | "outputId": "b7feb2a2-2694-4e6a-aa6c-9fb33e25917f" 340 | }, 341 | "cell_type": "code", 342 | "source": [ 343 | "from sklearn.svm import LinearSVC\n", 344 | "from sklearn.metrics import accuracy_score\n", 345 | "\n", 346 | "modelo = LinearSVC()\n", 347 | "modelo.fit(treino_x, treino_y)\n", 348 | "previsoes = modelo.predict(teste_x)\n", 349 | "\n", 350 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n", 351 | "print(\"A acurácia foi %.2f%%\" % acuracia)" 352 | ], 353 | "execution_count": 20, 354 | "outputs": [ 355 | { 356 | "output_type": "stream", 357 | "text": [ 358 | "A acurácia foi 95.83%\n" 359 | ], 360 | "name": "stdout" 361 | } 362 | ] 363 | }, 364 | { 365 | "metadata": { 366 | "id": "rA-z0_a6-CM1", 367 | "colab_type": "code", 368 | "colab": {} 369 | }, 370 | "cell_type": "code", 371 | "source": [ 372 | "" 373 | ], 374 | "execution_count": 0, 375 | "outputs": [] 376 | } 377 | ] 378 | } -------------------------------------------------------------------------------- /aula2.1/introdução_a_machine_learning_classificação_2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Introdução a Machine Learning Classificação - 2.ipynb 3 | 4 | Automatically generated by Colaboratory. 
5 | 6 | Original file is located at 7 | https://colab.research.google.com/drive/1nIhP3F_nGiAQayvsPziHuEOZva-HvzLn 8 | """ 9 | 10 | import pandas as pd 11 | 12 | uri = "https://gist.githubusercontent.com/guilhermesilveira/2d2efa37d66b6c84a722ea627a897ced/raw/10968b997d885cbded1c92938c7a9912ba41c615/tracking.csv" 13 | dados = pd.read_csv(uri) 14 | dados.head() 15 | 16 | mapa = { 17 | "home" : "principal", 18 | "how_it_works" : "como_funciona", 19 | "contact" : "contato", 20 | "bought" : "comprou" 21 | } 22 | dados = dados.rename(columns = mapa) 23 | 24 | x = dados[["principal","como_funciona","contato"]] 25 | x.head() 26 | 27 | y = dados["comprou"] 28 | y.head() 29 | 30 | dados.shape 31 | 32 | treino_x = x[:75] 33 | treino_y = y[:75] 34 | teste_x = x[75:] 35 | teste_y = y[75:] 36 | 37 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x))) 38 | 39 | from sklearn.svm import LinearSVC 40 | from sklearn.metrics import accuracy_score 41 | 42 | modelo = LinearSVC() 43 | modelo.fit(treino_x, treino_y) 44 | previsoes = modelo.predict(teste_x) 45 | 46 | acuracia = accuracy_score(teste_y, previsoes) * 100 47 | print("A acurácia foi %.2f%%" % acuracia) 48 | 49 | -------------------------------------------------------------------------------- /aula2.2/Introdução_a_Machine_Learning_Classificação_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Introdução a Machine Learning Classificação - 2.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "metadata": { 19 | "id": "mEIOC06i7QEJ", 20 | "colab_type": "code", 21 | "colab": { 22 | "base_uri": "https://localhost:8080/", 23 | "height": 195 24 | }, 25 | "outputId": 
"f6dbd4b4-8372-44b6-c3d1-92a4e08d83ff" 26 | }, 27 | "cell_type": "code", 28 | "source": [ 29 | "import pandas as pd\n", 30 | "\n", 31 | "uri = \"https://gist.githubusercontent.com/guilhermesilveira/2d2efa37d66b6c84a722ea627a897ced/raw/10968b997d885cbded1c92938c7a9912ba41c615/tracking.csv\"\n", 32 | "dados = pd.read_csv(uri)\n", 33 | "dados.head()" 34 | ], 35 | "execution_count": 2, 36 | "outputs": [ 37 | { 38 | "output_type": "execute_result", 39 | "data": { 40 | "text/html": [ 41 | "
\n", 42 | "\n", 55 | "\n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | "
homehow_it_workscontactbought
01100
11100
21100
31100
41100
\n", 103 | "
" 104 | ], 105 | "text/plain": [ 106 | " home how_it_works contact bought\n", 107 | "0 1 1 0 0\n", 108 | "1 1 1 0 0\n", 109 | "2 1 1 0 0\n", 110 | "3 1 1 0 0\n", 111 | "4 1 1 0 0" 112 | ] 113 | }, 114 | "metadata": { 115 | "tags": [] 116 | }, 117 | "execution_count": 2 118 | } 119 | ] 120 | }, 121 | { 122 | "metadata": { 123 | "id": "uDu0eTJn7x0D", 124 | "colab_type": "code", 125 | "colab": {} 126 | }, 127 | "cell_type": "code", 128 | "source": [ 129 | "mapa = {\n", 130 | " \"home\" : \"principal\",\n", 131 | " \"how_it_works\" : \"como_funciona\",\n", 132 | " \"contact\" : \"contato\",\n", 133 | " \"bought\" : \"comprou\"\n", 134 | "}\n", 135 | "dados = dados.rename(columns = mapa)" 136 | ], 137 | "execution_count": 0, 138 | "outputs": [] 139 | }, 140 | { 141 | "metadata": { 142 | "id": "9En1V0PM7e8V", 143 | "colab_type": "code", 144 | "colab": { 145 | "base_uri": "https://localhost:8080/", 146 | "height": 195 147 | }, 148 | "outputId": "0f54cba9-4f0c-433e-b7a0-b0fa15d39f5a" 149 | }, 150 | "cell_type": "code", 151 | "source": [ 152 | "x = dados[[\"principal\",\"como_funciona\",\"contato\"]]\n", 153 | "x.head()" 154 | ], 155 | "execution_count": 10, 156 | "outputs": [ 157 | { 158 | "output_type": "execute_result", 159 | "data": { 160 | "text/html": [ 161 | "
\n", 162 | "\n", 175 | "\n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | "
principalcomo_funcionacontato
0110
1110
2110
3110
4110
\n", 217 | "
" 218 | ], 219 | "text/plain": [ 220 | " principal como_funciona contato\n", 221 | "0 1 1 0\n", 222 | "1 1 1 0\n", 223 | "2 1 1 0\n", 224 | "3 1 1 0\n", 225 | "4 1 1 0" 226 | ] 227 | }, 228 | "metadata": { 229 | "tags": [] 230 | }, 231 | "execution_count": 10 232 | } 233 | ] 234 | }, 235 | { 236 | "metadata": { 237 | "id": "qDr2YoWu8O3O", 238 | "colab_type": "code", 239 | "colab": { 240 | "base_uri": "https://localhost:8080/", 241 | "height": 118 242 | }, 243 | "outputId": "29495494-aff4-4b5f-b8ae-95ea280f3bce" 244 | }, 245 | "cell_type": "code", 246 | "source": [ 247 | "y = dados[\"comprou\"]\n", 248 | "y.head()" 249 | ], 250 | "execution_count": 11, 251 | "outputs": [ 252 | { 253 | "output_type": "execute_result", 254 | "data": { 255 | "text/plain": [ 256 | "0 0\n", 257 | "1 0\n", 258 | "2 0\n", 259 | "3 0\n", 260 | "4 0\n", 261 | "Name: comprou, dtype: int64" 262 | ] 263 | }, 264 | "metadata": { 265 | "tags": [] 266 | }, 267 | "execution_count": 11 268 | } 269 | ] 270 | }, 271 | { 272 | "metadata": { 273 | "id": "X5pZ6xcZ8fYq", 274 | "colab_type": "code", 275 | "colab": { 276 | "base_uri": "https://localhost:8080/", 277 | "height": 34 278 | }, 279 | "outputId": "a31766ff-52df-4b74-97a7-605916419c87" 280 | }, 281 | "cell_type": "code", 282 | "source": [ 283 | "dados.shape" 284 | ], 285 | "execution_count": 12, 286 | "outputs": [ 287 | { 288 | "output_type": "execute_result", 289 | "data": { 290 | "text/plain": [ 291 | "(99, 4)" 292 | ] 293 | }, 294 | "metadata": { 295 | "tags": [] 296 | }, 297 | "execution_count": 12 298 | } 299 | ] 300 | }, 301 | { 302 | "metadata": { 303 | "id": "TLZ9eTvP9U9q", 304 | "colab_type": "code", 305 | "colab": { 306 | "base_uri": "https://localhost:8080/", 307 | "height": 34 308 | }, 309 | "outputId": "5c392acb-77ff-496c-d8f9-573a8c6414d2" 310 | }, 311 | "cell_type": "code", 312 | "source": [ 313 | "treino_x = x[:75]\n", 314 | "treino_y = y[:75]\n", 315 | "teste_x = x[75:]\n", 316 | "teste_y = y[75:]\n", 317 | "\n", 318 | 
"print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))" 319 | ], 320 | "execution_count": 16, 321 | "outputs": [ 322 | { 323 | "output_type": "stream", 324 | "text": [ 325 | "Treinaremos com 75 elementos e testaremos com 24 elementos\n" 326 | ], 327 | "name": "stdout" 328 | } 329 | ] 330 | }, 331 | { 332 | "metadata": { 333 | "id": "pZZjbQxh9jn8", 334 | "colab_type": "code", 335 | "colab": { 336 | "base_uri": "https://localhost:8080/", 337 | "height": 34 338 | }, 339 | "outputId": "b7feb2a2-2694-4e6a-aa6c-9fb33e25917f" 340 | }, 341 | "cell_type": "code", 342 | "source": [ 343 | "from sklearn.svm import LinearSVC\n", 344 | "from sklearn.metrics import accuracy_score\n", 345 | "\n", 346 | "modelo = LinearSVC()\n", 347 | "modelo.fit(treino_x, treino_y)\n", 348 | "previsoes = modelo.predict(teste_x)\n", 349 | "\n", 350 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n", 351 | "print(\"A acurácia foi %.2f%%\" % acuracia)" 352 | ], 353 | "execution_count": 20, 354 | "outputs": [ 355 | { 356 | "output_type": "stream", 357 | "text": [ 358 | "A acurácia foi 95.83%\n" 359 | ], 360 | "name": "stdout" 361 | } 362 | ] 363 | }, 364 | { 365 | "metadata": { 366 | "id": "2iVcuGkyA5tK", 367 | "colab_type": "text" 368 | }, 369 | "cell_type": "markdown", 370 | "source": [ 371 | "# Usando a biblioteca para separar treino e teste" 372 | ] 373 | }, 374 | { 375 | "metadata": { 376 | "id": "rA-z0_a6-CM1", 377 | "colab_type": "code", 378 | "colab": { 379 | "base_uri": "https://localhost:8080/", 380 | "height": 50 381 | }, 382 | "outputId": "f3287dba-50a8-4cd8-9001-ce41278c8bb1" 383 | }, 384 | "cell_type": "code", 385 | "source": [ 386 | "from sklearn.model_selection import train_test_split\n", 387 | "from sklearn.svm import LinearSVC\n", 388 | "from sklearn.metrics import accuracy_score\n", 389 | "\n", 390 | "SEED = 20\n", 391 | "\n", 392 | "treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, random_state = SEED, test_size = 
0.25)\n", 393 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n", 394 | "\n", 395 | "modelo = LinearSVC()\n", 396 | "modelo.fit(treino_x, treino_y)\n", 397 | "previsoes = modelo.predict(teste_x)\n", 398 | "\n", 399 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n", 400 | "print(\"A acurácia foi %.2f%%\" % acuracia)" 401 | ], 402 | "execution_count": 35, 403 | "outputs": [ 404 | { 405 | "output_type": "stream", 406 | "text": [ 407 | "Treinaremos com 74 elementos e testaremos com 25 elementos\n", 408 | "A acurácia foi 96.00%\n" 409 | ], 410 | "name": "stdout" 411 | } 412 | ] 413 | }, 414 | { 415 | "metadata": { 416 | "id": "JWFKlQccAk1F", 417 | "colab_type": "code", 418 | "colab": { 419 | "base_uri": "https://localhost:8080/", 420 | "height": 68 421 | }, 422 | "outputId": "749d52d7-a4b3-488b-a7ad-01ab66793ef7" 423 | }, 424 | "cell_type": "code", 425 | "source": [ 426 | "treino_y.value_counts()" 427 | ], 428 | "execution_count": 37, 429 | "outputs": [ 430 | { 431 | "output_type": "execute_result", 432 | "data": { 433 | "text/plain": [ 434 | "0 47\n", 435 | "1 27\n", 436 | "Name: comprou, dtype: int64" 437 | ] 438 | }, 439 | "metadata": { 440 | "tags": [] 441 | }, 442 | "execution_count": 37 443 | } 444 | ] 445 | }, 446 | { 447 | "metadata": { 448 | "id": "fdORezxWBcwX", 449 | "colab_type": "code", 450 | "colab": { 451 | "base_uri": "https://localhost:8080/", 452 | "height": 68 453 | }, 454 | "outputId": "7ce6be19-5f39-498a-cf61-e76af3990271" 455 | }, 456 | "cell_type": "code", 457 | "source": [ 458 | "teste_y.value_counts()" 459 | ], 460 | "execution_count": 38, 461 | "outputs": [ 462 | { 463 | "output_type": "execute_result", 464 | "data": { 465 | "text/plain": [ 466 | "0 19\n", 467 | "1 6\n", 468 | "Name: comprou, dtype: int64" 469 | ] 470 | }, 471 | "metadata": { 472 | "tags": [] 473 | }, 474 | "execution_count": 38 475 | } 476 | ] 477 | }, 478 | { 479 | "metadata": { 480 | "id": "k_kFDoBABh4B", 481 | 
"colab_type": "code", 482 | "colab": { 483 | "base_uri": "https://localhost:8080/", 484 | "height": 51 485 | }, 486 | "outputId": "ba8b4464-305a-4e94-9422-445600888353" 487 | }, 488 | "cell_type": "code", 489 | "source": [ 490 | "from sklearn.model_selection import train_test_split\n", 491 | "from sklearn.svm import LinearSVC\n", 492 | "from sklearn.metrics import accuracy_score\n", 493 | "\n", 494 | "SEED = 20\n", 495 | "\n", 496 | "treino_x, teste_x, treino_y, teste_y = train_test_split(x, y,\n", 497 | " random_state = SEED, test_size = 0.25,\n", 498 | " stratify = y)\n", 499 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n", 500 | "\n", 501 | "modelo = LinearSVC()\n", 502 | "modelo.fit(treino_x, treino_y)\n", 503 | "previsoes = modelo.predict(teste_x)\n", 504 | "\n", 505 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n", 506 | "print(\"A acurácia foi %.2f%%\" % acuracia)" 507 | ], 508 | "execution_count": 41, 509 | "outputs": [ 510 | { 511 | "output_type": "stream", 512 | "text": [ 513 | "Treinaremos com 74 elementos e testaremos com 25 elementos\n", 514 | "A acurácia foi 96.00%\n" 515 | ], 516 | "name": "stdout" 517 | } 518 | ] 519 | }, 520 | { 521 | "metadata": { 522 | "id": "SS4n0CVXB6Fo", 523 | "colab_type": "code", 524 | "colab": { 525 | "base_uri": "https://localhost:8080/", 526 | "height": 67 527 | }, 528 | "outputId": "60d3e7e9-4a79-4d96-f3f5-6ef1a843519a" 529 | }, 530 | "cell_type": "code", 531 | "source": [ 532 | "treino_y.value_counts()" 533 | ], 534 | "execution_count": 42, 535 | "outputs": [ 536 | { 537 | "output_type": "execute_result", 538 | "data": { 539 | "text/plain": [ 540 | "0 49\n", 541 | "1 25\n", 542 | "Name: comprou, dtype: int64" 543 | ] 544 | }, 545 | "metadata": { 546 | "tags": [] 547 | }, 548 | "execution_count": 42 549 | } 550 | ] 551 | }, 552 | { 553 | "metadata": { 554 | "id": "hgvSvos6CHIk", 555 | "colab_type": "code", 556 | "colab": { 557 | "base_uri": 
"https://localhost:8080/", 558 | "height": 67 559 | }, 560 | "outputId": "8ec62784-d5a3-4197-81fb-accc0632bdf8" 561 | }, 562 | "cell_type": "code", 563 | "source": [ 564 | "teste_y.value_counts()" 565 | ], 566 | "execution_count": 43, 567 | "outputs": [ 568 | { 569 | "output_type": "execute_result", 570 | "data": { 571 | "text/plain": [ 572 | "0 17\n", 573 | "1 8\n", 574 | "Name: comprou, dtype: int64" 575 | ] 576 | }, 577 | "metadata": { 578 | "tags": [] 579 | }, 580 | "execution_count": 43 581 | } 582 | ] 583 | }, 584 | { 585 | "metadata": { 586 | "id": "bqhIUWBsCH8w", 587 | "colab_type": "code", 588 | "colab": {} 589 | }, 590 | "cell_type": "code", 591 | "source": [ 592 | "" 593 | ], 594 | "execution_count": 0, 595 | "outputs": [] 596 | } 597 | ] 598 | } -------------------------------------------------------------------------------- /aula2.2/introdução_a_machine_learning_classificação_2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Introdução a Machine Learning Classificação - 2.ipynb 3 | 4 | Automatically generated by Colaboratory. 
5 | 6 | Original file is located at 7 | https://colab.research.google.com/drive/1nIhP3F_nGiAQayvsPziHuEOZva-HvzLn 8 | """ 9 | 10 | import pandas as pd 11 | 12 | uri = "https://gist.githubusercontent.com/guilhermesilveira/2d2efa37d66b6c84a722ea627a897ced/raw/10968b997d885cbded1c92938c7a9912ba41c615/tracking.csv" 13 | dados = pd.read_csv(uri) 14 | dados.head() 15 | 16 | mapa = { 17 | "home" : "principal", 18 | "how_it_works" : "como_funciona", 19 | "contact" : "contato", 20 | "bought" : "comprou" 21 | } 22 | dados = dados.rename(columns = mapa) 23 | 24 | x = dados[["principal","como_funciona","contato"]] 25 | x.head() 26 | 27 | y = dados["comprou"] 28 | y.head() 29 | 30 | dados.shape 31 | 32 | treino_x = x[:75] 33 | treino_y = y[:75] 34 | teste_x = x[75:] 35 | teste_y = y[75:] 36 | 37 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x))) 38 | 39 | from sklearn.svm import LinearSVC 40 | from sklearn.metrics import accuracy_score 41 | 42 | modelo = LinearSVC() 43 | modelo.fit(treino_x, treino_y) 44 | previsoes = modelo.predict(teste_x) 45 | 46 | acuracia = accuracy_score(teste_y, previsoes) * 100 47 | print("A acurácia foi %.2f%%" % acuracia) 48 | 49 | """# Usando a biblioteca para separar treino e teste""" 50 | 51 | from sklearn.model_selection import train_test_split 52 | from sklearn.svm import LinearSVC 53 | from sklearn.metrics import accuracy_score 54 | 55 | SEED = 20 56 | 57 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, random_state = SEED, test_size = 0.25) 58 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x))) 59 | 60 | modelo = LinearSVC() 61 | modelo.fit(treino_x, treino_y) 62 | previsoes = modelo.predict(teste_x) 63 | 64 | acuracia = accuracy_score(teste_y, previsoes) * 100 65 | print("A acurácia foi %.2f%%" % acuracia) 66 | 67 | treino_y.value_counts() 68 | 69 | teste_y.value_counts() 70 | 71 | from sklearn.model_selection import 
train_test_split 72 | from sklearn.svm import LinearSVC 73 | from sklearn.metrics import accuracy_score 74 | 75 | SEED = 20 76 | 77 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, 78 | random_state = SEED, test_size = 0.25, 79 | stratify = y) 80 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x))) 81 | 82 | modelo = LinearSVC() 83 | modelo.fit(treino_x, treino_y) 84 | previsoes = modelo.predict(teste_x) 85 | 86 | acuracia = accuracy_score(teste_y, previsoes) * 100 87 | print("A acurácia foi %.2f%%" % acuracia) 88 | 89 | treino_y.value_counts() 90 | 91 | teste_y.value_counts() 92 | 93 | -------------------------------------------------------------------------------- /aula3.1/introdução_a_machine_learning_3 (1).py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Introdução a Machine Learning 3.ipynb 3 | 4 | Automatically generated by Colaboratory. 5 | 6 | Original file is located at 7 | https://colab.research.google.com/drive/1r4UlftWbCZA3w-glDhPlo1TUK-Pf6-Sa 8 | """ 9 | 10 | !pip install seaborn==0.9.0 11 | 12 | import pandas as pd 13 | 14 | uri = "https://gist.githubusercontent.com/guilhermesilveira/1b7d5475863c15f484ac495bd70975cf/raw/16aff7a0aee67e7c100a2a48b676a2d2d142f646/projects.csv" 15 | dados = pd.read_csv(uri) 16 | dados.head() 17 | 18 | a_renomear = { 19 | 'expected_hours' : 'horas_esperadas', 20 | 'price' : 'preco', 21 | 'unfinished' : 'nao_finalizado' 22 | } 23 | dados = dados.rename(columns = a_renomear) 24 | dados.head() 25 | 26 | troca = { 27 | 0 : 1, 28 | 1 : 0 29 | } 30 | dados['finalizado'] = dados.nao_finalizado.map(troca) 31 | dados.head() 32 | 33 | dados.tail() 34 | 35 | import seaborn as sns 36 | 37 | sns.scatterplot(x="horas_esperadas", y="preco", data=dados) 38 | 39 | sns.scatterplot(x="horas_esperadas", y="preco", hue="finalizado", data=dados) 40 | 41 | sns.relplot(x="horas_esperadas", y="preco", 
hue="finalizado", col="finalizado", data=dados) 42 | 43 | x = dados[['horas_esperadas', 'preco']] 44 | y = dados['finalizado'] 45 | 46 | from sklearn.model_selection import train_test_split 47 | from sklearn.svm import LinearSVC 48 | from sklearn.metrics import accuracy_score 49 | 50 | SEED = 20 51 | 52 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, 53 | random_state = SEED, test_size = 0.25, 54 | stratify = y) 55 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x))) 56 | 57 | modelo = LinearSVC() 58 | modelo.fit(treino_x, treino_y) 59 | previsoes = modelo.predict(teste_x) 60 | 61 | acuracia = accuracy_score(teste_y, previsoes) * 100 62 | print("A acurácia foi %.2f%%" % acuracia) 63 | 64 | import numpy as np 65 | previsoes_de_base = np.ones(540) 66 | acuracia = accuracy_score(teste_y, previsoes_de_base) * 100 67 | print("A acurácia do algoritmo de baseline foi %.2f%%" % acuracia) 68 | 69 | -------------------------------------------------------------------------------- /aula4.1/introdução_a_machine_learning_3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Introdução a Machine Learning 3.ipynb 3 | 4 | Automatically generated by Colaboratory. 
5 | 6 | Original file is located at 7 | https://colab.research.google.com/drive/1r4UlftWbCZA3w-glDhPlo1TUK-Pf6-Sa 8 | """ 9 | 10 | !pip install seaborn==0.9.0 11 | 12 | import pandas as pd 13 | 14 | uri = "https://gist.githubusercontent.com/guilhermesilveira/1b7d5475863c15f484ac495bd70975cf/raw/16aff7a0aee67e7c100a2a48b676a2d2d142f646/projects.csv" 15 | dados = pd.read_csv(uri) 16 | dados.head() 17 | 18 | a_renomear = { 19 | 'expected_hours' : 'horas_esperadas', 20 | 'price' : 'preco', 21 | 'unfinished' : 'nao_finalizado' 22 | } 23 | dados = dados.rename(columns = a_renomear) 24 | dados.head() 25 | 26 | troca = { 27 | 0 : 1, 28 | 1 : 0 29 | } 30 | dados['finalizado'] = dados.nao_finalizado.map(troca) 31 | dados.head() 32 | 33 | dados.tail() 34 | 35 | import seaborn as sns 36 | 37 | sns.scatterplot(x="horas_esperadas", y="preco", data=dados) 38 | 39 | sns.scatterplot(x="horas_esperadas", y="preco", hue="finalizado", data=dados) 40 | 41 | sns.relplot(x="horas_esperadas", y="preco", hue="finalizado", col="finalizado", data=dados) 42 | 43 | x = dados[['horas_esperadas', 'preco']] 44 | y = dados['finalizado'] 45 | 46 | from sklearn.model_selection import train_test_split 47 | from sklearn.svm import LinearSVC 48 | from sklearn.metrics import accuracy_score 49 | 50 | SEED = 5 51 | np.random.seed(SEED) 52 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25, 53 | stratify = y) 54 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x))) 55 | 56 | modelo = LinearSVC() 57 | modelo.fit(treino_x, treino_y) 58 | previsoes = modelo.predict(teste_x) 59 | 60 | acuracia = accuracy_score(teste_y, previsoes) * 100 61 | print("A acurácia foi %.2f%%" % acuracia) 62 | 63 | import numpy as np 64 | previsoes_de_base = np.ones(540) 65 | acuracia = accuracy_score(teste_y, previsoes_de_base) * 100 66 | print("A acurácia do algoritmo de baseline foi %.2f%%" % acuracia) 67 | 68 | sns.scatterplot(x="horas_esperadas", 
y="preco", hue=teste_y, data=teste_x) 69 | 70 | x_min = teste_x.horas_esperadas.min() 71 | x_max = teste_x.horas_esperadas.max() 72 | y_min = teste_x.preco.min() 73 | y_max = teste_x.preco.max() 74 | print(x_min, x_max,y_min,y_max) 75 | 76 | pixels = 100 77 | eixo_x = np.arange(x_min, x_max, (x_max - x_min) / pixels) 78 | eixo_y = np.arange(y_min, y_max, (y_max - y_min) / pixels) 79 | 80 | xx, yy = np.meshgrid(eixo_x, eixo_y) 81 | pontos = np.c_[xx.ravel(), yy.ravel()] 82 | pontos 83 | 84 | Z = modelo.predict(pontos) 85 | Z = Z.reshape(xx.shape) 86 | Z 87 | 88 | import matplotlib.pyplot as plt 89 | 90 | plt.contourf(xx, yy, Z, alpha=0.3) 91 | plt.scatter(teste_x.horas_esperadas, teste_x.preco, c=teste_y, s=1) 92 | 93 | # DECISION BOUNDARY 94 | 95 | from sklearn.model_selection import train_test_split 96 | from sklearn.svm import SVC 97 | from sklearn.metrics import accuracy_score 98 | 99 | SEED = 5 100 | np.random.seed(SEED) 101 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25, 102 | stratify = y) 103 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x))) 104 | 105 | modelo = SVC() 106 | modelo.fit(treino_x, treino_y) 107 | previsoes = modelo.predict(teste_x) 108 | 109 | acuracia = accuracy_score(teste_y, previsoes) * 100 110 | print("A acurácia foi %.2f%%" % acuracia) 111 | 112 | x_min = teste_x.horas_esperadas.min() 113 | x_max = teste_x.horas_esperadas.max() 114 | y_min = teste_x.preco.min() 115 | y_max = teste_x.preco.max() 116 | 117 | pixels = 100 118 | eixo_x = np.arange(x_min, x_max, (x_max - x_min) / pixels) 119 | eixo_y = np.arange(y_min, y_max, (y_max - y_min) / pixels) 120 | 121 | xx, yy = np.meshgrid(eixo_x, eixo_y) 122 | pontos = np.c_[xx.ravel(), yy.ravel()] 123 | 124 | Z = modelo.predict(pontos) 125 | Z = Z.reshape(xx.shape) 126 | 127 | import matplotlib.pyplot as plt 128 | 129 | plt.contourf(xx, yy, Z, alpha=0.3) 130 | plt.scatter(teste_x.horas_esperadas, teste_x.preco, 
c=teste_y, s=1) 131 | 132 | # DECISION BOUNDARY 133 | 134 | from sklearn.preprocessing import StandardScaler 135 | from sklearn.model_selection import train_test_split 136 | from sklearn.svm import SVC 137 | from sklearn.metrics import accuracy_score 138 | 139 | SEED = 5 140 | np.random.seed(SEED) 141 | raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25, 142 | stratify = y) 143 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x))) 144 | 145 | scaler = StandardScaler() 146 | scaler.fit(raw_treino_x) 147 | treino_x = scaler.transform(raw_treino_x) 148 | teste_x = scaler.transform(raw_teste_x) 149 | 150 | modelo = SVC() 151 | modelo.fit(treino_x, treino_y) 152 | previsoes = modelo.predict(teste_x) 153 | 154 | acuracia = accuracy_score(teste_y, previsoes) * 100 155 | print("A acurácia foi %.2f%%" % acuracia) 156 | 157 | treino_x 158 | 159 | data_x = teste_x[:,0] 160 | data_y = teste_x[:,1] 161 | 162 | x_min = data_x.min() 163 | x_max = data_x.max() 164 | y_min = data_y.min() 165 | y_max = data_y.max() 166 | 167 | pixels = 100 168 | eixo_x = np.arange(x_min, x_max, (x_max - x_min) / pixels) 169 | eixo_y = np.arange(y_min, y_max, (y_max - y_min) / pixels) 170 | 171 | xx, yy = np.meshgrid(eixo_x, eixo_y) 172 | pontos = np.c_[xx.ravel(), yy.ravel()] 173 | 174 | Z = modelo.predict(pontos) 175 | Z = Z.reshape(xx.shape) 176 | 177 | import matplotlib.pyplot as plt 178 | 179 | plt.contourf(xx, yy, Z, alpha=0.3) 180 | plt.scatter(data_x, data_y, c=teste_y, s=1) 181 | 182 | # DECISION BOUNDARY 183 | 184 | -------------------------------------------------------------------------------- /aula5.1/Introdução_a_Machine_Learning_4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Introdução a Machine Learning - 4.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | 
"collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "metadata": { 19 | "id": "y7ik04NlDZMA", 20 | "colab_type": "code", 21 | "colab": { 22 | "base_uri": "https://localhost:8080/", 23 | "height": 195 24 | }, 25 | "outputId": "0bdc8cf3-ab12-4a1c-83e4-3691f646e930" 26 | }, 27 | "cell_type": "code", 28 | "source": [ 29 | "import pandas as pd\n", 30 | "\n", 31 | "uri = \"https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv\"\n", 32 | "dados = pd.read_csv(uri)\n", 33 | "dados.head()" 34 | ], 35 | "execution_count": 4, 36 | "outputs": [ 37 | { 38 | "output_type": "execute_result", 39 | "data": { 40 | "text/html": [ 41 | "
\n", 42 | "\n", 55 | "\n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | "
Unnamed: 0mileage_per_yearmodel_yearpricesold
0021801200030941.02yes
117843199840557.96yes
227109200689627.50no
3326823201595276.14no
4479352014117384.68yes
\n", 109 | "
" 110 | ], 111 | "text/plain": [ 112 | " Unnamed: 0 mileage_per_year model_year price sold\n", 113 | "0 0 21801 2000 30941.02 yes\n", 114 | "1 1 7843 1998 40557.96 yes\n", 115 | "2 2 7109 2006 89627.50 no\n", 116 | "3 3 26823 2015 95276.14 no\n", 117 | "4 4 7935 2014 117384.68 yes" 118 | ] 119 | }, 120 | "metadata": { 121 | "tags": [] 122 | }, 123 | "execution_count": 4 124 | } 125 | ] 126 | }, 127 | { 128 | "metadata": { 129 | "id": "FqFrPmgJDhvM", 130 | "colab_type": "code", 131 | "colab": { 132 | "base_uri": "https://localhost:8080/", 133 | "height": 195 134 | }, 135 | "outputId": "2c6b015c-4c70-461a-a5be-a1a5f225a5f7" 136 | }, 137 | "cell_type": "code", 138 | "source": [ 139 | "a_renomear = {\n", 140 | " 'mileage_per_year' : 'milhas_por_ano',\n", 141 | " 'model_year' : 'ano_do_modelo',\n", 142 | " 'price' : 'preco',\n", 143 | " 'sold' : 'vendido'\n", 144 | "}\n", 145 | "dados = dados.rename(columns=a_renomear)\n", 146 | "dados.head()" 147 | ], 148 | "execution_count": 6, 149 | "outputs": [ 150 | { 151 | "output_type": "execute_result", 152 | "data": { 153 | "text/html": [ 154 | "
\n", 155 | "\n", 168 | "\n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | "
Unnamed: 0milhas_por_anoano_do_modeloprecovendido
0021801200030941.02yes
117843199840557.96yes
227109200689627.50no
3326823201595276.14no
4479352014117384.68yes
\n", 222 | "
" 223 | ], 224 | "text/plain": [ 225 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido\n", 226 | "0 0 21801 2000 30941.02 yes\n", 227 | "1 1 7843 1998 40557.96 yes\n", 228 | "2 2 7109 2006 89627.50 no\n", 229 | "3 3 26823 2015 95276.14 no\n", 230 | "4 4 7935 2014 117384.68 yes" 231 | ] 232 | }, 233 | "metadata": { 234 | "tags": [] 235 | }, 236 | "execution_count": 6 237 | } 238 | ] 239 | }, 240 | { 241 | "metadata": { 242 | "id": "J31fUGbVEKpW", 243 | "colab_type": "code", 244 | "colab": { 245 | "base_uri": "https://localhost:8080/", 246 | "height": 195 247 | }, 248 | "outputId": "f25f070a-ee26-40ce-9b02-7e37aecc7b2f" 249 | }, 250 | "cell_type": "code", 251 | "source": [ 252 | "a_trocar = {\n", 253 | " 'no' : 0,\n", 254 | " 'yes' : 1\n", 255 | "}\n", 256 | "dados.vendido = dados.vendido.map(a_trocar)\n", 257 | "dados.head()" 258 | ], 259 | "execution_count": 9, 260 | "outputs": [ 261 | { 262 | "output_type": "execute_result", 263 | "data": { 264 | "text/html": [ 265 | "
\n", 266 | "\n", 279 | "\n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | "
Unnamed: 0milhas_por_anoano_do_modeloprecovendido
0021801200030941.021
117843199840557.961
227109200689627.500
3326823201595276.140
4479352014117384.681
\n", 333 | "
" 334 | ], 335 | "text/plain": [ 336 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido\n", 337 | "0 0 21801 2000 30941.02 1\n", 338 | "1 1 7843 1998 40557.96 1\n", 339 | "2 2 7109 2006 89627.50 0\n", 340 | "3 3 26823 2015 95276.14 0\n", 341 | "4 4 7935 2014 117384.68 1" 342 | ] 343 | }, 344 | "metadata": { 345 | "tags": [] 346 | }, 347 | "execution_count": 9 348 | } 349 | ] 350 | }, 351 | { 352 | "metadata": { 353 | "id": "tZFog8O9EXYD", 354 | "colab_type": "code", 355 | "colab": { 356 | "base_uri": "https://localhost:8080/", 357 | "height": 195 358 | }, 359 | "outputId": "09240207-0e20-4c07-822c-3a23186b99fe" 360 | }, 361 | "cell_type": "code", 362 | "source": [ 363 | "from datetime import datetime\n", 364 | "\n", 365 | "ano_atual = datetime.today().year\n", 366 | "dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo\n", 367 | "dados.head()" 368 | ], 369 | "execution_count": 12, 370 | "outputs": [ 371 | { 372 | "output_type": "execute_result", 373 | "data": { 374 | "text/html": [ 375 | "
\n", 376 | "\n", 389 | "\n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | "
Unnamed: 0milhas_por_anoano_do_modeloprecovendidoidade_do_modelo
0021801200030941.02118
117843199840557.96120
227109200689627.50012
3326823201595276.1403
4479352014117384.6814
\n", 449 | "
" 450 | ], 451 | "text/plain": [ 452 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido \\\n", 453 | "0 0 21801 2000 30941.02 1 \n", 454 | "1 1 7843 1998 40557.96 1 \n", 455 | "2 2 7109 2006 89627.50 0 \n", 456 | "3 3 26823 2015 95276.14 0 \n", 457 | "4 4 7935 2014 117384.68 1 \n", 458 | "\n", 459 | " idade_do_modelo \n", 460 | "0 18 \n", 461 | "1 20 \n", 462 | "2 12 \n", 463 | "3 3 \n", 464 | "4 4 " 465 | ] 466 | }, 467 | "metadata": { 468 | "tags": [] 469 | }, 470 | "execution_count": 12 471 | } 472 | ] 473 | }, 474 | { 475 | "metadata": { 476 | "id": "3wWWgxhcFbR9", 477 | "colab_type": "code", 478 | "colab": { 479 | "base_uri": "https://localhost:8080/", 480 | "height": 195 481 | }, 482 | "outputId": "5c1a4a30-6d60-44b7-d232-0c8d47bb4d22" 483 | }, 484 | "cell_type": "code", 485 | "source": [ 486 | "dados['km_por_ano'] = dados.milhas_por_ano * 1.60934\n", 487 | "dados.head()" 488 | ], 489 | "execution_count": 15, 490 | "outputs": [ 491 | { 492 | "output_type": "execute_result", 493 | "data": { 494 | "text/html": [ 495 | "
\n", 496 | "\n", 509 | "\n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | "
Unnamed: 0milhas_por_anoano_do_modeloprecovendidoidade_do_modelokm_por_ano
0021801200030941.0211835085.22134
117843199840557.9612012622.05362
227109200689627.5001211440.79806
3326823201595276.140343167.32682
4479352014117384.681412770.11290
\n", 575 | "
" 576 | ], 577 | "text/plain": [ 578 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido \\\n", 579 | "0 0 21801 2000 30941.02 1 \n", 580 | "1 1 7843 1998 40557.96 1 \n", 581 | "2 2 7109 2006 89627.50 0 \n", 582 | "3 3 26823 2015 95276.14 0 \n", 583 | "4 4 7935 2014 117384.68 1 \n", 584 | "\n", 585 | " idade_do_modelo km_por_ano \n", 586 | "0 18 35085.22134 \n", 587 | "1 20 12622.05362 \n", 588 | "2 12 11440.79806 \n", 589 | "3 3 43167.32682 \n", 590 | "4 4 12770.11290 " 591 | ] 592 | }, 593 | "metadata": { 594 | "tags": [] 595 | }, 596 | "execution_count": 15 597 | } 598 | ] 599 | }, 600 | { 601 | "metadata": { 602 | "id": "MfjQNKlyFo2S", 603 | "colab_type": "code", 604 | "colab": { 605 | "base_uri": "https://localhost:8080/", 606 | "height": 195 607 | }, 608 | "outputId": "908e17b5-9b48-48d0-f9e3-fbe69343a121" 609 | }, 610 | "cell_type": "code", 611 | "source": [ 612 | "dados = dados.drop(columns = [\"Unnamed: 0\", \"milhas_por_ano\",\"ano_do_modelo\"], axis=1)\n", 613 | "dados.head()" 614 | ], 615 | "execution_count": 16, 616 | "outputs": [ 617 | { 618 | "output_type": "execute_result", 619 | "data": { 620 | "text/html": [ 621 | "
\n", 622 | "\n", 635 | "\n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | "
precovendidoidade_do_modelokm_por_ano
030941.0211835085.22134
140557.9612012622.05362
289627.5001211440.79806
395276.140343167.32682
4117384.681412770.11290
\n", 683 | "
" 684 | ], 685 | "text/plain": [ 686 | " preco vendido idade_do_modelo km_por_ano\n", 687 | "0 30941.02 1 18 35085.22134\n", 688 | "1 40557.96 1 20 12622.05362\n", 689 | "2 89627.50 0 12 11440.79806\n", 690 | "3 95276.14 0 3 43167.32682\n", 691 | "4 117384.68 1 4 12770.11290" 692 | ] 693 | }, 694 | "metadata": { 695 | "tags": [] 696 | }, 697 | "execution_count": 16 698 | } 699 | ] 700 | }, 701 | { 702 | "metadata": { 703 | "id": "E3xebM4FF0Tc", 704 | "colab_type": "code", 705 | "colab": { 706 | "base_uri": "https://localhost:8080/", 707 | "height": 51 708 | }, 709 | "outputId": "51eb40fc-4bb1-4183-85d5-de0fd699c5d7" 710 | }, 711 | "cell_type": "code", 712 | "source": [ 713 | "import numpy as np\n", 714 | "from sklearn.model_selection import train_test_split\n", 715 | "from sklearn.svm import LinearSVC\n", 716 | "from sklearn.metrics import accuracy_score\n", 717 | "\n", 718 | "x = dados[[\"preco\", \"idade_do_modelo\",\"km_por_ano\"]]\n", 719 | "y = dados[\"vendido\"]\n", 720 | "\n", 721 | "SEED = 5\n", 722 | "np.random.seed(SEED)\n", 723 | "treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n", 724 | " stratify = y)\n", 725 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n", 726 | "\n", 727 | "modelo = LinearSVC()\n", 728 | "modelo.fit(treino_x, treino_y)\n", 729 | "previsoes = modelo.predict(teste_x)\n", 730 | "\n", 731 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n", 732 | "print(\"A acurácia foi %.2f%%\" % acuracia)" 733 | ], 734 | "execution_count": 18, 735 | "outputs": [ 736 | { 737 | "output_type": "stream", 738 | "text": [ 739 | "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n", 740 | "A acurácia foi 57.88%\n" 741 | ], 742 | "name": "stdout" 743 | } 744 | ] 745 | }, 746 | { 747 | "metadata": { 748 | "id": "G2ZFWoPkGONL", 749 | "colab_type": "code", 750 | "colab": {} 751 | }, 752 | "cell_type": "code", 753 | "source": [ 754 | "" 755 | ], 756 | 
"execution_count": 0, 757 | "outputs": [] 758 | } 759 | ] 760 | } -------------------------------------------------------------------------------- /aula5.1/introdução_a_machine_learning_4.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Introdução a Machine Learning - 4.ipynb 3 | 4 | Automatically generated by Colaboratory. 5 | 6 | Original file is located at 7 | https://colab.research.google.com/drive/1RpYAAROMa4C86iZscVUzaWIeVYSJapyE 8 | """ 9 | 10 | import pandas as pd 11 | 12 | uri = "https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv" 13 | dados = pd.read_csv(uri) 14 | dados.head() 15 | 16 | a_renomear = { 17 | 'mileage_per_year' : 'milhas_por_ano', 18 | 'model_year' : 'ano_do_modelo', 19 | 'price' : 'preco', 20 | 'sold' : 'vendido' 21 | } 22 | dados = dados.rename(columns=a_renomear) 23 | dados.head() 24 | 25 | a_trocar = { 26 | 'no' : 0, 27 | 'yes' : 1 28 | } 29 | dados.vendido = dados.vendido.map(a_trocar) 30 | dados.head() 31 | 32 | from datetime import datetime 33 | 34 | ano_atual = datetime.today().year 35 | dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo 36 | dados.head() 37 | 38 | dados['km_por_ano'] = dados.milhas_por_ano * 1.60934 39 | dados.head() 40 | 41 | dados = dados.drop(columns = ["Unnamed: 0", "milhas_por_ano","ano_do_modelo"], axis=1) 42 | dados.head() 43 | 44 | import numpy as np 45 | from sklearn.model_selection import train_test_split 46 | from sklearn.svm import LinearSVC 47 | from sklearn.metrics import accuracy_score 48 | 49 | x = dados[["preco", "idade_do_modelo","km_por_ano"]] 50 | y = dados["vendido"] 51 | 52 | SEED = 5 53 | np.random.seed(SEED) 54 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25, 55 | stratify = y) 56 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x))) 57 | 58 
| modelo = LinearSVC() 59 | modelo.fit(treino_x, treino_y) 60 | previsoes = modelo.predict(teste_x) 61 | 62 | acuracia = accuracy_score(teste_y, previsoes) * 100 63 | print("A acurácia foi %.2f%%" % acuracia) 64 | 65 | -------------------------------------------------------------------------------- /aula5.2/Introdução_a_Machine_Learning_4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Introdução a Machine Learning - 4.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "metadata": { 19 | "id": "y7ik04NlDZMA", 20 | "colab_type": "code", 21 | "colab": { 22 | "base_uri": "https://localhost:8080/", 23 | "height": 204 24 | }, 25 | "outputId": "73c13868-60e4-4a25-9338-02f324cabb18" 26 | }, 27 | "cell_type": "code", 28 | "source": [ 29 | "import pandas as pd\n", 30 | "\n", 31 | "uri = \"https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv\"\n", 32 | "dados = pd.read_csv(uri)\n", 33 | "dados.head()" 34 | ], 35 | "execution_count": 1, 36 | "outputs": [ 37 | { 38 | "output_type": "execute_result", 39 | "data": { 40 | "text/html": [ 41 | "
\n", 42 | "\n", 55 | "\n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | "
Unnamed: 0mileage_per_yearmodel_yearpricesold
0021801200030941.02yes
117843199840557.96yes
227109200689627.50no
3326823201595276.14no
4479352014117384.68yes
\n", 109 | "
" 110 | ], 111 | "text/plain": [ 112 | " Unnamed: 0 mileage_per_year model_year price sold\n", 113 | "0 0 21801 2000 30941.02 yes\n", 114 | "1 1 7843 1998 40557.96 yes\n", 115 | "2 2 7109 2006 89627.50 no\n", 116 | "3 3 26823 2015 95276.14 no\n", 117 | "4 4 7935 2014 117384.68 yes" 118 | ] 119 | }, 120 | "metadata": { 121 | "tags": [] 122 | }, 123 | "execution_count": 1 124 | } 125 | ] 126 | }, 127 | { 128 | "metadata": { 129 | "id": "FqFrPmgJDhvM", 130 | "colab_type": "code", 131 | "colab": { 132 | "base_uri": "https://localhost:8080/", 133 | "height": 195 134 | }, 135 | "outputId": "46b16944-2a4a-4b53-b970-5a96a9b7d867" 136 | }, 137 | "cell_type": "code", 138 | "source": [ 139 | "a_renomear = {\n", 140 | " 'mileage_per_year' : 'milhas_por_ano',\n", 141 | " 'model_year' : 'ano_do_modelo',\n", 142 | " 'price' : 'preco',\n", 143 | " 'sold' : 'vendido'\n", 144 | "}\n", 145 | "dados = dados.rename(columns=a_renomear)\n", 146 | "dados.head()" 147 | ], 148 | "execution_count": 2, 149 | "outputs": [ 150 | { 151 | "output_type": "execute_result", 152 | "data": { 153 | "text/html": [ 154 | "
\n", 155 | "\n", 168 | "\n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | "
Unnamed: 0milhas_por_anoano_do_modeloprecovendido
0021801200030941.02yes
117843199840557.96yes
227109200689627.50no
3326823201595276.14no
4479352014117384.68yes
\n", 222 | "
" 223 | ], 224 | "text/plain": [ 225 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido\n", 226 | "0 0 21801 2000 30941.02 yes\n", 227 | "1 1 7843 1998 40557.96 yes\n", 228 | "2 2 7109 2006 89627.50 no\n", 229 | "3 3 26823 2015 95276.14 no\n", 230 | "4 4 7935 2014 117384.68 yes" 231 | ] 232 | }, 233 | "metadata": { 234 | "tags": [] 235 | }, 236 | "execution_count": 2 237 | } 238 | ] 239 | }, 240 | { 241 | "metadata": { 242 | "id": "J31fUGbVEKpW", 243 | "colab_type": "code", 244 | "colab": { 245 | "base_uri": "https://localhost:8080/", 246 | "height": 195 247 | }, 248 | "outputId": "ad0a651f-320a-4f5e-8e2c-706bf6937fbb" 249 | }, 250 | "cell_type": "code", 251 | "source": [ 252 | "a_trocar = {\n", 253 | " 'no' : 0,\n", 254 | " 'yes' : 1\n", 255 | "}\n", 256 | "dados.vendido = dados.vendido.map(a_trocar)\n", 257 | "dados.head()" 258 | ], 259 | "execution_count": 3, 260 | "outputs": [ 261 | { 262 | "output_type": "execute_result", 263 | "data": { 264 | "text/html": [ 265 | "
\n", 266 | "\n", 279 | "\n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | "
Unnamed: 0milhas_por_anoano_do_modeloprecovendido
0021801200030941.021
117843199840557.961
227109200689627.500
3326823201595276.140
4479352014117384.681
\n", 333 | "
" 334 | ], 335 | "text/plain": [ 336 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido\n", 337 | "0 0 21801 2000 30941.02 1\n", 338 | "1 1 7843 1998 40557.96 1\n", 339 | "2 2 7109 2006 89627.50 0\n", 340 | "3 3 26823 2015 95276.14 0\n", 341 | "4 4 7935 2014 117384.68 1" 342 | ] 343 | }, 344 | "metadata": { 345 | "tags": [] 346 | }, 347 | "execution_count": 3 348 | } 349 | ] 350 | }, 351 | { 352 | "metadata": { 353 | "id": "tZFog8O9EXYD", 354 | "colab_type": "code", 355 | "colab": { 356 | "base_uri": "https://localhost:8080/", 357 | "height": 195 358 | }, 359 | "outputId": "5792a2f8-f777-4237-e813-697f60309951" 360 | }, 361 | "cell_type": "code", 362 | "source": [ 363 | "from datetime import datetime\n", 364 | "\n", 365 | "ano_atual = datetime.today().year\n", 366 | "dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo\n", 367 | "dados.head()" 368 | ], 369 | "execution_count": 4, 370 | "outputs": [ 371 | { 372 | "output_type": "execute_result", 373 | "data": { 374 | "text/html": [ 375 | "
\n", 376 | "\n", 389 | "\n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | "
Unnamed: 0milhas_por_anoano_do_modeloprecovendidoidade_do_modelo
0021801200030941.02118
117843199840557.96120
227109200689627.50012
3326823201595276.1403
4479352014117384.6814
\n", 449 | "
" 450 | ], 451 | "text/plain": [ 452 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido \\\n", 453 | "0 0 21801 2000 30941.02 1 \n", 454 | "1 1 7843 1998 40557.96 1 \n", 455 | "2 2 7109 2006 89627.50 0 \n", 456 | "3 3 26823 2015 95276.14 0 \n", 457 | "4 4 7935 2014 117384.68 1 \n", 458 | "\n", 459 | " idade_do_modelo \n", 460 | "0 18 \n", 461 | "1 20 \n", 462 | "2 12 \n", 463 | "3 3 \n", 464 | "4 4 " 465 | ] 466 | }, 467 | "metadata": { 468 | "tags": [] 469 | }, 470 | "execution_count": 4 471 | } 472 | ] 473 | }, 474 | { 475 | "metadata": { 476 | "id": "3wWWgxhcFbR9", 477 | "colab_type": "code", 478 | "colab": { 479 | "base_uri": "https://localhost:8080/", 480 | "height": 204 481 | }, 482 | "outputId": "b4c147dc-c2e5-4bfd-e78f-5a34f72770d6" 483 | }, 484 | "cell_type": "code", 485 | "source": [ 486 | "dados['km_por_ano'] = dados.milhas_por_ano * 1.60934\n", 487 | "dados.head()" 488 | ], 489 | "execution_count": 5, 490 | "outputs": [ 491 | { 492 | "output_type": "execute_result", 493 | "data": { 494 | "text/html": [ 495 | "
\n", 496 | "\n", 509 | "\n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | "
Unnamed: 0milhas_por_anoano_do_modeloprecovendidoidade_do_modelokm_por_ano
0021801200030941.0211835085.22134
117843199840557.9612012622.05362
227109200689627.5001211440.79806
3326823201595276.140343167.32682
4479352014117384.681412770.11290
\n", 575 | "
" 576 | ], 577 | "text/plain": [ 578 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido \\\n", 579 | "0 0 21801 2000 30941.02 1 \n", 580 | "1 1 7843 1998 40557.96 1 \n", 581 | "2 2 7109 2006 89627.50 0 \n", 582 | "3 3 26823 2015 95276.14 0 \n", 583 | "4 4 7935 2014 117384.68 1 \n", 584 | "\n", 585 | " idade_do_modelo km_por_ano \n", 586 | "0 18 35085.22134 \n", 587 | "1 20 12622.05362 \n", 588 | "2 12 11440.79806 \n", 589 | "3 3 43167.32682 \n", 590 | "4 4 12770.11290 " 591 | ] 592 | }, 593 | "metadata": { 594 | "tags": [] 595 | }, 596 | "execution_count": 5 597 | } 598 | ] 599 | }, 600 | { 601 | "metadata": { 602 | "id": "MfjQNKlyFo2S", 603 | "colab_type": "code", 604 | "colab": { 605 | "base_uri": "https://localhost:8080/", 606 | "height": 204 607 | }, 608 | "outputId": "7588f6a5-ff37-4996-91a5-d9ee4b5543d7" 609 | }, 610 | "cell_type": "code", 611 | "source": [ 612 | "dados = dados.drop(columns = [\"Unnamed: 0\", \"milhas_por_ano\",\"ano_do_modelo\"], axis=1)\n", 613 | "dados.head()" 614 | ], 615 | "execution_count": 6, 616 | "outputs": [ 617 | { 618 | "output_type": "execute_result", 619 | "data": { 620 | "text/html": [ 621 | "
\n", 622 | "\n", 635 | "\n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | "
precovendidoidade_do_modelokm_por_ano
030941.0211835085.22134
140557.9612012622.05362
289627.5001211440.79806
395276.140343167.32682
4117384.681412770.11290
\n", 683 | "
" 684 | ], 685 | "text/plain": [ 686 | " preco vendido idade_do_modelo km_por_ano\n", 687 | "0 30941.02 1 18 35085.22134\n", 688 | "1 40557.96 1 20 12622.05362\n", 689 | "2 89627.50 0 12 11440.79806\n", 690 | "3 95276.14 0 3 43167.32682\n", 691 | "4 117384.68 1 4 12770.11290" 692 | ] 693 | }, 694 | "metadata": { 695 | "tags": [] 696 | }, 697 | "execution_count": 6 698 | } 699 | ] 700 | }, 701 | { 702 | "metadata": { 703 | "id": "E3xebM4FF0Tc", 704 | "colab_type": "code", 705 | "colab": { 706 | "base_uri": "https://localhost:8080/", 707 | "height": 50 708 | }, 709 | "outputId": "7d48ae66-beb0-4884-cbfa-c81c9ce95eb1" 710 | }, 711 | "cell_type": "code", 712 | "source": [ 713 | "import numpy as np\n", 714 | "from sklearn.model_selection import train_test_split\n", 715 | "from sklearn.svm import LinearSVC\n", 716 | "from sklearn.metrics import accuracy_score\n", 717 | "\n", 718 | "x = dados[[\"preco\", \"idade_do_modelo\",\"km_por_ano\"]]\n", 719 | "y = dados[\"vendido\"]\n", 720 | "\n", 721 | "SEED = 5\n", 722 | "np.random.seed(SEED)\n", 723 | "treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n", 724 | " stratify = y)\n", 725 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n", 726 | "\n", 727 | "modelo = LinearSVC()\n", 728 | "modelo.fit(treino_x, treino_y)\n", 729 | "previsoes = modelo.predict(teste_x)\n", 730 | "\n", 731 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n", 732 | "print(\"A acurácia foi %.2f%%\" % acuracia)" 733 | ], 734 | "execution_count": 17, 735 | "outputs": [ 736 | { 737 | "output_type": "stream", 738 | "text": [ 739 | "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n", 740 | "A acurácia foi 57.88%\n" 741 | ], 742 | "name": "stdout" 743 | } 744 | ] 745 | }, 746 | { 747 | "metadata": { 748 | "id": "G2ZFWoPkGONL", 749 | "colab_type": "code", 750 | "colab": { 751 | "base_uri": "https://localhost:8080/", 752 | "height": 34 753 | }, 754 | 
"outputId": "f86fecd4-0528-420d-face-b3ab72af6a23" 755 | }, 756 | "cell_type": "code", 757 | "source": [ 758 | "from sklearn.dummy import DummyClassifier\n", 759 | "\n", 760 | "dummy_stratified = DummyClassifier()\n", 761 | "dummy_stratified.fit(treino_x, treino_y)\n", 762 | "acuracia = dummy_stratified.score(teste_x, teste_y) * 100\n", 763 | "\n", 764 | "print(\"A acurácia do dummy stratified foi %.2f%%\" % acuracia)" 765 | ], 766 | "execution_count": 18, 767 | "outputs": [ 768 | { 769 | "output_type": "stream", 770 | "text": [ 771 | "A acurácia do dummy stratified foi 52.44%\n" 772 | ], 773 | "name": "stdout" 774 | } 775 | ] 776 | }, 777 | { 778 | "metadata": { 779 | "id": "469ZKbHJIHL0", 780 | "colab_type": "code", 781 | "colab": { 782 | "base_uri": "https://localhost:8080/", 783 | "height": 34 784 | }, 785 | "outputId": "7348330f-4752-4e1e-8f27-2420acf2ad14" 786 | }, 787 | "cell_type": "code", 788 | "source": [ 789 | "from sklearn.dummy import DummyClassifier\n", 790 | "\n", 791 | "dummy_mostfrequent = DummyClassifier()\n", 792 | "dummy_mostfrequent.fit(treino_x, treino_y)\n", 793 | "acuracia = dummy_mostfrequent.score(teste_x, teste_y) * 100\n", 794 | "\n", 795 | "print(\"A acurácia do dummy mostfrequent foi %.2f%%\" % acuracia)" 796 | ], 797 | "execution_count": 19, 798 | "outputs": [ 799 | { 800 | "output_type": "stream", 801 | "text": [ 802 | "A acurácia do dummy mostfrequent foi 50.68%\n" 803 | ], 804 | "name": "stdout" 805 | } 806 | ] 807 | }, 808 | { 809 | "metadata": { 810 | "id": "S_5AfFerIsH_", 811 | "colab_type": "code", 812 | "colab": { 813 | "base_uri": "https://localhost:8080/", 814 | "height": 50 815 | }, 816 | "outputId": "569a686d-9b86-459f-d30d-5ce738bb6897" 817 | }, 818 | "cell_type": "code", 819 | "source": [ 820 | "from sklearn.preprocessing import StandardScaler\n", 821 | "from sklearn.model_selection import train_test_split\n", 822 | "from sklearn.svm import SVC\n", 823 | "from sklearn.metrics import accuracy_score\n", 824 | "\n", 825 | 
"SEED = 5\n", 826 | "np.random.seed(SEED)\n", 827 | "raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n", 828 | " stratify = y)\n", 829 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n", 830 | "\n", 831 | "scaler = StandardScaler()\n", 832 | "scaler.fit(raw_treino_x)\n", 833 | "treino_x = scaler.transform(raw_treino_x)\n", 834 | "teste_x = scaler.transform(raw_teste_x)\n", 835 | "\n", 836 | "modelo = SVC()\n", 837 | "modelo.fit(treino_x, treino_y)\n", 838 | "previsoes = modelo.predict(teste_x)\n", 839 | "\n", 840 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n", 841 | "print(\"A acurácia foi %.2f%%\" % acuracia)\n" 842 | ], 843 | "execution_count": 20, 844 | "outputs": [ 845 | { 846 | "output_type": "stream", 847 | "text": [ 848 | "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n", 849 | "A acurácia foi 77.48%\n" 850 | ], 851 | "name": "stdout" 852 | } 853 | ] 854 | }, 855 | { 856 | "metadata": { 857 | "id": "-1YsD-whJuGR", 858 | "colab_type": "code", 859 | "colab": {} 860 | }, 861 | "cell_type": "code", 862 | "source": [ 863 | "" 864 | ], 865 | "execution_count": 0, 866 | "outputs": [] 867 | } 868 | ] 869 | } -------------------------------------------------------------------------------- /aula5.2/introdução_a_machine_learning_4.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Introdução a Machine Learning - 4.ipynb 3 | 4 | Automatically generated by Colaboratory. 
5 | 6 | Original file is located at 7 | https://colab.research.google.com/drive/1RpYAAROMa4C86iZscVUzaWIeVYSJapyE 8 | """ 9 | 10 | import pandas as pd 11 | 12 | uri = "https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv" 13 | dados = pd.read_csv(uri) 14 | dados.head() 15 | 16 | a_renomear = { 17 | 'mileage_per_year' : 'milhas_por_ano', 18 | 'model_year' : 'ano_do_modelo', 19 | 'price' : 'preco', 20 | 'sold' : 'vendido' 21 | } 22 | dados = dados.rename(columns=a_renomear) 23 | dados.head() 24 | 25 | a_trocar = { 26 | 'no' : 0, 27 | 'yes' : 1 28 | } 29 | dados.vendido = dados.vendido.map(a_trocar) 30 | dados.head() 31 | 32 | from datetime import datetime 33 | 34 | ano_atual = datetime.today().year 35 | dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo 36 | dados.head() 37 | 38 | dados['km_por_ano'] = dados.milhas_por_ano * 1.60934 39 | dados.head() 40 | 41 | dados = dados.drop(columns = ["Unnamed: 0", "milhas_por_ano","ano_do_modelo"], axis=1) 42 | dados.head() 43 | 44 | import numpy as np 45 | from sklearn.model_selection import train_test_split 46 | from sklearn.svm import LinearSVC 47 | from sklearn.metrics import accuracy_score 48 | 49 | x = dados[["preco", "idade_do_modelo","km_por_ano"]] 50 | y = dados["vendido"] 51 | 52 | SEED = 5 53 | np.random.seed(SEED) 54 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25, 55 | stratify = y) 56 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x))) 57 | 58 | modelo = LinearSVC() 59 | modelo.fit(treino_x, treino_y) 60 | previsoes = modelo.predict(teste_x) 61 | 62 | acuracia = accuracy_score(teste_y, previsoes) * 100 63 | print("A acurácia foi %.2f%%" % acuracia) 64 | 65 | from sklearn.dummy import DummyClassifier 66 | 67 | dummy_stratified = DummyClassifier() 68 | dummy_stratified.fit(treino_x, treino_y) 69 | acuracia = 
dummy_stratified.score(teste_x, teste_y) * 100 70 | 71 | print("A acurácia do dummy stratified foi %.2f%%" % acuracia) 72 | 73 | from sklearn.dummy import DummyClassifier 74 | 75 | dummy_mostfrequent = DummyClassifier() 76 | dummy_mostfrequent.fit(treino_x, treino_y) 77 | acuracia = dummy_mostfrequent.score(teste_x, teste_y) * 100 78 | 79 | print("A acurácia do dummy mostfrequent foi %.2f%%" % acuracia) 80 | 81 | from sklearn.preprocessing import StandardScaler 82 | from sklearn.model_selection import train_test_split 83 | from sklearn.svm import SVC 84 | from sklearn.metrics import accuracy_score 85 | 86 | SEED = 5 87 | np.random.seed(SEED) 88 | raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25, 89 | stratify = y) 90 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x))) 91 | 92 | scaler = StandardScaler() 93 | scaler.fit(raw_treino_x) 94 | treino_x = scaler.transform(raw_treino_x) 95 | teste_x = scaler.transform(raw_teste_x) 96 | 97 | modelo = SVC() 98 | modelo.fit(treino_x, treino_y) 99 | previsoes = modelo.predict(teste_x) 100 | 101 | acuracia = accuracy_score(teste_y, previsoes) * 100 102 | print("A acurácia foi %.2f%%" % acuracia) 103 | 104 | -------------------------------------------------------------------------------- /aula5.4/Introdução_a_Machine_Learning_4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Introdução a Machine Learning - 4.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "metadata": { 19 | "id": "T3ywJhRBMGvY", 20 | "colab_type": "code", 21 | "colab": { 22 | "base_uri": "https://localhost:8080/", 23 | "height": 3057 24 | }, 25 | "outputId": 
"fdc1e620-4be1-4b38-f525-a075971a2572" 26 | }, 27 | "cell_type": "code", 28 | "source": [ 29 | "!pip install graphviz==0.10\n", 30 | "!apt-get install graphviz" 31 | ], 32 | "execution_count": 28, 33 | "outputs": [ 34 | { 35 | "output_type": "stream", 36 | "text": [ 37 | "Requirement already satisfied: graphviz==0.10 in /usr/local/lib/python3.6/dist-packages (0.10)\n", 38 | "Reading package lists... Done\n", 39 | "Building dependency tree \n", 40 | "Reading state information... Done\n", 41 | "The following additional packages will be installed:\n", 42 | " fontconfig libann0 libcairo2 libcdt5 libcgraph6 libdatrie1 libgd3\n", 43 | " libgts-0.7-5 libgts-bin libgvc6 libgvpr2 libjbig0 liblab-gamut1 libltdl7\n", 44 | " libpango-1.0-0 libpangocairo-1.0-0 libpangoft2-1.0-0 libpathplan4\n", 45 | " libpixman-1-0 libthai-data libthai0 libtiff5 libwebp6 libxaw7 libxcb-render0\n", 46 | " libxcb-shm0 libxmu6 libxpm4 libxt6\n", 47 | "Suggested packages:\n", 48 | " gsfonts graphviz-doc libgd-tools\n", 49 | "The following NEW packages will be installed:\n", 50 | " fontconfig graphviz libann0 libcairo2 libcdt5 libcgraph6 libdatrie1 libgd3\n", 51 | " libgts-0.7-5 libgts-bin libgvc6 libgvpr2 libjbig0 liblab-gamut1 libltdl7\n", 52 | " libpango-1.0-0 libpangocairo-1.0-0 libpangoft2-1.0-0 libpathplan4\n", 53 | " libpixman-1-0 libthai-data libthai0 libtiff5 libwebp6 libxaw7 libxcb-render0\n", 54 | " libxcb-shm0 libxmu6 libxpm4 libxt6\n", 55 | "0 upgraded, 30 newly installed, 0 to remove and 12 not upgraded.\n", 56 | "Need to get 4,154 kB of archives.\n", 57 | "After this operation, 16.1 MB of additional disk space will be used.\n", 58 | "Get:1 http://archive.ubuntu.com/ubuntu bionic/main amd64 fontconfig amd64 2.12.6-0ubuntu2 [169 kB]\n", 59 | "Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libann0 amd64 1.1.2+doc-6 [24.8 kB]\n", 60 | "Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libcdt5 amd64 2.40.1-2 [19.6 kB]\n", 61 | "Get:4 
http://archive.ubuntu.com/ubuntu bionic/universe amd64 libcgraph6 amd64 2.40.1-2 [40.8 kB]\n", 62 | "Get:5 http://archive.ubuntu.com/ubuntu bionic/main amd64 libjbig0 amd64 2.1-3.1build1 [26.7 kB]\n", 63 | "Get:6 http://archive.ubuntu.com/ubuntu bionic/main amd64 libtiff5 amd64 4.0.9-5 [152 kB]\n", 64 | "Get:7 http://archive.ubuntu.com/ubuntu bionic/main amd64 libwebp6 amd64 0.6.1-2 [185 kB]\n", 65 | "Get:8 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxpm4 amd64 1:3.5.12-1 [34.0 kB]\n", 66 | "Get:9 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libgd3 amd64 2.2.5-4ubuntu0.2 [119 kB]\n", 67 | "Get:10 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libgts-0.7-5 amd64 0.7.6+darcs121130-4 [150 kB]\n", 68 | "Get:11 http://archive.ubuntu.com/ubuntu bionic/main amd64 libpixman-1-0 amd64 0.34.0-2 [229 kB]\n", 69 | "Get:12 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxcb-render0 amd64 1.13-1 [14.7 kB]\n", 70 | "Get:13 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxcb-shm0 amd64 1.13-1 [5,572 B]\n", 71 | "Get:14 http://archive.ubuntu.com/ubuntu bionic/main amd64 libcairo2 amd64 1.15.10-2 [580 kB]\n", 72 | "Get:15 http://archive.ubuntu.com/ubuntu bionic/main amd64 libltdl7 amd64 2.4.6-2 [38.8 kB]\n", 73 | "Get:16 http://archive.ubuntu.com/ubuntu bionic/main amd64 libthai-data all 0.1.27-2 [133 kB]\n", 74 | "Get:17 http://archive.ubuntu.com/ubuntu bionic/main amd64 libdatrie1 amd64 0.2.10-7 [17.8 kB]\n", 75 | "Get:18 http://archive.ubuntu.com/ubuntu bionic/main amd64 libthai0 amd64 0.1.27-2 [18.0 kB]\n", 76 | "Get:19 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libpango-1.0-0 amd64 1.40.14-1ubuntu0.1 [153 kB]\n", 77 | "Get:20 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libpangoft2-1.0-0 amd64 1.40.14-1ubuntu0.1 [33.2 kB]\n", 78 | "Get:21 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libpangocairo-1.0-0 amd64 1.40.14-1ubuntu0.1 [20.8 kB]\n", 79 | "Get:22 http://archive.ubuntu.com/ubuntu 
bionic/universe amd64 libpathplan4 amd64 2.40.1-2 [22.6 kB]\n", 80 | "Get:23 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libgvc6 amd64 2.40.1-2 [601 kB]\n", 81 | "Get:24 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libgvpr2 amd64 2.40.1-2 [169 kB]\n", 82 | "Get:25 http://archive.ubuntu.com/ubuntu bionic/universe amd64 liblab-gamut1 amd64 2.40.1-2 [178 kB]\n", 83 | "Get:26 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxt6 amd64 1:1.1.5-1 [160 kB]\n", 84 | "Get:27 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxmu6 amd64 2:1.1.2-2 [46.0 kB]\n", 85 | "Get:28 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxaw7 amd64 2:1.0.13-1 [173 kB]\n", 86 | "Get:29 http://archive.ubuntu.com/ubuntu bionic/universe amd64 graphviz amd64 2.40.1-2 [601 kB]\n", 87 | "Get:30 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libgts-bin amd64 0.7.6+darcs121130-4 [41.3 kB]\n", 88 | "Fetched 4,154 kB in 2s (2,256 kB/s)\n", 89 | "Selecting previously unselected package fontconfig.\n", 90 | "(Reading database ... 
22278 files and directories currently installed.)\n", 91 | "Preparing to unpack .../00-fontconfig_2.12.6-0ubuntu2_amd64.deb ...\n", 92 | "Unpacking fontconfig (2.12.6-0ubuntu2) ...\n", 93 | "Selecting previously unselected package libann0.\n", 94 | "Preparing to unpack .../01-libann0_1.1.2+doc-6_amd64.deb ...\n", 95 | "Unpacking libann0 (1.1.2+doc-6) ...\n", 96 | "Selecting previously unselected package libcdt5.\n", 97 | "Preparing to unpack .../02-libcdt5_2.40.1-2_amd64.deb ...\n", 98 | "Unpacking libcdt5 (2.40.1-2) ...\n", 99 | "Selecting previously unselected package libcgraph6.\n", 100 | "Preparing to unpack .../03-libcgraph6_2.40.1-2_amd64.deb ...\n", 101 | "Unpacking libcgraph6 (2.40.1-2) ...\n", 102 | "Selecting previously unselected package libjbig0:amd64.\n", 103 | "Preparing to unpack .../04-libjbig0_2.1-3.1build1_amd64.deb ...\n", 104 | "Unpacking libjbig0:amd64 (2.1-3.1build1) ...\n", 105 | "Selecting previously unselected package libtiff5:amd64.\n", 106 | "Preparing to unpack .../05-libtiff5_4.0.9-5_amd64.deb ...\n", 107 | "Unpacking libtiff5:amd64 (4.0.9-5) ...\n", 108 | "Selecting previously unselected package libwebp6:amd64.\n", 109 | "Preparing to unpack .../06-libwebp6_0.6.1-2_amd64.deb ...\n", 110 | "Unpacking libwebp6:amd64 (0.6.1-2) ...\n", 111 | "Selecting previously unselected package libxpm4:amd64.\n", 112 | "Preparing to unpack .../07-libxpm4_1%3a3.5.12-1_amd64.deb ...\n", 113 | "Unpacking libxpm4:amd64 (1:3.5.12-1) ...\n", 114 | "Selecting previously unselected package libgd3:amd64.\n", 115 | "Preparing to unpack .../08-libgd3_2.2.5-4ubuntu0.2_amd64.deb ...\n", 116 | "Unpacking libgd3:amd64 (2.2.5-4ubuntu0.2) ...\n", 117 | "Selecting previously unselected package libgts-0.7-5:amd64.\n", 118 | "Preparing to unpack .../09-libgts-0.7-5_0.7.6+darcs121130-4_amd64.deb ...\n", 119 | "Unpacking libgts-0.7-5:amd64 (0.7.6+darcs121130-4) ...\n", 120 | "Selecting previously unselected package libpixman-1-0:amd64.\n", 121 | "Preparing to unpack 
.../10-libpixman-1-0_0.34.0-2_amd64.deb ...\n", 122 | "Unpacking libpixman-1-0:amd64 (0.34.0-2) ...\n", 123 | "Selecting previously unselected package libxcb-render0:amd64.\n", 124 | "Preparing to unpack .../11-libxcb-render0_1.13-1_amd64.deb ...\n", 125 | "Unpacking libxcb-render0:amd64 (1.13-1) ...\n", 126 | "Selecting previously unselected package libxcb-shm0:amd64.\n", 127 | "Preparing to unpack .../12-libxcb-shm0_1.13-1_amd64.deb ...\n", 128 | "Unpacking libxcb-shm0:amd64 (1.13-1) ...\n", 129 | "Selecting previously unselected package libcairo2:amd64.\n", 130 | "Preparing to unpack .../13-libcairo2_1.15.10-2_amd64.deb ...\n", 131 | "Unpacking libcairo2:amd64 (1.15.10-2) ...\n", 132 | "Selecting previously unselected package libltdl7:amd64.\n", 133 | "Preparing to unpack .../14-libltdl7_2.4.6-2_amd64.deb ...\n", 134 | "Unpacking libltdl7:amd64 (2.4.6-2) ...\n", 135 | "Selecting previously unselected package libthai-data.\n", 136 | "Preparing to unpack .../15-libthai-data_0.1.27-2_all.deb ...\n", 137 | "Unpacking libthai-data (0.1.27-2) ...\n", 138 | "Selecting previously unselected package libdatrie1:amd64.\n", 139 | "Preparing to unpack .../16-libdatrie1_0.2.10-7_amd64.deb ...\n", 140 | "Unpacking libdatrie1:amd64 (0.2.10-7) ...\n", 141 | "Selecting previously unselected package libthai0:amd64.\n", 142 | "Preparing to unpack .../17-libthai0_0.1.27-2_amd64.deb ...\n", 143 | "Unpacking libthai0:amd64 (0.1.27-2) ...\n", 144 | "Selecting previously unselected package libpango-1.0-0:amd64.\n", 145 | "Preparing to unpack .../18-libpango-1.0-0_1.40.14-1ubuntu0.1_amd64.deb ...\n", 146 | "Unpacking libpango-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n", 147 | "Selecting previously unselected package libpangoft2-1.0-0:amd64.\n", 148 | "Preparing to unpack .../19-libpangoft2-1.0-0_1.40.14-1ubuntu0.1_amd64.deb ...\n", 149 | "Unpacking libpangoft2-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n", 150 | "Selecting previously unselected package libpangocairo-1.0-0:amd64.\n", 151 | 
"Preparing to unpack .../20-libpangocairo-1.0-0_1.40.14-1ubuntu0.1_amd64.deb ...\n", 152 | "Unpacking libpangocairo-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n", 153 | "Selecting previously unselected package libpathplan4.\n", 154 | "Preparing to unpack .../21-libpathplan4_2.40.1-2_amd64.deb ...\n", 155 | "Unpacking libpathplan4 (2.40.1-2) ...\n", 156 | "Selecting previously unselected package libgvc6.\n", 157 | "Preparing to unpack .../22-libgvc6_2.40.1-2_amd64.deb ...\n", 158 | "Unpacking libgvc6 (2.40.1-2) ...\n", 159 | "Selecting previously unselected package libgvpr2.\n", 160 | "Preparing to unpack .../23-libgvpr2_2.40.1-2_amd64.deb ...\n", 161 | "Unpacking libgvpr2 (2.40.1-2) ...\n", 162 | "Selecting previously unselected package liblab-gamut1.\n", 163 | "Preparing to unpack .../24-liblab-gamut1_2.40.1-2_amd64.deb ...\n", 164 | "Unpacking liblab-gamut1 (2.40.1-2) ...\n", 165 | "Selecting previously unselected package libxt6:amd64.\n", 166 | "Preparing to unpack .../25-libxt6_1%3a1.1.5-1_amd64.deb ...\n", 167 | "Unpacking libxt6:amd64 (1:1.1.5-1) ...\n", 168 | "Selecting previously unselected package libxmu6:amd64.\n", 169 | "Preparing to unpack .../26-libxmu6_2%3a1.1.2-2_amd64.deb ...\n", 170 | "Unpacking libxmu6:amd64 (2:1.1.2-2) ...\n", 171 | "Selecting previously unselected package libxaw7:amd64.\n", 172 | "Preparing to unpack .../27-libxaw7_2%3a1.0.13-1_amd64.deb ...\n", 173 | "Unpacking libxaw7:amd64 (2:1.0.13-1) ...\n", 174 | "Selecting previously unselected package graphviz.\n", 175 | "Preparing to unpack .../28-graphviz_2.40.1-2_amd64.deb ...\n", 176 | "Unpacking graphviz (2.40.1-2) ...\n", 177 | "Selecting previously unselected package libgts-bin.\n", 178 | "Preparing to unpack .../29-libgts-bin_0.7.6+darcs121130-4_amd64.deb ...\n", 179 | "Unpacking libgts-bin (0.7.6+darcs121130-4) ...\n", 180 | "Setting up libgts-0.7-5:amd64 (0.7.6+darcs121130-4) ...\n", 181 | "Setting up libpathplan4 (2.40.1-2) ...\n", 182 | "Setting up liblab-gamut1 (2.40.1-2) ...\n", 
183 | "Setting up libxcb-render0:amd64 (1.13-1) ...\n", 184 | "Setting up libjbig0:amd64 (2.1-3.1build1) ...\n", 185 | "Setting up libdatrie1:amd64 (0.2.10-7) ...\n", 186 | "Setting up libtiff5:amd64 (4.0.9-5) ...\n", 187 | "Setting up libpixman-1-0:amd64 (0.34.0-2) ...\n", 188 | "Processing triggers for libc-bin (2.27-3ubuntu1) ...\n", 189 | "Setting up libltdl7:amd64 (2.4.6-2) ...\n", 190 | "Setting up libann0 (1.1.2+doc-6) ...\n", 191 | "Setting up libxcb-shm0:amd64 (1.13-1) ...\n", 192 | "Setting up libxpm4:amd64 (1:3.5.12-1) ...\n", 193 | "Setting up libxt6:amd64 (1:1.1.5-1) ...\n", 194 | "Setting up libgts-bin (0.7.6+darcs121130-4) ...\n", 195 | "Setting up libthai-data (0.1.27-2) ...\n", 196 | "Setting up libcdt5 (2.40.1-2) ...\n", 197 | "Setting up fontconfig (2.12.6-0ubuntu2) ...\n", 198 | "Regenerating fonts cache... done.\n", 199 | "Setting up libcgraph6 (2.40.1-2) ...\n", 200 | "Setting up libwebp6:amd64 (0.6.1-2) ...\n", 201 | "Setting up libcairo2:amd64 (1.15.10-2) ...\n", 202 | "Setting up libgvpr2 (2.40.1-2) ...\n", 203 | "Setting up libgd3:amd64 (2.2.5-4ubuntu0.2) ...\n", 204 | "Setting up libthai0:amd64 (0.1.27-2) ...\n", 205 | "Setting up libxmu6:amd64 (2:1.1.2-2) ...\n", 206 | "Setting up libpango-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n", 207 | "Setting up libxaw7:amd64 (2:1.0.13-1) ...\n", 208 | "Setting up libpangoft2-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n", 209 | "Setting up libpangocairo-1.0-0:amd64 (1.40.14-1ubuntu0.1) ...\n", 210 | "Setting up libgvc6 (2.40.1-2) ...\n", 211 | "Setting up graphviz (2.40.1-2) ...\n", 212 | "Processing triggers for libc-bin (2.27-3ubuntu1) ...\n" 213 | ], 214 | "name": "stdout" 215 | } 216 | ] 217 | }, 218 | { 219 | "metadata": { 220 | "id": "y7ik04NlDZMA", 221 | "colab_type": "code", 222 | "colab": { 223 | "base_uri": "https://localhost:8080/", 224 | "height": 198 225 | }, 226 | "outputId": "73c13868-60e4-4a25-9338-02f324cabb18" 227 | }, 228 | "cell_type": "code", 229 | "source": [ 230 | "import pandas as 
pd\n", 231 | "\n", 232 | "uri = \"https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv\"\n", 233 | "dados = pd.read_csv(uri)\n", 234 | "dados.head()" 235 | ], 236 | "execution_count": 1, 237 | "outputs": [ 238 | { 239 | "output_type": "execute_result", 240 | "data": { 241 | "text/html": [ 242 | "
\n", 243 | "\n", 256 | "\n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | "
Unnamed: 0mileage_per_yearmodel_yearpricesold
0021801200030941.02yes
117843199840557.96yes
227109200689627.50no
3326823201595276.14no
4479352014117384.68yes
\n", 310 | "
" 311 | ], 312 | "text/plain": [ 313 | " Unnamed: 0 mileage_per_year model_year price sold\n", 314 | "0 0 21801 2000 30941.02 yes\n", 315 | "1 1 7843 1998 40557.96 yes\n", 316 | "2 2 7109 2006 89627.50 no\n", 317 | "3 3 26823 2015 95276.14 no\n", 318 | "4 4 7935 2014 117384.68 yes" 319 | ] 320 | }, 321 | "metadata": { 322 | "tags": [] 323 | }, 324 | "execution_count": 1 325 | } 326 | ] 327 | }, 328 | { 329 | "metadata": { 330 | "id": "FqFrPmgJDhvM", 331 | "colab_type": "code", 332 | "colab": { 333 | "base_uri": "https://localhost:8080/", 334 | "height": 198 335 | }, 336 | "outputId": "46b16944-2a4a-4b53-b970-5a96a9b7d867" 337 | }, 338 | "cell_type": "code", 339 | "source": [ 340 | "a_renomear = {\n", 341 | " 'mileage_per_year' : 'milhas_por_ano',\n", 342 | " 'model_year' : 'ano_do_modelo',\n", 343 | " 'price' : 'preco',\n", 344 | " 'sold' : 'vendido'\n", 345 | "}\n", 346 | "dados = dados.rename(columns=a_renomear)\n", 347 | "dados.head()" 348 | ], 349 | "execution_count": 2, 350 | "outputs": [ 351 | { 352 | "output_type": "execute_result", 353 | "data": { 354 | "text/html": [ 355 | "
\n", 356 | "\n", 369 | "\n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | "
Unnamed: 0milhas_por_anoano_do_modeloprecovendido
0021801200030941.02yes
117843199840557.96yes
227109200689627.50no
3326823201595276.14no
4479352014117384.68yes
\n", 423 | "
" 424 | ], 425 | "text/plain": [ 426 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido\n", 427 | "0 0 21801 2000 30941.02 yes\n", 428 | "1 1 7843 1998 40557.96 yes\n", 429 | "2 2 7109 2006 89627.50 no\n", 430 | "3 3 26823 2015 95276.14 no\n", 431 | "4 4 7935 2014 117384.68 yes" 432 | ] 433 | }, 434 | "metadata": { 435 | "tags": [] 436 | }, 437 | "execution_count": 2 438 | } 439 | ] 440 | }, 441 | { 442 | "metadata": { 443 | "id": "J31fUGbVEKpW", 444 | "colab_type": "code", 445 | "colab": { 446 | "base_uri": "https://localhost:8080/", 447 | "height": 198 448 | }, 449 | "outputId": "ad0a651f-320a-4f5e-8e2c-706bf6937fbb" 450 | }, 451 | "cell_type": "code", 452 | "source": [ 453 | "a_trocar = {\n", 454 | " 'no' : 0,\n", 455 | " 'yes' : 1\n", 456 | "}\n", 457 | "dados.vendido = dados.vendido.map(a_trocar)\n", 458 | "dados.head()" 459 | ], 460 | "execution_count": 3, 461 | "outputs": [ 462 | { 463 | "output_type": "execute_result", 464 | "data": { 465 | "text/html": [ 466 | "
\n", 467 | "\n", 480 | "\n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | "
Unnamed: 0milhas_por_anoano_do_modeloprecovendido
0021801200030941.021
117843199840557.961
227109200689627.500
3326823201595276.140
4479352014117384.681
\n", 534 | "
" 535 | ], 536 | "text/plain": [ 537 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido\n", 538 | "0 0 21801 2000 30941.02 1\n", 539 | "1 1 7843 1998 40557.96 1\n", 540 | "2 2 7109 2006 89627.50 0\n", 541 | "3 3 26823 2015 95276.14 0\n", 542 | "4 4 7935 2014 117384.68 1" 543 | ] 544 | }, 545 | "metadata": { 546 | "tags": [] 547 | }, 548 | "execution_count": 3 549 | } 550 | ] 551 | }, 552 | { 553 | "metadata": { 554 | "id": "tZFog8O9EXYD", 555 | "colab_type": "code", 556 | "colab": { 557 | "base_uri": "https://localhost:8080/", 558 | "height": 198 559 | }, 560 | "outputId": "5792a2f8-f777-4237-e813-697f60309951" 561 | }, 562 | "cell_type": "code", 563 | "source": [ 564 | "from datetime import datetime\n", 565 | "\n", 566 | "ano_atual = datetime.today().year\n", 567 | "dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo\n", 568 | "dados.head()" 569 | ], 570 | "execution_count": 4, 571 | "outputs": [ 572 | { 573 | "output_type": "execute_result", 574 | "data": { 575 | "text/html": [ 576 | "
\n", 577 | "\n", 590 | "\n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | "
Unnamed: 0milhas_por_anoano_do_modeloprecovendidoidade_do_modelo
0021801200030941.02118
117843199840557.96120
227109200689627.50012
3326823201595276.1403
4479352014117384.6814
\n", 650 | "
" 651 | ], 652 | "text/plain": [ 653 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido \\\n", 654 | "0 0 21801 2000 30941.02 1 \n", 655 | "1 1 7843 1998 40557.96 1 \n", 656 | "2 2 7109 2006 89627.50 0 \n", 657 | "3 3 26823 2015 95276.14 0 \n", 658 | "4 4 7935 2014 117384.68 1 \n", 659 | "\n", 660 | " idade_do_modelo \n", 661 | "0 18 \n", 662 | "1 20 \n", 663 | "2 12 \n", 664 | "3 3 \n", 665 | "4 4 " 666 | ] 667 | }, 668 | "metadata": { 669 | "tags": [] 670 | }, 671 | "execution_count": 4 672 | } 673 | ] 674 | }, 675 | { 676 | "metadata": { 677 | "id": "3wWWgxhcFbR9", 678 | "colab_type": "code", 679 | "colab": { 680 | "base_uri": "https://localhost:8080/", 681 | "height": 198 682 | }, 683 | "outputId": "b4c147dc-c2e5-4bfd-e78f-5a34f72770d6" 684 | }, 685 | "cell_type": "code", 686 | "source": [ 687 | "dados['km_por_ano'] = dados.milhas_por_ano * 1.60934\n", 688 | "dados.head()" 689 | ], 690 | "execution_count": 5, 691 | "outputs": [ 692 | { 693 | "output_type": "execute_result", 694 | "data": { 695 | "text/html": [ 696 | "
\n", 697 | "\n", 710 | "\n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | "
Unnamed: 0milhas_por_anoano_do_modeloprecovendidoidade_do_modelokm_por_ano
0021801200030941.0211835085.22134
117843199840557.9612012622.05362
227109200689627.5001211440.79806
3326823201595276.140343167.32682
4479352014117384.681412770.11290
\n", 776 | "
" 777 | ], 778 | "text/plain": [ 779 | " Unnamed: 0 milhas_por_ano ano_do_modelo preco vendido \\\n", 780 | "0 0 21801 2000 30941.02 1 \n", 781 | "1 1 7843 1998 40557.96 1 \n", 782 | "2 2 7109 2006 89627.50 0 \n", 783 | "3 3 26823 2015 95276.14 0 \n", 784 | "4 4 7935 2014 117384.68 1 \n", 785 | "\n", 786 | " idade_do_modelo km_por_ano \n", 787 | "0 18 35085.22134 \n", 788 | "1 20 12622.05362 \n", 789 | "2 12 11440.79806 \n", 790 | "3 3 43167.32682 \n", 791 | "4 4 12770.11290 " 792 | ] 793 | }, 794 | "metadata": { 795 | "tags": [] 796 | }, 797 | "execution_count": 5 798 | } 799 | ] 800 | }, 801 | { 802 | "metadata": { 803 | "id": "MfjQNKlyFo2S", 804 | "colab_type": "code", 805 | "colab": { 806 | "base_uri": "https://localhost:8080/", 807 | "height": 198 808 | }, 809 | "outputId": "7588f6a5-ff37-4996-91a5-d9ee4b5543d7" 810 | }, 811 | "cell_type": "code", 812 | "source": [ 813 | "dados = dados.drop(columns = [\"Unnamed: 0\", \"milhas_por_ano\",\"ano_do_modelo\"], axis=1)\n", 814 | "dados.head()" 815 | ], 816 | "execution_count": 6, 817 | "outputs": [ 818 | { 819 | "output_type": "execute_result", 820 | "data": { 821 | "text/html": [ 822 | "
\n", 823 | "\n", 836 | "\n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | "
precovendidoidade_do_modelokm_por_ano
030941.0211835085.22134
140557.9612012622.05362
289627.5001211440.79806
395276.140343167.32682
4117384.681412770.11290
\n", 884 | "
" 885 | ], 886 | "text/plain": [ 887 | " preco vendido idade_do_modelo km_por_ano\n", 888 | "0 30941.02 1 18 35085.22134\n", 889 | "1 40557.96 1 20 12622.05362\n", 890 | "2 89627.50 0 12 11440.79806\n", 891 | "3 95276.14 0 3 43167.32682\n", 892 | "4 117384.68 1 4 12770.11290" 893 | ] 894 | }, 895 | "metadata": { 896 | "tags": [] 897 | }, 898 | "execution_count": 6 899 | } 900 | ] 901 | }, 902 | { 903 | "metadata": { 904 | "id": "E3xebM4FF0Tc", 905 | "colab_type": "code", 906 | "colab": { 907 | "base_uri": "https://localhost:8080/", 908 | "height": 52 909 | }, 910 | "outputId": "7d48ae66-beb0-4884-cbfa-c81c9ce95eb1" 911 | }, 912 | "cell_type": "code", 913 | "source": [ 914 | "import numpy as np\n", 915 | "from sklearn.model_selection import train_test_split\n", 916 | "from sklearn.svm import LinearSVC\n", 917 | "from sklearn.metrics import accuracy_score\n", 918 | "\n", 919 | "x = dados[[\"preco\", \"idade_do_modelo\",\"km_por_ano\"]]\n", 920 | "y = dados[\"vendido\"]\n", 921 | "\n", 922 | "SEED = 5\n", 923 | "np.random.seed(SEED)\n", 924 | "treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n", 925 | " stratify = y)\n", 926 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n", 927 | "\n", 928 | "modelo = LinearSVC()\n", 929 | "modelo.fit(treino_x, treino_y)\n", 930 | "previsoes = modelo.predict(teste_x)\n", 931 | "\n", 932 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n", 933 | "print(\"A acurácia foi %.2f%%\" % acuracia)" 934 | ], 935 | "execution_count": 17, 936 | "outputs": [ 937 | { 938 | "output_type": "stream", 939 | "text": [ 940 | "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n", 941 | "A acurácia foi 57.88%\n" 942 | ], 943 | "name": "stdout" 944 | } 945 | ] 946 | }, 947 | { 948 | "metadata": { 949 | "id": "G2ZFWoPkGONL", 950 | "colab_type": "code", 951 | "colab": { 952 | "base_uri": "https://localhost:8080/", 953 | "height": 35 954 | }, 955 | 
"outputId": "f86fecd4-0528-420d-face-b3ab72af6a23" 956 | }, 957 | "cell_type": "code", 958 | "source": [ 959 | "from sklearn.dummy import DummyClassifier\n", 960 | "\n", 961 | "dummy_stratified = DummyClassifier()\n", 962 | "dummy_stratified.fit(treino_x, treino_y)\n", 963 | "acuracia = dummy_stratified.score(teste_x, teste_y) * 100\n", 964 | "\n", 965 | "print(\"A acurácia do dummy stratified foi %.2f%%\" % acuracia)" 966 | ], 967 | "execution_count": 18, 968 | "outputs": [ 969 | { 970 | "output_type": "stream", 971 | "text": [ 972 | "A acurácia do dummy stratified foi 52.44%\n" 973 | ], 974 | "name": "stdout" 975 | } 976 | ] 977 | }, 978 | { 979 | "metadata": { 980 | "id": "469ZKbHJIHL0", 981 | "colab_type": "code", 982 | "colab": { 983 | "base_uri": "https://localhost:8080/", 984 | "height": 35 985 | }, 986 | "outputId": "7348330f-4752-4e1e-8f27-2420acf2ad14" 987 | }, 988 | "cell_type": "code", 989 | "source": [ 990 | "from sklearn.dummy import DummyClassifier\n", 991 | "\n", 992 | "dummy_mostfrequent = DummyClassifier()\n", 993 | "dummy_mostfrequent.fit(treino_x, treino_y)\n", 994 | "acuracia = dummy_mostfrequent.score(teste_x, teste_y) * 100\n", 995 | "\n", 996 | "print(\"A acurácia do dummy mostfrequent foi %.2f%%\" % acuracia)" 997 | ], 998 | "execution_count": 19, 999 | "outputs": [ 1000 | { 1001 | "output_type": "stream", 1002 | "text": [ 1003 | "A acurácia do dummy mostfrequent foi 50.68%\n" 1004 | ], 1005 | "name": "stdout" 1006 | } 1007 | ] 1008 | }, 1009 | { 1010 | "metadata": { 1011 | "id": "S_5AfFerIsH_", 1012 | "colab_type": "code", 1013 | "colab": { 1014 | "base_uri": "https://localhost:8080/", 1015 | "height": 52 1016 | }, 1017 | "outputId": "569a686d-9b86-459f-d30d-5ce738bb6897" 1018 | }, 1019 | "cell_type": "code", 1020 | "source": [ 1021 | "from sklearn.preprocessing import StandardScaler\n", 1022 | "from sklearn.model_selection import train_test_split\n", 1023 | "from sklearn.svm import SVC\n", 1024 | "from sklearn.metrics import 
accuracy_score\n", 1025 | "\n", 1026 | "SEED = 5\n", 1027 | "np.random.seed(SEED)\n", 1028 | "raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n", 1029 | " stratify = y)\n", 1030 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n", 1031 | "\n", 1032 | "scaler = StandardScaler()\n", 1033 | "scaler.fit(raw_treino_x)\n", 1034 | "treino_x = scaler.transform(raw_treino_x)\n", 1035 | "teste_x = scaler.transform(raw_teste_x)\n", 1036 | "\n", 1037 | "modelo = SVC()\n", 1038 | "modelo.fit(treino_x, treino_y)\n", 1039 | "previsoes = modelo.predict(teste_x)\n", 1040 | "\n", 1041 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n", 1042 | "print(\"A acurácia foi %.2f%%\" % acuracia)\n" 1043 | ], 1044 | "execution_count": 20, 1045 | "outputs": [ 1046 | { 1047 | "output_type": "stream", 1048 | "text": [ 1049 | "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n", 1050 | "A acurácia foi 77.48%\n" 1051 | ], 1052 | "name": "stdout" 1053 | } 1054 | ] 1055 | }, 1056 | { 1057 | "metadata": { 1058 | "id": "-1YsD-whJuGR", 1059 | "colab_type": "code", 1060 | "colab": { 1061 | "base_uri": "https://localhost:8080/", 1062 | "height": 52 1063 | }, 1064 | "outputId": "5e0c7f00-98ce-4b32-bbc6-908dde4cc2f8" 1065 | }, 1066 | "cell_type": "code", 1067 | "source": [ 1068 | "from sklearn.preprocessing import StandardScaler\n", 1069 | "from sklearn.model_selection import train_test_split\n", 1070 | "from sklearn.tree import DecisionTreeClassifier\n", 1071 | "from sklearn.metrics import accuracy_score\n", 1072 | "\n", 1073 | "SEED = 5\n", 1074 | "np.random.seed(SEED)\n", 1075 | "raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,\n", 1076 | " stratify = y)\n", 1077 | "print(\"Treinaremos com %d elementos e testaremos com %d elementos\" % (len(treino_x), len(teste_x)))\n", 1078 | "\n", 1079 | "modelo = DecisionTreeClassifier(max_depth=3)\n", 1080 | 
"modelo.fit(raw_treino_x, treino_y)\n", 1081 | "previsoes = modelo.predict(raw_teste_x)\n", 1082 | "\n", 1083 | "acuracia = accuracy_score(teste_y, previsoes) * 100\n", 1084 | "print(\"A acurácia foi %.2f%%\" % acuracia)\n" 1085 | ], 1086 | "execution_count": 37, 1087 | "outputs": [ 1088 | { 1089 | "output_type": "stream", 1090 | "text": [ 1091 | "Treinaremos com 7500 elementos e testaremos com 2500 elementos\n", 1092 | "A acurácia foi 79.28%\n" 1093 | ], 1094 | "name": "stdout" 1095 | } 1096 | ] 1097 | }, 1098 | { 1099 | "metadata": { 1100 | "id": "-8O83jbOLctO", 1101 | "colab_type": "code", 1102 | "colab": { 1103 | "base_uri": "https://localhost:8080/", 1104 | "height": 619 1105 | }, 1106 | "outputId": "8ddc6972-e0a7-40d3-adba-0877358ab355" 1107 | }, 1108 | "cell_type": "code", 1109 | "source": [ 1110 | "from sklearn.tree import export_graphviz\n", 1111 | "import graphviz\n", 1112 | "\n", 1113 | "features = x.columns\n", 1114 | "dot_data = export_graphviz(modelo, out_file=None,\n", 1115 | " filled = True, rounded = True,\n", 1116 | " feature_names = features,\n", 1117 | " class_names = [\"não\", \"sim\"])\n", 1118 | "grafico = graphviz.Source(dot_data)\n", 1119 | "grafico" 1120 | ], 1121 | "execution_count": 38, 1122 | "outputs": [ 1123 | { 1124 | "output_type": "execute_result", 1125 | "data": { 1126 | "text/plain": [ 1127 | "" 1128 | ], 1129 | "image/svg+xml": "\n\n\n\n\n\nTree\n\n\n\n0\n\npreco <= 59999.074\ngini = 0.487\nsamples = 7500\nvalue = [3150, 4350]\nclass = sim\n\n\n\n1\n\npreco <= 40070.156\ngini = 0.202\nsamples = 3248\nvalue = [370, 2878]\nclass = sim\n\n\n\n0->1\n\n\nTrue\n\n\n\n6\n\nkm_por_ano <= 24112.742\ngini = 0.453\nsamples = 4252\nvalue = [2780, 1472]\nclass = não\n\n\n\n0->6\n\n\nFalse\n\n\n\n2\n\ngini = 0.0\nsamples = 1441\nvalue = [0, 1441]\nclass = sim\n\n\n\n1->2\n\n\n\n\n\n3\n\npreco <= 40723.648\ngini = 0.326\nsamples = 1807\nvalue = [370, 1437]\nclass = sim\n\n\n\n1->3\n\n\n\n\n\n4\n\ngini = 0.441\nsamples = 61\nvalue = [20, 
41]\nclass = sim\n\n\n\n3->4\n\n\n\n\n\n5\n\ngini = 0.321\nsamples = 1746\nvalue = [350, 1396]\nclass = sim\n\n\n\n3->5\n\n\n\n\n\n7\n\npreco <= 99974.406\ngini = 0.498\nsamples = 2649\nvalue = [1409, 1240]\nclass = não\n\n\n\n6->7\n\n\n\n\n\n10\n\npreco <= 100076.953\ngini = 0.248\nsamples = 1603\nvalue = [1371, 232]\nclass = não\n\n\n\n6->10\n\n\n\n\n\n8\n\ngini = 0.49\nsamples = 2223\nvalue = [1266, 957]\nclass = não\n\n\n\n7->8\n\n\n\n\n\n9\n\ngini = 0.446\nsamples = 426\nvalue = [143, 283]\nclass = sim\n\n\n\n7->9\n\n\n\n\n\n11\n\ngini = 0.086\nsamples = 1356\nvalue = [1295, 61]\nclass = não\n\n\n\n10->11\n\n\n\n\n\n12\n\ngini = 0.426\nsamples = 247\nvalue = [76, 171]\nclass = sim\n\n\n\n10->12\n\n\n\n\n\n" 1130 | }, 1131 | "metadata": { 1132 | "tags": [] 1133 | }, 1134 | "execution_count": 38 1135 | } 1136 | ] 1137 | }, 1138 | { 1139 | "metadata": { 1140 | "id": "TLIr9EPALvM4", 1141 | "colab_type": "code", 1142 | "colab": {} 1143 | }, 1144 | "cell_type": "code", 1145 | "source": [ 1146 | "" 1147 | ], 1148 | "execution_count": 0, 1149 | "outputs": [] 1150 | } 1151 | ] 1152 | } -------------------------------------------------------------------------------- /aula5.4/introdução_a_machine_learning_4.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Introdução a Machine Learning - 4.ipynb 3 | 4 | Automatically generated by Colaboratory. 
5 | 6 | Original file is located at 7 | https://colab.research.google.com/drive/1RpYAAROMa4C86iZscVUzaWIeVYSJapyE 8 | """ 9 | 10 | !pip install graphviz==0.10 11 | !apt-get install graphviz 12 | 13 | import pandas as pd 14 | 15 | uri = "https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv" 16 | dados = pd.read_csv(uri) 17 | dados.head() 18 | 19 | a_renomear = { 20 | 'mileage_per_year' : 'milhas_por_ano', 21 | 'model_year' : 'ano_do_modelo', 22 | 'price' : 'preco', 23 | 'sold' : 'vendido' 24 | } 25 | dados = dados.rename(columns=a_renomear) 26 | dados.head() 27 | 28 | a_trocar = { 29 | 'no' : 0, 30 | 'yes' : 1 31 | } 32 | dados.vendido = dados.vendido.map(a_trocar) 33 | dados.head() 34 | 35 | from datetime import datetime 36 | 37 | ano_atual = datetime.today().year 38 | dados['idade_do_modelo'] = ano_atual - dados.ano_do_modelo 39 | dados.head() 40 | 41 | dados['km_por_ano'] = dados.milhas_por_ano * 1.60934 42 | dados.head() 43 | 44 | dados = dados.drop(columns = ["Unnamed: 0", "milhas_por_ano","ano_do_modelo"], axis=1) 45 | dados.head() 46 | 47 | import numpy as np 48 | from sklearn.model_selection import train_test_split 49 | from sklearn.svm import LinearSVC 50 | from sklearn.metrics import accuracy_score 51 | 52 | x = dados[["preco", "idade_do_modelo","km_por_ano"]] 53 | y = dados["vendido"] 54 | 55 | SEED = 5 56 | np.random.seed(SEED) 57 | treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25, 58 | stratify = y) 59 | print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(treino_x), len(teste_x))) 60 | 61 | modelo = LinearSVC() 62 | modelo.fit(treino_x, treino_y) 63 | previsoes = modelo.predict(teste_x) 64 | 65 | acuracia = accuracy_score(teste_y, previsoes) * 100 66 | print("A acurácia foi %.2f%%" % acuracia) 67 | 68 | from sklearn.dummy import DummyClassifier 69 | 70 | dummy_stratified = DummyClassifier() 71 | 
# Score the baseline classifier created just above on the held-out split.
dummy_stratified.fit(treino_x, treino_y)
acuracia = dummy_stratified.score(teste_x, teste_y) * 100

print("A acurácia do dummy stratified foi %.2f%%" % acuracia)

from sklearn.dummy import DummyClassifier

# Second baseline: always predict the most frequent training class.
# The strategy must be requested explicitly; a bare DummyClassifier() uses the
# library default and would not be a most-frequent baseline at all.
dummy_mostfrequent = DummyClassifier(strategy="most_frequent")
dummy_mostfrequent.fit(treino_x, treino_y)
acuracia = dummy_mostfrequent.score(teste_x, teste_y) * 100

print("A acurácia do dummy mostfrequent foi %.2f%%" % acuracia)

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Re-split with the same seed, keeping the unscaled frames under raw_* names.
SEED = 5
np.random.seed(SEED)
raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
                                                                 stratify = y)
# Report the sizes of the split that was just made (not of the earlier
# treino_x/teste_x variables, which are stale at this point).
print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(raw_treino_x), len(raw_teste_x)))

# Fit the scaler on the training data only, then apply it to both partitions
# so no information from the test set leaks into the scaling parameters.
scaler = StandardScaler()
scaler.fit(raw_treino_x)
treino_x = scaler.transform(raw_treino_x)
teste_x = scaler.transform(raw_teste_x)

# SVM with the default kernel, trained on the standardised features.
modelo = SVC()
modelo.fit(treino_x, treino_y)
previsoes = modelo.predict(teste_x)

acuracia = accuracy_score(teste_y, previsoes) * 100
print("A acurácia foi %.2f%%" % acuracia)

from sklearn.tree import DecisionTreeClassifier

# Same seeded split again; the tree is trained on the raw features so the
# thresholds it learns stay in the original units.
SEED = 5
np.random.seed(SEED)
raw_treino_x, raw_teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.25,
                                                                 stratify = y)
print("Treinaremos com %d elementos e testaremos com %d elementos" % (len(raw_treino_x), len(raw_teste_x)))

# A shallow tree (depth 3) keeps the model small enough to draw and read.
modelo = DecisionTreeClassifier(max_depth=3)
modelo.fit(raw_treino_x, treino_y)
previsoes = modelo.predict(raw_teste_x)

acuracia = accuracy_score(teste_y, previsoes) * 100
# Report the accuracy computed for the decision tree.
print("A acurácia foi %.2f%%" % acuracia)

import graphviz
from sklearn.tree import export_graphviz

# Export the fitted tree as DOT source, labelling the split features with the
# column names of x and the two classes with "não" / "sim".
features = x.columns
dot_data = export_graphviz(
    modelo,
    out_file=None,
    feature_names=features,
    class_names=["não", "sim"],
    filled=True,
    rounded=True,
)

# Wrap the DOT source in a graphviz object; the bare expression on the last
# line is what makes a notebook cell display the drawing.
grafico = graphviz.Source(dot_data)
grafico