├── README.md ├── Tematik_Egitimler ├── Derin_Ogrenme_ile_Goruntu_Isleme_Temmuz_2024 │ ├── Goruntu_Isleme_Yapay_Sinir_Aglari_Derin_Ogrenme_Giris │ │ ├── CNN_Giris.ipynb │ │ ├── CNN_Mimarileri.ipynb │ │ ├── CNN_SVM_KNN.ipynb │ │ ├── Confusion_Matrix.ipynb │ │ ├── Model_Mimarileri_3.ipynb │ │ └── YSA_Giris.ipynb │ ├── Nesne_Tespiti │ │ ├── Faster_RCNN.ipynb │ │ ├── OD_1.ipynb │ │ ├── OD_2.ipynb │ │ ├── OD_3.ipynb │ │ └── OD_Yolol.ipynb │ └── Uygulamalar │ │ ├── T_Uyg_1_Overfitting.ipynb │ │ ├── T_Uyg_2_Underfitting.ipynb │ │ ├── T_Uyg_3_Model_kaydetme_ve_test.ipynb │ │ ├── T_Uyg_4_automatic_mask_generator.ipynb │ │ └── T_Uyg_5_predictor.ipynb ├── Makine_Ogrenmesi_Mayis_2024 │ ├── Kumeleme_Regresyon │ │ ├── clustering.ipynb │ │ ├── data │ │ │ ├── experience_salary_dataset │ │ │ └── experience_sale_dataset │ │ ├── non_linear_regression.ipynb │ │ └── simple_linear_regression.ipynb │ ├── Siniflandirma │ │ ├── siniflandirma.ipynb │ │ ├── siniflandirma_karsilastirma_1.ipynb │ │ └── siniflandirma_karsilastirma_2.ipynb │ ├── Uygulama_1 │ │ ├── SMOTE_example.ipynb │ │ ├── clus_crop_rec.ipynb │ │ ├── clus_iris.ipynb │ │ ├── data │ │ │ ├── Crop_Recommendation.csv │ │ │ ├── ankara_hk_data.csv │ │ │ ├── diamonds.csv │ │ │ ├── iris.csv │ │ │ └── temp_export_dir │ │ │ │ ├── nanned.csv │ │ │ │ ├── org.csv │ │ │ │ ├── station_14_nanned.csv │ │ │ │ ├── station_14_org.csv │ │ │ │ ├── station_15_nanned.csv │ │ │ │ ├── station_15_org.csv │ │ │ │ ├── station_17_nanned.csv │ │ │ │ ├── station_17_org.csv │ │ │ │ ├── station_1_nanned.csv │ │ │ │ ├── station_1_org.csv │ │ │ │ ├── station_2_nanned.csv │ │ │ │ ├── station_2_org.csv │ │ │ │ ├── station_9_nanned.csv │ │ │ │ └── station_9_org.csv │ │ ├── encoding_test.ipynb │ │ ├── missing_data_imputation.ipynb │ │ ├── reg_diamonds.ipynb │ │ ├── transformations.py │ │ ├── transformations_bcyj.ipynb │ │ ├── utils_encoding.py │ │ └── utils_missing_data.py │ └── Uygulama_2 │ │ ├── Sınıflandırma_Uygulama.ipynb │ │ ├── data │ │ ├── car_prices.csv │ │ ├── diabetes.csv │ │ └── diamonds.csv │ │ ├── reg_car_prices.ipynb │ │ └── reg_diamonds.ipynb └── Makine_Ogrenmesi_Temmuz_2024 │ └── Regresyon │ ├── data │ ├── car_prices.csv │ ├── diamonds.csv │ ├── experience_salary_dataset │ └── experience_sale_dataset │ ├── non_linear_regression.ipynb │ ├── reg_car_prices.ipynb │ ├── reg_diamonds.ipynb │ └── simple_linear_regression.ipynb ├── Webinar ├── Webinar-II-VeriOnisleme │ ├── data │ │ └── carprices.csv │ ├── data_generator.py │ ├── discretization.ipynb │ ├── transformations.py │ ├── transformations_basic.ipynb │ └── transformations_bcyj.ipynb ├── Webinar-IV-Sağlıkta YZ Uygulamaları | Mamografi Görüntülerinden Kitle Tespiti-II │ └── inbreastDataPreparing.ipynb ├── Webinar-V-Ozellik Muhendisligi │ ├── LDA_1.ipynb │ ├── LDA_2.ipynb │ ├── PCA_1.ipynb │ └── PCA_2.ipynb └── Webinar-VII-YOLO ile Mamografi Görüntülerinden Kitle Tespit Uygulaması │ ├── data.yaml │ ├── dataPreparation.ipynb │ ├── utils.py │ └── yoloImplementation.ipynb └── docs └── dijitalgencyze-icon.jpg /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | **

Dijital Genç Yapay Zekâ Ekosistemi

** 5 | 6 | Cumhurbaşkanlığı Dijital Dönüşüm Ofisi Dijital Genç Yapay Zekâ Ekosistemi, üniversitelerdeki yapay zekâ ile ilgili öğrenci kulüplerinin tek bir çatı altında toplayarak bu alanlardaki potansiyel nitelikli iş gücünün uygulamaya dayalı eğitimler, yarışmalar ve eğitim sonrası kariyer ödülleri ile desteklenmesi hedefiyle kurulmuştur. İlgili kulüplere dahil olan yapay zekâ ile ilgilenen on binlerce öğrencimizin eğitim hayatı kamu-akademi iş birliğiyle desteklenerek, mezuniyet sonrası süreçte kaliteli bir kariyer yolu çizilmesi konusunda imkân sağlanacaktır. 7 | 8 | Dijital Genç Yapay Zekâ Ekosistemi kapsamında düzenlenecek etkinlikleri ve güncel duyuruları aşağıdaki kanallardan takip edebilirsiniz: 9 | 10 | * **Telegram:** https://t.me/dijitalgencyz. 11 | 12 | * **Whatsapp:** https://whatsapp.com/channel/0029VaccbAfA2pL69lRYSM27 13 | 14 | * **bip:** https://channels.bip.ai/join/dijitalgencyz 15 | 16 | Dijital Genç Yapay Zekâ Ekosistemine dair sıkça sorulan sorulara [buradan](https://cbddo.gov.tr/sss/dijital-genc/) ulaşabilirsiniz. 17 | 18 | Soru, görüş ve önerileriniz için Başkanlığımızla dijitalgenc@cbddo.gov.tr adresi üzerinden iletişime geçebilirsiniz. 19 | 20 |
21 | 22 | ## **[DDYM Yapay Zeka Egitimleri](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/DDYM_Yapay_Zeka_Egitimi_2024)** 23 | 24 | ## **Aylık Webinarlar** 25 | 26 | * Webinar I - **Veri Operasyonları** 27 | * Webinar II - **[Veri Ön İşleme](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/Webinar/Webinar-II-VeriOnisleme)** 28 | * Webinar III - **[Sağlıkta YZ Uygulamaları | Mamografi Görüntülerinden Kitle Tespiti-I](https://github.com/cbddobvyz/digitaleye-mammography)** 29 | * Webinar IV - **[Sağlıkta YZ Uygulamaları | Mamografi Görüntülerinden Kitle Tespiti-II](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/Webinar/Webinar-IV-Sa%C4%9Fl%C4%B1kta%20YZ%20Uygulamalar%C4%B1%20%7C%20Mamografi%20G%C3%B6r%C3%BCnt%C3%BClerinden%20Kitle%20Tespiti-II)** 30 | * Webinar V - **[Ozellik Muhendisligi](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/Webinar/Webinar-V-Ozellik%20Muhendisligi)** 31 | * Webinar VII - **[YOLO ile Mamografi Görüntülerinden Kitle Tespit Uygulaması](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/Webinar/Webinar-VII-YOLO%20ile%20Mamografi%20G%C3%B6r%C3%BCnt%C3%BClerinden%20Kitle%20Tespit%20Uygulamas%C4%B1)** 32 | ## **Tematik Eğitimler** 33 | 34 | #### **[Makine Öğrenmesi (Temmuz 2024)](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/Tematik_Egitimler/Makine_Ogrenmesi_Temmuz_2024)** 35 | 36 | - **[Regresyon](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/Tematik_Egitimler/Makine_Ogrenmesi_Temmuz_2024/Regresyon)** 37 | 38 | #### **[Derin Öğrenme ile Görüntü İşleme (Temmuz 2024)](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/Tematik_Egitimler/Derin_Ogrenme_ile_Goruntu_Isleme_Temmuz_2024)** 39 | 40 | - **[Görüntü İşleme, Yapay Sinir Ağları, Derin Öğrenme - Giriş-Sınıflandırma](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/Tematik_Egitimler/Derin_Ogrenme_ile_Goruntu_Isleme_Temmuz_2024/Goruntu_Isleme_Yapay_Sinir_Aglari_Derin_Ogrenme_Giris)** 41 | - **[Görüntü İşleme, Yapay Sinir Ağları, Derin Öğrenme - Nesne Tespiti](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/Tematik_Egitimler/Derin_Ogrenme_ile_Goruntu_Isleme_Temmuz_2024/Nesne_Tespiti)** 42 | - **[Görüntü İşleme, Yapay Sinir Ağları, Derin Öğrenme - Uygulamalar](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/Tematik_Egitimler/Derin_Ogrenme_ile_Goruntu_Isleme_Temmuz_2024/Uygulamalar)** 43 | 44 | #### **[Makine Öğrenmesi (Mayıs 2024)](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/Tematik_Egitimler/Makine_Ogrenmesi_Mayis_2024)** 45 | 46 | - **[Kumeleme - Regresyon](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/Tematik_Egitimler/Makine_Ogrenmesi_Mayis_2024/Kumeleme_Regresyon)** 47 | 48 | - **[Sınıflandırma](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/Tematik_Egitimler/Makine_Ogrenmesi_Mayis_2024/Siniflandirma)** 49 | 50 | - **[Uygulama - 1](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/Tematik_Egitimler/Makine_Ogrenmesi_Mayis_2024/Uygulama_1)** 51 | 52 | - **[Uygulama - 2](https://github.com/cbddobvyz/dijital-genc-yze/tree/main/Tematik_Egitimler/Makine_Ogrenmesi_Mayis_2024/Uygulama_2)** 53 | -------------------------------------------------------------------------------- /Tematik_Egitimler/Derin_Ogrenme_ile_Goruntu_Isleme_Temmuz_2024/Goruntu_Isleme_Yapay_Sinir_Aglari_Derin_Ogrenme_Giris/CNN_Mimarileri.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "8AC6QIx_ur8-" 7 | }, 8 | "source": [ 9 | "#CNN Mimarilerinn Karşılaştırılması" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "source": [ 15 | "## Gerekli Kütüphanelerin yüklenmesi" 16 | ], 17 | "metadata": { 18 | "id": "x1TXleixxNHo" 19 | } 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "id": "nLXR_iaLrHqr" 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "# Google Colab'da çalıştırmak için gereken kütüphaneleri yükleyin\n", 30 | "!pip install tensorflow keras matplotlib\n", 31 | "\n", 32 | "import tensorflow as tf\n", 33 | "from tensorflow.keras.datasets import cifar10\n", 34 | "from tensorflow.keras.models import Sequential\n", 35 | "from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout\n", 36 | "from tensorflow.keras.applications import VGG16, InceptionV3, ResNet50, DenseNet121, EfficientNetB0\n", 37 | "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n", 38 | "from tensorflow.keras.utils import to_categorical\n", 39 | "import matplotlib.pyplot as plt\n", 40 | "import numpy as np\n", 41 | "from tensorflow.keras.optimizers import Adam\n", 42 | "from tensorflow.keras import layers, models" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "source": [ 48 | "## Veri setinin yüklenmesi" 49 | ], 50 | "metadata": { 51 | "id": "AYlmhZqKxUK5" 52 | } 53 | }, 54 | { 55 | "cell_type": "code", 56 | "source": [ 57 | "# CIFAR-10 veri setini yükleyin\n", 58 | "(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n", 59 | "\n", 60 | "input_shape = (32, 32, 3) # CIFAR-10 için giriş boyutu\n", 61 | "num_classes = 10" 62 | ], 63 | "metadata": { 64 | "id": "rkKeSFosxW53" 65 | }, 66 | "execution_count": null, 67 | "outputs": [] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "source": [ 72 | "## Normalizasyon" 73 | ], 74 | "metadata": { 75 | "id": "PGHqIinfxbcp" 76 | } 77 | }, 78 | { 79 | "cell_type": "code", 80 | "source": [ 81 | "# Verileri normalize edin ve etiketleri one-hot formatına dönüştürün\n", 82 | "x_train, x_test = x_train / 255.0, x_test / 255.0\n", 83 | "y_train, y_test = to_categorical(y_train), to_categorical(y_test)\n", 84 | "\n", 85 | "# Eğitim ve test verilerini artırma\n", 86 | "datagen = ImageDataGenerator(\n", 87 | " width_shift_range=0.1,\n", 88 | " height_shift_range=0.1,\n", 89 | " horizontal_flip=True\n", 90 | ")\n", 91 | "datagen.fit(x_train)\n", 92 | "\n", 93 | "# Metrikleri saklamak için boş bir liste oluşturun\n", 94 | "model_names = []\n", 95 | "accuracies = []" 96 | ], 97 | "metadata": { 98 | "id": "f2AODXTpxd4F" 99 | }, 100 | "execution_count": null, 101 | "outputs": [] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "source": [ 106 | "#Modellerin Oluşturulması" 107 | ], 108 | "metadata": { 109 | "id": "AsEKwpsxxhys" 110 | } 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "source": [ 115 | "## Lenet" 116 | ], 117 | "metadata": { 118 | "id": "xuxzCy_Txlef" 119 | } 120 | }, 121 | { 122 | "cell_type": "code", 123 | "source": [ 124 | "# Model tanımlama ve eğitim fonksiyonları\n", 125 | "def create_lenet5():\n", 126 | " model = Sequential([\n", 127 | " Conv2D(6, (5, 5), activation='tanh', padding='same', input_shape=(32, 32, 3)),\n", 128 | " MaxPooling2D(pool_size=(2, 2)),\n", 129 | " Conv2D(16, (5, 5), activation='tanh', padding='same'),\n", 130 | " MaxPooling2D(pool_size=(2, 2)),\n", 131 | " Conv2D(120, (5, 5), activation='tanh', padding='valid'),\n", 132 | " Flatten(),\n", 133 | " Dense(84, activation='tanh'),\n", 134 | " Dense(10, activation='softmax')\n", 135 | " ])\n", 136 | " model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n", 137 | " return model" 138 | ], 139 | "metadata": { 140 | "id": "wShK0uk4xoks" 141 | }, 142 | "execution_count": null, 143 | "outputs": [] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "source": [ 148 | "## Alexnet" 149 | ], 150 | "metadata": { 151 | "id": "4SPeLLBcxq2r" 152 | } 153 | }, 154 | { 155 | "cell_type": "code", 156 | "source": [ 157 | "def create_alexnet():\n", 158 | " model = models.Sequential()\n", 159 | "\n", 160 | " # 1. Konvolüsyonel Katman\n", 161 | " model.add(layers.Conv2D(96, (5, 5), strides=(1, 1), activation='relu', input_shape=input_shape, padding='same'))\n", 162 | " model.add(layers.MaxPooling2D((2, 2), strides=(2, 2)))\n", 163 | "\n", 164 | " # 2. Konvolüsyonel Katman\n", 165 | " model.add(layers.Conv2D(256, (5, 5), activation='relu', padding='same'))\n", 166 | " model.add(layers.MaxPooling2D((2, 2), strides=(2, 2)))\n", 167 | "\n", 168 | " # 3. Konvolüsyonel Katman\n", 169 | " model.add(layers.Conv2D(384, (3, 3), activation='relu', padding='same'))\n", 170 | "\n", 171 | " # 4. Konvolüsyonel Katman\n", 172 | " model.add(layers.Conv2D(384, (3, 3), activation='relu', padding='same'))\n", 173 | "\n", 174 | " # 5. Konvolüsyonel Katman\n", 175 | " model.add(layers.Conv2D(256, (3, 3), activation='relu', padding='same'))\n", 176 | " model.add(layers.MaxPooling2D((2, 2), strides=(2, 2)))\n", 177 | "\n", 178 | " # Düzleştirme Katmanı\n", 179 | " model.add(layers.Flatten())\n", 180 | "\n", 181 | " # Tam Bağlantılı Katmanlar\n", 182 | " model.add(layers.Dense(4096, activation='relu'))\n", 183 | " model.add(layers.Dropout(0.5))\n", 184 | " model.add(layers.Dense(4096, activation='relu'))\n", 185 | " model.add(layers.Dropout(0.5))\n", 186 | " model.add(layers.Dense(num_classes, activation='softmax'))\n", 187 | " model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])\n", 188 | " return model" 189 | ], 190 | "metadata": { 191 | "id": "YiynxHx6xsx1" 192 | }, 193 | "execution_count": null, 194 | "outputs": [] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "source": [ 199 | "## VGG" 200 | ], 201 | "metadata": { 202 | "id": "N__krfImxyw4" 203 | } 204 | }, 205 | { 206 | "cell_type": "code", 207 | "source": [ 208 | "def create_vgg16():\n", 209 | " base_model = VGG16(weights=None, input_shape=(32, 32, 3), classes=10)\n", 210 | " base_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n", 211 | " return base_model" 212 | ], 213 | "metadata": { 214 | "id": "EgeKPTupxy5o" 215 | }, 216 | "execution_count": null, 217 | "outputs": [] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "source": [ 222 | "## Resnet" 223 | ], 224 | "metadata": { 225 | "id": "uHYTqycEx2pQ" 226 | } 227 | }, 228 | { 229 | "cell_type": "code", 230 | "source": [ 231 | "def create_resnet():\n", 232 | " base_model = ResNet50(weights=None, input_shape=(32, 32, 3), classes=10)\n", 233 | " base_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n", 234 | " return base_model" 235 | ], 236 | "metadata": { 237 | "id": "Alo0OSXAx3NL" 238 | }, 239 | "execution_count": null, 240 | "outputs": [] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "source": [ 245 | "## Densnet" 246 | ], 247 | "metadata": { 248 | "id": "hSIV11l_x7KP" 249 | } 250 | }, 251 | { 252 | "cell_type": "code", 253 | "source": [ 254 | "def create_densenet():\n", 255 | " base_model = DenseNet121(weights=None, input_shape=(32, 32, 3), classes=10)\n", 256 | " base_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n", 257 | " return base_model" 258 | ], 259 | "metadata": { 260 | "id": "XWl9RXB9x8-W" 261 | }, 262 | "execution_count": null, 263 | "outputs": [] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "source": [ 268 | "## Efficentnet" 269 | ], 270 | "metadata": { 271 | "id": "MUrUcTtTx-5f" 272 | } 273 | }, 274 | { 275 | "cell_type": "code", 276 | "source": [ 277 | "def create_efficientnet():\n", 278 | " base_model = EfficientNetB0(weights=None, input_shape=(32, 32, 3), classes=10)\n", 279 | " base_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n", 280 | " return base_model" 281 | ], 282 | "metadata": { 283 | "id": "ZW1zaIlQyAuH" 284 | }, 285 | "execution_count": null, 286 | "outputs": [] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "source": [ 291 | "## Modelleri listeleme" 292 | ], 293 | "metadata": { 294 | "id": "Pk_RAl3PyC2b" 295 | } 296 | }, 297 | { 298 | "cell_type": "code", 299 | "source": [ 300 | "# Model listesi ve isimler\n", 301 | "models = [\n", 302 | " (create_lenet5(), 'LeNet-5'),\n", 303 | " (create_alexnet(), 'AlexNet'),\n", 304 | " (create_vgg16(), 'VGG16'),\n", 305 | " (create_resnet(), 'ResNet50'),\n", 306 | " (create_densenet(), 'DenseNet121'),\n", 307 | " (create_efficientnet(), 'EfficientNetB0')\n", 308 | "]" 309 | ], 310 | "metadata": { 311 | "id": "1q2XHaZ0yEsC" 312 | }, 313 | "execution_count": null, 314 | "outputs": [] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "source": [ 319 | "## Modellerin eğitimi ve doğrulama" 320 | ], 321 | "metadata": { 322 | "id": "Jc6uX_OnyGpD" 323 | } 324 | }, 325 | { 326 | "cell_type": "code", 327 | "source": [ 328 | "# Her modeli eğitin ve doğruluğunu hesaplayın\n", 329 | "for model, name in models:\n", 330 | " print(f\"Training {name}...\")\n", 331 | " history = model.fit(datagen.flow(x_train, y_train, batch_size=64),\n", 332 | " validation_data=(x_test, y_test),\n", 333 | " epochs=10, verbose=1)\n", 334 | " _, accuracy = model.evaluate(x_test, y_test, verbose=0)\n", 335 | " model_names.append(name)\n", 336 | " accuracies.append(accuracy)\n", 337 | " print(f\"{name} Test Accuracy: {accuracy:.4f}\")" 338 | ], 339 | "metadata": { 340 | "id": "x1XqdFS3yKWl" 341 | }, 342 | "execution_count": null, 343 | "outputs": [] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "source": [ 348 | "## Modellerin görselleştirilmesi" 349 | ], 350 | "metadata": { 351 | "id": "rTq_s90IyQqo" 352 | } 353 | }, 354 | { 355 | "cell_type": "code", 356 | "source": [ 357 | "# Sonuçları görselleştirin\n", 358 | "plt.figure(figsize=(12, 6))\n", 359 | "plt.bar(model_names, accuracies, color='skyblue')\n", 360 | "plt.xlabel('Model')\n", 361 | "plt.ylabel('Doğruluk')\n", 362 | "plt.title('CNN Mimarileri ve CIFAR-10 Doğruluk Karşılaştırması')\n", 363 | "plt.show()" 364 | ], 365 | "metadata": { 366 | "id": "i_khGZpeyTJQ" 367 | }, 368 | "execution_count": null, 369 | "outputs": [] 370 | } 371 | ], 372 | "metadata": { 373 | "colab": { 374 | "provenance": [], 375 | "gpuType": "T4" 376 | }, 377 | "kernelspec": { 378 | "display_name": "Python 3", 379 | "name": "python3" 380 | }, 381 | "language_info": { 382 | "name": "python" 383 | }, 384 | "accelerator": "GPU" 385 | }, 386 | "nbformat": 4, 387 | "nbformat_minor": 0 388 | } -------------------------------------------------------------------------------- /Tematik_Egitimler/Derin_Ogrenme_ile_Goruntu_Isleme_Temmuz_2024/Nesne_Tespiti/Faster_RCNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "colab": { 8 | "base_uri": "https://localhost:8080/" 9 | }, 10 | "id": "-RrkMVGV35QL", 11 | "outputId": "469143d7-2996-4579-bebf-3f93970e7165" 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "pip install tensorflow tensorflow-hub tensorflow-datasets opencv-python-headless matplotlib\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "colab": { 23 | "base_uri": "https://localhost:8080/", 24 | "height": 979 25 | }, 26 | "id": "Xum27oAu35-M", 27 | "outputId": "7a9653d7-aaeb-4f42-fdbf-9ec448f8dd6e" 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "import tensorflow as tf\n", 32 | "import tensorflow_hub as hub\n", 33 | "import tensorflow_datasets as tfds\n", 34 | "import numpy as np\n", 35 | "import cv2\n", 36 | "import matplotlib.pyplot as plt\n", 37 | "\n", 38 | "# Faster R-CNN modelini TensorFlow Hub'dan yükle\n", 39 | "model_url = \"https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_640x640/1\"\n", 40 | "detector = hub.load(model_url)\n", 41 | "\n", 42 | "# PASCAL VOC veri setini yükle (yalnızca küçük bir kısmını alıyoruz)\n", 43 | "dataset, dataset_info = tfds.load('voc/2007', split='validation[:5%]', with_info=True)\n", 44 | "\n", 45 | "# PASCAL VOC sınıf isimlerini al\n", 46 | "category_index = {i: {'id': i, 'name': name} for i, name in enumerate(dataset_info.features['objects']['label'].names)}\n", 47 | "\n", 48 | "# Bir örnek görüntü seç\n", 49 | "sample = next(iter(dataset))\n", 50 | "image_np = sample['image'].numpy()\n", 51 | "\n", 52 | "# Görüntüyü hazırla\n", 53 | "image_tensor = tf.convert_to_tensor(image_np)\n", 54 | "image_tensor = tf.expand_dims(image_tensor, 0)\n", 55 | "\n", 56 | "# 1. Region Proposal Generation\n", 57 | "detections = detector(image_tensor)\n", 58 | "num_detections = int(detections.pop('num_detections'))\n", 59 | "detections = {key: value[0, :num_detections].numpy() for key, value in detections.items()}\n", 60 | "detection_boxes = detections['detection_boxes']\n", 61 | "\n", 62 | "# Region Proposals'ı görselleştir\n", 63 | "def plot_region_proposals(image_np, boxes, max_boxes_to_draw=20):\n", 64 | " image_np_with_annotations = image_np.copy()\n", 65 | " for i in range(min(max_boxes_to_draw, boxes.shape[0])):\n", 66 | " box = tuple(boxes[i].tolist())\n", 67 | " image_np_with_annotations = cv2.rectangle(\n", 68 | " image_np_with_annotations,\n", 69 | " (int(box[1] * image_np.shape[1]), int(box[0] * image_np.shape[0])),\n", 70 | " (int(box[3] * image_np.shape[1]), int(box[2] * image_np.shape[0])),\n", 71 | " (0, 255, 0), 2\n", 72 | " )\n", 73 | " plt.figure(figsize=(12, 8))\n", 74 | " plt.imshow(image_np_with_annotations)\n", 75 | " plt.title('Region Proposals')\n", 76 | " plt.show()\n", 77 | "\n", 78 | "plot_region_proposals(image_np, detection_boxes)\n", 79 | "\n", 80 | "#\n", 81 | "\n", 82 | "# 3. Classification\n", 83 | "detection_classes = detections['detection_classes'].astype(np.int64)\n", 84 | "detection_scores = detections['detection_scores']\n", 85 | "\n", 86 | "# Tespit sonuçlarını görselleştir\n", 87 | "def plot_detections(image_np, boxes, classes, scores, category_index, max_boxes_to_draw=20, min_score_thresh=.5):\n", 88 | " image_np_with_annotations = image_np.copy()\n", 89 | " for i in range(min(max_boxes_to_draw, boxes.shape[0])):\n", 90 | " if scores[i] >= min_score_thresh:\n", 91 | " box = tuple(boxes[i].tolist())\n", 92 | " class_name = category_index[classes[i]]['name']\n", 93 | " display_str = f'{class_name}: {int(100 * scores[i])}%'\n", 94 | " image_np_with_annotations = cv2.rectangle(\n", 95 | " image_np_with_annotations,\n", 96 | " (int(box[1] * image_np.shape[1]), int(box[0] * image_np.shape[0])),\n", 97 | " (int(box[3] * image_np.shape[1]), int(box[2] * image_np.shape[0])),\n", 98 | " (0, 255, 0), 2\n", 99 | " )\n", 100 | " image_np_with_annotations = cv2.putText(\n", 101 | " image_np_with_annotations, display_str,\n", 102 | " (int(box[1] * image_np.shape[1]), int(box[0] * image_np.shape[0]) - 10),\n", 103 | " cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2\n", 104 | " )\n", 105 | " plt.figure(figsize=(12, 8))\n", 106 | " plt.imshow(image_np_with_annotations)\n", 107 | " plt.title('Detected Objects')\n", 108 | " plt.show()\n", 109 | "\n", 110 | "# Tespit edilen nesneleri görselleştir\n", 111 | "plot_detections(image_np, detection_boxes, detection_classes, detection_scores, category_index)\n" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "colab": { 119 | "base_uri": "https://localhost:8080/", 120 | "height": 1000 121 | }, 122 | "id": "-DPFTG7S4FHK", 123 | "outputId": "a0c0032e-af09-49ab-e8e8-809a9f78438b" 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "import os\n", 128 | "import tarfile\n", 129 | "import urllib.request\n", 130 | "\n", 131 | "import tensorflow as tf\n", 132 | "import tensorflow_hub as hub\n", 133 | "import numpy as np\n", 134 | "import cv2\n", 135 | "import matplotlib.pyplot as plt\n", 136 | "import xml.etree.ElementTree as ET\n", 137 | "\n", 138 | "# PASCAL VOC veri setini indirmek için URL\n", 139 | "VOC_URL = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar'\n", 140 | "\n", 141 | "# Veri setini indirme fonksiyonu\n", 142 | "def download_and_extract_voc(url, download_dir):\n", 143 | " if not os.path.exists(download_dir):\n", 144 | " os.makedirs(download_dir)\n", 145 | " tar_file_path = os.path.join(download_dir, 'VOCtrainval_06-Nov-2007.tar')\n", 146 | " if not os.path.exists(tar_file_path):\n", 147 | " print('Downloading PASCAL VOC 2007 dataset...')\n", 148 | " urllib.request.urlretrieve(url, tar_file_path)\n", 149 | " print('Download complete.')\n", 150 | " else:\n", 151 | " print('PASCAL VOC 2007 dataset already downloaded.')\n", 152 | "\n", 153 | " # Dosyayı çıkartma\n", 154 | " with tarfile.open(tar_file_path, 'r') as tar:\n", 155 | " tar.extractall(path=download_dir)\n", 156 | " print('Extraction complete.')\n", 157 | "\n", 158 | "# Veri seti yolunu belirleyin\n", 159 | "download_dir = './voc2007'\n", 160 | "download_and_extract_voc(VOC_URL, download_dir)\n", 161 | "\n", 162 | "# Görüntü ve anotasyon dosyalarını listeleme\n", 163 | "voc_root = os.path.join(download_dir, 'VOCdevkit', 'VOC2007')\n", 164 | "image_dir = os.path.join(voc_root, 'JPEGImages')\n", 165 | "annotation_dir = os.path.join(voc_root, 'Annotations')\n", 166 | "\n", 167 | "image_files = sorted([os.path.join(image_dir, file) for file in os.listdir(image_dir) if file.endswith('.jpg')])\n", 168 | "annotation_files = sorted([os.path.join(annotation_dir, file) for file in os.listdir(annotation_dir) if file.endswith('.xml')])\n", 169 | "\n", 170 | "# Bir örnek görüntü ve anotasyon dosyasını seçme\n", 171 | "sample_image_path = image_files[0]\n", 172 | "sample_annotation_path = annotation_files[0]\n", 173 | "\n", 174 | "# Anotasyon dosyasını okuma\n", 175 | "def parse_voc_annotation(annotation_path):\n", 176 | " tree = ET.parse(annotation_path)\n", 177 | " root = tree.getroot()\n", 178 | " boxes = []\n", 179 | " classes = []\n", 180 | " for obj in root.findall('object'):\n", 181 | " class_name = obj.find('name').text\n", 182 | " bbox = obj.find('bndbox')\n", 183 | " xmin = int(bbox.find('xmin').text)\n", 184 | " ymin = int(bbox.find('ymin').text)\n", 185 | " xmax = int(bbox.find('xmax').text)\n", 186 | " ymax = int(bbox.find('ymax').text)\n", 187 | " boxes.append([ymin, xmin, ymax, xmax])\n", 188 | " classes.append(class_name)\n", 189 | " return boxes, classes\n", 190 | "\n", 191 | "# Örnek görüntü ve anotasyonu yükleme\n", 192 | "image_np = cv2.imread(sample_image_path)\n", 193 | "image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)\n", 194 | "boxes, classes = parse_voc_annotation(sample_annotation_path)\n", 195 | "\n", 196 | "# Kategori indeksi oluşturma\n", 197 | "unique_classes = sorted(set(classes))\n", 198 | "category_index = {i: {'id': i, 'name': name} for i, name in enumerate(unique_classes)}\n", 199 | "\n", 200 | "# Görüntüyü hazırla\n", 201 | "image_tensor = tf.convert_to_tensor(image_np)\n", 202 | "image_tensor = tf.expand_dims(image_tensor, 0)\n", 203 | "\n", 204 | "# Faster R-CNN modelini TensorFlow Hub'dan yükle\n", 205 | "model_url = \"https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_640x640/1\"\n", 206 | "detector = hub.load(model_url)\n", 207 | "\n", 208 | "# 1. Region Proposal Generation\n", 209 | "detections = detector(image_tensor)\n", 210 | "num_detections = int(detections.pop('num_detections'))\n", 211 | "detections = {key: value[0, :num_detections].numpy() for key, value in detections.items()}\n", 212 | "detection_boxes = detections['detection_boxes']\n", 213 | "\n", 214 | "# Region Proposals'ı görselleştir\n", 215 | "def plot_region_proposals(image_np, boxes, max_boxes_to_draw=20):\n", 216 | " image_np_with_annotations = image_np.copy()\n", 217 | " for i in range(min(max_boxes_to_draw, boxes.shape[0])):\n", 218 | " box = tuple(boxes[i].tolist())\n", 219 | " image_np_with_annotations = cv2.rectangle(\n", 220 | " image_np_with_annotations,\n", 221 | " (int(box[1] * image_np.shape[1]), int(box[0] * image_np.shape[0])),\n", 222 | " (int(box[3] * image_np.shape[1]), int(box[2] * image_np.shape[0])),\n", 223 | " (0, 255, 0), 2\n", 224 | " )\n", 225 | " plt.figure(figsize=(12, 8))\n", 226 | " plt.imshow(image_np_with_annotations)\n", 227 | " plt.title('Region Proposals')\n", 228 | " plt.show()\n", 229 | "\n", 230 | "plot_region_proposals(image_np, detection_boxes)\n", 231 | "\n", 232 | "# 2. Feature Extraction\n", 233 | "# (Bu adım daha karmaşıktır ve genellikle derin öğrenme modelinin bir parçası olarak gerçekleştirilir)\n", 234 | "\n", 235 | "# 3. Classification\n", 236 | "detection_classes = detections['detection_classes'].astype(np.int64)\n", 237 | "detection_scores = detections['detection_scores']\n", 238 | "\n", 239 | "# Tespit sonuçlarını görselleştir\n", 240 | "def plot_detections(image_np, boxes, classes, scores, category_index, max_boxes_to_draw=20, min_score_thresh=.5):\n", 241 | " image_np_with_annotations = image_np.copy()\n", 242 | " for i in range(min(max_boxes_to_draw, boxes.shape[0])):\n", 243 | " if scores[i] >= min_score_thresh:\n", 244 | " box = tuple(boxes[i].tolist())\n", 245 | " class_name = category_index[classes[i]]['name']\n", 246 | " display_str = f'{class_name}: {int(100 * scores[i])}%'\n", 247 | " image_np_with_annotations = cv2.rectangle(\n", 248 | " image_np_with_annotations,\n", 249 | " (int(box[1] * image_np.shape[1]), int(box[0] * image_np.shape[0])),\n", 250 | " (int(box[3] * image_np.shape[1]), int(box[2] * image_np.shape[0])),\n", 251 | " (0, 255, 0), 2\n", 252 | " )\n", 253 | " image_np_with_annotations = cv2.putText(\n", 254 | " image_np_with_annotations, display_str,\n", 255 | " (int(box[1] * image_np.shape[1]), int(box[0] * image_np.shape[0]) - 10),\n", 256 | " cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2\n", 257 | " )\n", 258 | " plt.figure(figsize=(12, 8))\n", 259 | " plt.imshow(image_np_with_annotations)\n", 260 | " plt.title('Detected Objects')\n", 261 | " plt.show()\n", 262 | "\n", 263 | "# Tespit edilen nesneleri görselleştir\n", 264 | "plot_detections(image_np, detection_boxes, detection_classes, detection_scores, category_index)\n" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": { 271 | "colab": { 272 | "base_uri": "https://localhost:8080/", 273 | "height": 1000 274 | }, 275 | "id": "zF1OBlAD5tEx", 276 | "outputId": "2fe4fea9-5e9c-47c7-9450-6a180410a6b9" 277 | }, 278 | "outputs": [], 279 | "source": [ 280 | "import os\n", 281 | "import tarfile\n", 282 | "import urllib.request\n", 283 | "\n", 284 | "import tensorflow as tf\n", 285 | "import tensorflow_hub as hub\n", 286 | "import numpy as np\n", 287 | "import cv2\n", 288 | "import matplotlib.pyplot as plt\n", 289 | "import xml.etree.ElementTree as ET\n", 290 | "\n", 291 | "# PASCAL VOC veri setini indirmek için URL\n", 292 | "VOC_URL = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar'\n", 293 | "\n", 294 | "# Veri setini indirme fonksiyonu\n", 295 | "def download_and_extract_voc(url, download_dir):\n", 296 | " if not os.path.exists(download_dir):\n", 297 | " os.makedirs(download_dir)\n", 298 | " tar_file_path = os.path.join(download_dir, 'VOCtrainval_06-Nov-2007.tar')\n", 299 | " if not os.path.exists(tar_file_path):\n", 300 | " print('Downloading PASCAL VOC 2007 dataset...')\n", 301 | " urllib.request.urlretrieve(url, tar_file_path)\n", 302 | " print('Download complete.')\n", 303 | " else:\n", 304 | " print('PASCAL VOC 2007 dataset already downloaded.')\n", 305 | "\n", 306 | " # Dosyayı çıkartma\n", 307 | " with tarfile.open(tar_file_path, 'r') as tar:\n", 308 | " tar.extractall(path=download_dir)\n", 309 | " print('Extraction complete.')\n", 310 | "\n", 311 | "# Veri seti yolunu belirleyin\n", 312 | "download_dir = './voc2007'\n", 313 | "download_and_extract_voc(VOC_URL, download_dir)\n", 314 | "\n", 315 | "# Görüntü ve anotasyon dosyalarını listeleme\n", 316 | "voc_root = os.path.join(download_dir, 'VOCdevkit', 'VOC2007')\n", 317 | "image_dir = os.path.join(voc_root, 'JPEGImages')\n", 318 | "annotation_dir = os.path.join(voc_root, 'Annotations')\n", 319 | "\n", 320 | "image_files = sorted([os.path.join(image_dir, file) for file in os.listdir(image_dir) if file.endswith('.jpg')])\n", 321 | "annotation_files = sorted([os.path.join(annotation_dir, file) for file in os.listdir(annotation_dir) if file.endswith('.xml')])\n", 322 | "\n", 323 | "# Bir örnek görüntü ve anotasyon dosyasını seçme\n", 324 | "sample_image_path = image_files[0]\n", 325 | "sample_annotation_path = annotation_files[0]\n", 326 | "\n", 327 | "# Anotasyon dosyasını okuma\n", 328 | "def parse_voc_annotation(annotation_path):\n", 329 | " tree = ET.parse(annotation_path)\n", 330 | " root = tree.getroot()\n", 331 | " boxes = []\n", 332 | " classes = []\n", 333 | " for obj in root.findall('object'):\n", 334 | " class_name = obj.find('name').text\n", 335 | " bbox = obj.find('bndbox')\n", 336 | " xmin = int(bbox.find('xmin').text)\n", 337 | " ymin = int(bbox.find('ymin').text)\n", 338 | " xmax = int(bbox.find('xmax').text)\n", 339 | " ymax = int(bbox.find('ymax').text)\n", 340 | " boxes.append([ymin, xmin, ymax, xmax])\n", 341 | " classes.append(class_name)\n", 342 | " return boxes, classes\n", 343 | "\n", 344 | "# Örnek görüntü ve anotasyonu yükleme\n", 345 | "image_np = cv2.imread(sample_image_path)\n", 346 | "image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)\n", 347 | "boxes, classes = parse_voc_annotation(sample_annotation_path)\n", 348 | "\n", 349 | "# Kategori indeksi oluşturma\n", 350 | "voc_classes = [\n", 351 | " \"aeroplane\", \"bicycle\", \"bird\", \"boat\", \"bottle\", \"bus\", \"car\", \"cat\", \"chair\",\n", 352 | " \"cow\", \"diningtable\", \"dog\", \"horse\", \"motorbike\", \"person\", \"pottedplant\",\n", 353 | " \"sheep\", \"sofa\", \"train\", \"tvmonitor\"\n", 354 | "]\n", 355 | "category_index = {i+1: {'id': i+1, 'name': name} for i, name in enumerate(voc_classes)}\n", 356 | "\n", 357 | "# Görüntüyü hazırla\n", 358 | "image_tensor = tf.convert_to_tensor(image_np)\n", 359 | "image_tensor = tf.expand_dims(image_tensor, 0)\n", 360 | "\n", 361 | "# Faster R-CNN modelini TensorFlow Hub'dan yükle\n", 362 | "model_url = \"https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_640x640/1\"\n", 363 | "detector = hub.load(model_url)\n", 364 | "\n", 365 | "# 1. Region Proposal Generation\n", 366 | "detections = detector(image_tensor)\n", 367 | "num_detections = int(detections.pop('num_detections'))\n", 368 | "detections = {key: value[0, :num_detections].numpy() for key, value in detections.items()}\n", 369 | "detection_boxes = detections['detection_boxes']\n", 370 | "\n", 371 | "# Region Proposals'ı görselleştir\n", 372 | "def plot_region_proposals(image_np, boxes, max_boxes_to_draw=20):\n", 373 | " image_np_with_annotations = image_np.copy()\n", 374 | " for i in range(min(max_boxes_to_draw, boxes.shape[0])):\n", 375 | " box = tuple(boxes[i].tolist())\n", 376 | " image_np_with_annotations = cv2.rectangle(\n", 377 | " image_np_with_annotations,\n", 378 | " (int(box[1] * image_np.shape[1]), int(box[0] * image_np.shape[0])),\n", 379 | " (int(box[3] * image_np.shape[1]), int(box[2] * image_np.shape[0])),\n", 380 | " (0, 255, 0), 2\n", 381 | " )\n", 382 | " plt.figure(figsize=(12, 8))\n", 383 | " plt.imshow(image_np_with_annotations)\n", 384 | " plt.title('Region Proposals')\n", 385 | " plt.show()\n", 386 | "\n", 387 | "plot_region_proposals(image_np, detection_boxes)\n", 388 | "\n", 389 | "# 2. Feature Extraction\n", 390 | "# (Bu adım daha karmaşıktır ve genellikle derin öğrenme modelinin bir parçası olarak gerçekleştirilir)\n", 391 | "\n", 392 | "# 3. Classification\n", 393 | "detection_classes = detections['detection_classes'].astype(np.int64)\n", 394 | "detection_scores = detections['detection_scores']\n", 395 | "\n", 396 | "# Tespit sonuçlarını görselleştir\n", 397 | "def plot_detections(image_np, boxes, classes, scores, category_index, max_boxes_to_draw=20, min_score_thresh=.5):\n", 398 | " image_np_with_annotations = image_np.copy()\n", 399 | " for i in range(min(max_boxes_to_draw, boxes.shape[0])):\n", 400 | " if scores[i] >= min_score_thresh:\n", 401 | " box = tuple(boxes[i].tolist())\n", 402 | " class_id = classes[i]\n", 403 | " if class_id in category_index:\n", 404 | " class_name = category_index[class_id]['name']\n", 405 | " display_str = f'{class_name}: {int(100 * scores[i])}%'\n", 406 | " image_np_with_annotations = cv2.rectangle(\n", 407 | " image_np_with_annotations,\n", 408 | " (int(box[1] * image_np.shape[1]), int(box[0] * image_np.shape[0])),\n", 409 | " (int(box[3] * image_np.shape[1]), int(box[2] * image_np.shape[0])),\n", 410 | " (0, 255, 0), 2\n", 411 | " )\n", 412 | " image_np_with_annotations = cv2.putText(\n", 413 | " image_np_with_annotations, display_str,\n", 414 | " (int(box[1] * image_np.shape[1]), int(box[0] * image_np.shape[0]) - 10),\n", 415 | " cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2\n", 416 | " )\n", 417 | " plt.figure(figsize=(12, 8))\n", 418 | " plt.imshow(image_np_with_annotations)\n", 419 | " plt.title('Detected Objects')\n", 420 | " plt.show()\n", 421 | "\n", 422 | "# Tespit edilen nesneleri görselleştir\n", 423 | "plot_detections(image_np, detection_boxes, detection_classes, detection_scores, category_index)\n" 424 | ] 425 | } 426 | ], 427 | "metadata": { 428 | "colab": { 429 | "provenance": [] 430 | }, 431 | "kernelspec": { 432 | "display_name": "Python 3 (ipykernel)", 433 | "language": "python", 434 | "name": "python3" 435 | }, 436 | "language_info": { 437 | "codemirror_mode": { 438 | "name": "ipython", 439 | "version": 3 440 | }, 441 | "file_extension": ".py", 442 | "mimetype": "text/x-python", 443 | "name": "python", 444 | "nbconvert_exporter": "python", 445 | "pygments_lexer": "ipython3", 446 | "version": "3.10.12" 447 | } 448 | }, 449 | "nbformat": 4, 450 | "nbformat_minor": 1 451 | } 452 | -------------------------------------------------------------------------------- /Tematik_Egitimler/Derin_Ogrenme_ile_Goruntu_Isleme_Temmuz_2024/Nesne_Tespiti/OD_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "wQBMQHq-0W-4" 7 | }, 8 | "source": [ 9 | "# Nesne tanımlama skor gösterimi" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "colab": { 17 | "base_uri": "https://localhost:8080/" 18 | }, 19 | "id": "GRA8Hox6QCXD", 20 | "outputId": "413e172f-9bc1-4c67-84c8-5981d9b06347" 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "!pip install ultralytics\n", 25 | "!pip install opencv-python-headless\n", 26 | "!pip install matplotlib" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "id": "z7DYKirnL1Pv" 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "import cv2\n", 38 | "import matplotlib.pyplot as plt\n", 39 | "from ultralytics import YOLO\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "colab": { 47 | "base_uri": "https://localhost:8080/" 48 | }, 49 | "id": "gwuMsN55L3kj", 50 | "outputId": "78edd86f-55ec-4b5b-a9ff-971c9dd825bf" 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "model = YOLO('yolov8n.pt')\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "id": "VcHAIm3VL5qx" 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "def detect_objects(image_path):\n", 66 | " image = cv2.imread(image_path)\n", 67 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # OpenCV BGR formatını RGB'ye çevir\n", 68 | " results = model(image)[0]\n", 69 | "\n", 70 | " detections = []\n", 71 | " for result in results.boxes.data.tolist():\n", 72 | " x1, y1, x2, y2, score, class_id = result\n", 73 | " detections.append([int(x1), int(y1), int(x2), int(y2), round(score, 3),\n", 74 | " results.names[int(class_id)]])\n", 75 | "\n", 76 | " return detections, image\n" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "id": "nhsvIN9KL7ca" 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "def plot_detections(image, detections):\n", 88 | " plt.figure(figsize=(10, 10))\n", 89 | " plt.imshow(image)\n", 90 | " ax = plt.gca()\n", 91 | "\n", 92 | " for detection in detections:\n", 93 | " x1, y1, x2, y2, score, class_name = detection\n", 94 | " rect = plt.Rectangle((x1, y1), x2-x1, y2-y1, fill=False, color='red', linewidth=2)\n", 95 | " ax.add_patch(rect)\n", 96 | " plt.text(x1, y1, f'{class_name} {score}', bbox=dict(facecolor='yellow', alpha=0.5), fontsize=12, color='black')\n", 97 | "\n", 98 | " plt.axis('off')\n", 99 | " plt.show()\n" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "colab": { 107 | "base_uri": "https://localhost:8080/" 108 | }, 109 | "id": "NuD4Rauex2BQ", 110 | "outputId": "9e20fad8-9730-486e-b87f-039b0744f2d4" 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "import os\n", 115 | "import glob\n", 116 | "\n", 117 | "# Görüntü dosyalarının bulunduğu dizin\n", 118 | "image_directory = '/content/Data'\n", 119 | "\n", 120 | "# Dizindeki tüm görüntü dosyalarının yollarını al\n", 121 | "image_paths = glob.glob(os.path.join(image_directory, '*'))\n", 122 | "\n", 123 | "# Tespit edilen nesneleri ve görüntüleri işlemek için döngü\n", 124 | "for image_path in image_paths:\n", 125 | " detections, image = detect_objects(image_path)\n", 126 | " print(detections)\n" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "colab": { 134 | "base_uri": "https://localhost:8080/", 135 | "height": 1000 136 | }, 137 | "id": "fQLr4QBIMAKl", 138 | "outputId": "0f4ff94e-1508-450b-ce0a-b73d18313fc6" 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "import os\n", 143 | "import glob\n", 144 | "\n", 145 | "# Görüntü dosyalarının bulunduğu dizin\n", 146 | "image_directory = '/content/Data'\n", 147 | "\n", 148 | "# Dizindeki tüm görüntü dosyalarının yollarını al\n", 149 | "image_paths = glob.glob(os.path.join(image_directory, '*'))\n", 150 | "\n", 151 | "# Tespit edilen nesneleri ve görüntüleri işlemek için döngü\n", 152 | "for image_path in image_paths:\n", 153 | " detections, image = detect_objects(image_path)\n", 154 | " plot_detections(image, detections)\n", 155 | " print(detections)\n" 156 | ] 157 | } 158 | ], 159 | "metadata": { 160 | "colab": { 161 | "provenance": [] 162 | }, 163 | "kernelspec": { 164 | "display_name": "Python 3 (ipykernel)", 165 | "language": "python", 166 | "name": "python3" 167 | }, 168 | "language_info": { 169 | "codemirror_mode": { 170 | "name": "ipython", 171 | "version": 3 172 | }, 173 | "file_extension": ".py", 174 | "mimetype": "text/x-python", 175 | "name": "python", 176 | "nbconvert_exporter": "python", 177 | "pygments_lexer": "ipython3", 178 | "version": "3.10.12" 179 | } 180 | }, 181 | "nbformat": 4, 182 | "nbformat_minor": 1 183 | } 184 | -------------------------------------------------------------------------------- /Tematik_Egitimler/Derin_Ogrenme_ile_Goruntu_Isleme_Temmuz_2024/Nesne_Tespiti/OD_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "rdQTAtcq0_Ww" 7 | }, 8 | "source": [ 9 | "# coco data set ile yolo" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "id": "PDjELAsbV8j6" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "!pip install ultralytics\n", 21 | "!pip install opencv-python-headless\n", 22 | "!pip install matplotlib\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "id": "p3O8UuTBaS12" 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "import cv2\n", 34 | "import matplotlib.pyplot as plt\n", 35 | "from ultralytics import YOLO\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "colab": { 43 | "base_uri": "https://localhost:8080/" 44 | }, 45 | "id": "yCZ0j6MIaToL", 46 | "outputId": "808b1766-0c79-4a57-e166-ebbbad8bc3f8" 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "!wget -O example1.jpg http://images.cocodataset.org/val2017/000000397133.jpg\n", 51 | "!wget -O example2.jpg http://images.cocodataset.org/val2017/000000037777.jpg\n" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "colab": { 59 | "base_uri": "https://localhost:8080/" 60 | }, 61 | "id": "vpQctq4AaYHe", 62 | "outputId": "7809e662-f98f-4273-9a04-8028edb1f338" 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "model = YOLO('yolov8n.pt') # En küçük model olan YOLOv8n'yi kullanıyoruz\n" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "id": "F1HyDeWxaaex" 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "def detect_objects(image_path):\n", 78 | " image = cv2.imread(image_path)\n", 79 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # OpenCV BGR formatını RGB'ye çevir\n", 80 | " results = model(image)[0]\n", 81 | "\n", 82 | " detections = []\n", 83 | " for result in results.boxes.data.tolist():\n", 84 | " x1, y1, x2, y2, score, class_id = result\n", 85 | " detections.append([int(x1), int(y1), int(x2), int(y2), round(score, 3),\n", 86 | " results.names[int(class_id)]])\n", 87 | "\n", 88 | " return detections, image\n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "id": "237guk1nadDe" 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "def plot_detections(image, detections):\n", 100 | " plt.figure(figsize=(10, 10))\n", 101 | " plt.imshow(image)\n", 102 | " ax = plt.gca()\n", 103 | "\n", 104 | " for detection in detections:\n", 105 | " x1, y1, x2, y2, score, class_name = detection\n", 106 | " rect = plt.Rectangle((x1, y1), x2-x1, y2-y1, fill=False, color='red', linewidth=2)\n", 107 | " ax.add_patch(rect)\n", 108 | " plt.text(x1, y1, f'{class_name} {score}', bbox=dict(facecolor='yellow', alpha=0.5), fontsize=12, color='black')\n", 109 | "\n", 110 | " plt.axis('off')\n", 111 | " plt.show()\n" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "colab": { 119 | "base_uri": "https://localhost:8080/", 120 | "height": 1000 121 | }, 122 | "id": "hjY9dCehafb8", 123 | "outputId": "87ebe97d-c4f2-4797-d99c-24099b7b2b3c" 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "image_paths = ['example1.jpg', 'example2.jpg']\n", 128 | "\n", 129 | "for image_path in image_paths:\n", 130 | " detections, image = detect_objects(image_path)\n", 131 | " plot_detections(image, detections)\n" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "colab": { 139 | "base_uri": "https://localhost:8080/", 140 | "height": 859 141 | }, 142 | "id": "pbBmUK6Baied", 143 | "outputId": "b5a1740b-4b18-4b1d-a800-6866d1bacc05" 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "# Yeni görüntü üzerinde nesne tespiti yapın ve sonuçları görselleştirin\n", 148 | "image_path = '/content/köpek.jpg'\n", 149 | "detections, image = detect_objects(image_path)\n", 150 | "plot_detections(image, detections)" 151 | ] 152 | } 153 | ], 154 | "metadata": { 155 | "colab": { 156 | "provenance": [] 157 | }, 158 | "kernelspec": { 159 | "display_name": "Python 3 (ipykernel)", 160 | "language": "python", 161 | "name": "python3" 162 | }, 163 | "language_info": { 164 | "codemirror_mode": { 165 | "name": "ipython", 166 | "version": 3 167 | }, 168 | "file_extension": ".py", 169 | "mimetype": "text/x-python", 170 | "name": "python", 171 | "nbconvert_exporter": "python", 172 | "pygments_lexer": "ipython3", 173 | "version": "3.10.12" 174 | } 175 | }, 176 | "nbformat": 4, 177 | "nbformat_minor": 1 178 | } 179 | -------------------------------------------------------------------------------- /Tematik_Egitimler/Derin_Ogrenme_ile_Goruntu_Isleme_Temmuz_2024/Nesne_Tespiti/OD_3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "vqZn0ZwY1H76" 7 | }, 8 | "source": [ 9 | "# Yolo ile video işleme" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "colab": { 17 | "base_uri": "https://localhost:8080/" 18 | }, 19 | "id": "CMKqeH0aczN9", 20 | "outputId": "69e974e5-53dc-49df-a756-15c0afee641e" 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "!pip install ultralytics\n", 25 | "!pip install opencv-python-headless\n", 26 | "!pip install matplotlib\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "id": "ai8U9aRSc7KB" 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "import cv2\n", 38 | "import matplotlib.pyplot as plt\n", 39 | "from ultralytics import YOLO\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "colab": { 47 | "base_uri": "https://localhost:8080/" 48 | }, 49 | "id": "MCLqBmlEc9HY", 50 | "outputId": "5a436427-8cad-432f-de9c-b699560e6b90" 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "model = YOLO('yolov8n.pt') # En küçük model olan YOLOv8n'yi kullanıyoruz\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "colab": { 62 | "base_uri": "https://localhost:8080/", 63 | "height": 73 64 | }, 65 | "id": "so5PglyAe2Xd", 66 | "outputId": "1d758764-3e8b-494d-cc58-118898610f85" 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "from google.colab import files\n", 71 | "\n", 72 | "uploaded = files.upload() # Burada video dosyasını seçin ve yükleyin\n" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "colab": { 80 | "base_uri": "https://localhost:8080/" 81 | }, 82 | "id": "Ji5WT7xyfIEb", 83 | "outputId": "f1e299bc-72bd-4ee0-bd63-702cc2641555" 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "def detect_faces_in_video(video_path):\n", 88 | " cap = cv2.VideoCapture(video_path)\n", 89 | " if not cap.isOpened():\n", 90 | " print(\"Error: Could not open video.\")\n", 91 | " return\n", 92 | "\n", 93 | " # Video yazıcı ayarları\n", 94 | " fourcc = cv2.VideoWriter_fourcc(*'XVID')\n", 95 | " out = cv2.VideoWriter('output.avi', fourcc, 20.0, (int(cap.get(3)), int(cap.get(4))))\n", 96 | "\n", 97 | " while cap.isOpened():\n", 98 | " ret, frame = cap.read()\n", 99 | " if not ret:\n", 100 | " break\n", 101 | "\n", 102 | " # YOLOv8 modelini kullanarak yüz tespiti yap\n", 103 | " results = model(frame)[0]\n", 104 | "\n", 105 | " # Tespit edilen yüzleri çerçeve içine al\n", 106 | " for result in results.boxes.data.tolist():\n", 107 | " x1, y1, x2, y2, score, class_id = result\n", 108 | " class_name = results.names[int(class_id)]\n", 109 | " if class_name == 'person': # YOLOv8n modelinde 'person' olarak tanımlanmışsa\n", 110 | " x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)\n", 111 | " cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)\n", 112 | " cv2.putText(frame, f'{class_name} {score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)\n", 113 | "\n", 114 | " # Görüntüyü yaz\n", 115 | " out.write(frame)\n", 116 | "\n", 117 | " cap.release()\n", 118 | " out.release()\n", 119 | " cv2.destroyAllWindows()\n", 120 | "\n", 121 | "# Yüklediğiniz video dosyasının ismini buraya girin\n", 122 | "video_filename = list(uploaded.keys())[0]\n", 123 | "detect_faces_in_video(video_filename)\n" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "id": "Di6IZMnrfL8D" 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "from IPython.display import HTML\n", 135 | "from base64 import b64encode\n", 136 | "\n", 137 | "def display_video(file_path):\n", 138 | " video_file = open(file_path, \"rb\").read()\n", 139 | " video_url = f\"data:video/mp4;base64,{b64encode(video_file).decode()}\"\n", 140 | " return HTML(f\"\"\"\"\"\")\n", 141 | "\n", 142 | "# Çıktı videosunu göster\n", 143 | "display_video('output.avi')\n" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": { 150 | "colab": { 151 | "base_uri": "https://localhost:8080/" 152 | }, 153 | "id": "e4O_6bAic_uE", 154 | "outputId": "de8f9685-a809-488e-db21-317d2911d30e" 155 | }, 156 | "outputs": [], 157 | "source": [ 158 | "def detect_faces_from_camera():\n", 159 | " cap = cv2.VideoCapture(0) # Kamerayı aç\n", 160 | " if not cap.isOpened():\n", 161 | " print(\"Error: Could not open video stream.\")\n", 162 | " return\n", 163 | "\n", 164 | " while True:\n", 165 | " ret, frame = cap.read()\n", 166 | " if not ret:\n", 167 | " break\n", 168 | "\n", 169 | " # YOLOv8 modelini kullanarak yüz tespiti yap\n", 170 | " results = model(frame)[0]\n", 171 | "\n", 172 | " # Tespit edilen yüzleri çerçeve içine al\n", 173 | " for result in results.boxes.data.tolist():\n", 174 | " x1, y1, x2, y2, score, class_id = result\n", 175 | " class_name = results.names[int(class_id)]\n", 176 | " if class_name == 'person': # YOLOv8n modelinde 'person' olarak tanımlanmışsa\n", 177 | " x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)\n", 178 | " cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)\n", 179 | " cv2.putText(frame, f'{class_name} {score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)\n", 180 | "\n", 181 | " # Görüntüyü göster\n", 182 | " cv2.imshow('YOLOv8 Face Detection', frame)\n", 183 | "\n", 184 | " # 'q' tuşuna basıldığında çık\n", 185 | " if cv2.waitKey(1) & 0xFF == ord('q'):\n", 186 | " break\n", 187 | "\n", 188 | " cap.release()\n", 189 | " cv2.destroyAllWindows()\n", 190 | "\n", 191 | "# Kamera akışında yüz tespiti yap\n", 192 | "detect_faces_from_camera()\n" 193 | ] 194 | } 195 | ], 196 | "metadata": { 197 | "colab": { 198 | "provenance": [] 199 | }, 200 | "kernelspec": { 201 | "display_name": "Python 3 (ipykernel)", 202 | "language": "python", 203 | "name": "python3" 204 | }, 205 | "language_info": { 206 | "codemirror_mode": { 207 | "name": "ipython", 208 | "version": 3 209 | }, 210 | "file_extension": ".py", 211 | "mimetype": "text/x-python", 212 | "name": "python", 213 | "nbconvert_exporter": "python", 214 | "pygments_lexer": "ipython3", 215 | "version": "3.10.12" 216 | } 217 | }, 218 | "nbformat": 4, 219 | "nbformat_minor": 1 220 | } 221 | -------------------------------------------------------------------------------- /Tematik_Egitimler/Derin_Ogrenme_ile_Goruntu_Isleme_Temmuz_2024/Nesne_Tespiti/OD_Yolol.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "colab": { 8 | "base_uri": "https://localhost:8080/" 9 | }, 10 | "id": "aVBv02Lu91dY", 11 | "outputId": "86ba07ef-f819-4617-e54b-cc96da07161c" 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "!pip install ultralytics\n", 16 | "!pip install opencv-python-headless\n", 17 | "!pip install matplotlib" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": { 24 | "id": "styMcW0m93gq" 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "import cv2\n", 29 | "import matplotlib.pyplot as plt\n", 30 | "from ultralytics import YOLO" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "colab": { 38 | "base_uri": "https://localhost:8080/" 39 | }, 40 | "id": "YPEJe7cK95vY", 41 | "outputId": "9919a12d-beb4-41bd-c66b-b5560c060375" 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "model = YOLO('yolov8l.pt')" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": { 52 | "id": "takNl7J59-No" 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "def detect_objects(image_path):\n", 57 | " image = cv2.imread(image_path)\n", 58 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # OpenCV BGR formatını RGB'ye çevir\n", 59 | " results = model(image)[0]\n", 60 | "\n", 61 | " detections = []\n", 62 | " for result in results.boxes.data.tolist():\n", 63 | " x1, y1, x2, y2, score, class_id = result\n", 64 | " detections.append([int(x1), int(y1), int(x2), int(y2), round(score, 3),\n", 65 | " results.names[int(class_id)]])\n", 66 | "\n", 67 | " return detections, image" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "id": "xJcw8X0j-AZQ" 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "def plot_detections(image, detections):\n", 79 | " plt.figure(figsize=(10, 10))\n", 80 | " plt.imshow(image)\n", 81 | " ax = plt.gca()\n", 82 | "\n", 83 | " for detection in detections:\n", 84 | " x1, y1, x2, y2, score, class_name = detection\n", 85 | " rect = plt.Rectangle((x1, y1), x2-x1, y2-y1, fill=False, color='red', linewidth=2)\n", 86 | " ax.add_patch(rect)\n", 87 | " plt.text(x1, y1, f'{class_name} {score}', bbox=dict(facecolor='yellow', alpha=0.5), fontsize=12, color='black')\n", 88 | "\n", 89 | " plt.axis('off')\n", 90 | " plt.show()" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "colab": { 98 | "base_uri": "https://localhost:8080/" 99 | }, 100 | "id": "XOhaRe2k-Cch", 101 | "outputId": "bfdce9fa-ed5d-48ec-ae97-70580df247c3" 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "import os\n", 106 | "import glob\n", 107 | "\n", 108 | "# Görüntü dosyalarının bulunduğu dizin\n", 109 | "image_directory = '/content/Data'\n", 110 | "\n", 111 | "# Dizindeki tüm görüntü dosyalarının yollarını al\n", 112 | "image_paths = glob.glob(os.path.join(image_directory, '*'))\n", 113 | "\n", 114 | "# Tespit edilen nesneleri ve görüntüleri işlemek için döngü\n", 115 | "for image_path in image_paths:\n", 116 | " detections, image = detect_objects(image_path)\n", 117 | " print(detections)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "colab": { 125 | "base_uri": "https://localhost:8080/", 126 | "height": 1000 127 | }, 128 | "id": "vbuEhxsN-TER", 129 | "outputId": "7b888715-40b0-4d15-f6d8-28bcdbeca05c" 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "import os\n", 134 | "import glob\n", 135 | "\n", 136 | "# Görüntü dosyalarının bulunduğu dizin\n", 137 | "image_directory = '/content/Data'\n", 138 | "\n", 139 | "# Dizindeki tüm görüntü dosyalarının yollarını al\n", 140 | "image_paths = glob.glob(os.path.join(image_directory, '*'))\n", 141 | "\n", 142 | "# Tespit edilen nesneleri ve görüntüleri işlemek için döngü\n", 143 | "for image_path in image_paths:\n", 144 | " detections, image = detect_objects(image_path)\n", 145 | " plot_detections(image, detections)\n", 146 | " print(detections)" 147 | ] 148 | } 149 | ], 150 | "metadata": { 151 | "colab": { 152 | "provenance": [] 153 | }, 154 | "kernelspec": { 155 | "display_name": "Python 3 (ipykernel)", 156 | "language": "python", 157 | "name": "python3" 158 | }, 159 | "language_info": { 160 | "codemirror_mode": { 161 | "name": "ipython", 162 | "version": 3 163 | }, 164 | "file_extension": ".py", 165 | "mimetype": "text/x-python", 166 | "name": "python", 167 | "nbconvert_exporter": "python", 168 | "pygments_lexer": "ipython3", 169 | "version": "3.10.12" 170 | } 171 | }, 172 | "nbformat": 4, 173 | "nbformat_minor": 1 174 | } 175 | -------------------------------------------------------------------------------- /Tematik_Egitimler/Derin_Ogrenme_ile_Goruntu_Isleme_Temmuz_2024/Uygulamalar/T_Uyg_4_automatic_mask_generator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "5fa21d44", 7 | "metadata": { 8 | "id": "5fa21d44" 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "# Copyright (c) Meta Platforms, Inc. and affiliates." 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "id": "b7c0041e", 18 | "metadata": { 19 | "id": "b7c0041e" 20 | }, 21 | "source": [ 22 | "# SAM ile nesne maskelerini otomatik olarak oluşturma" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "289bb0b4", 28 | "metadata": { 29 | "id": "289bb0b4" 30 | }, 31 | "source": [ 32 | "SAM (Segment Anything Model) otomatik maskeleme, bilgisayarla görü ve görüntü segmentasyonu alanında kullanılan bir tekniktir. SAM, geniş bir nesne yelpazesini doğru bir şekilde tanımlayabilen ve segmentlere ayırabilen bir modeldir. Bu model, kullanıcıların manuel olarak etiketleme yapmalarını gerektirmeden, görüntülerdeki nesneleri otomatik olarak segmentlere ayırabilir. İşte SAM otomatik maskelemenin detayları:\n", 33 | "\n", 34 | "### 1. **Temel Kavramlar**\n", 35 | " - **Segmentasyon:** Görüntüdeki farklı nesneleri veya bölgeleri belirlemek ve ayırmak amacıyla yapılan işlemdir. Segmentasyon, görüntüyü bir dizi anlamlı parçaya ayırarak her bir parçayı bağımsız olarak analiz etmeyi sağlar.\n", 36 | " - **Maskeleme:** Segmentasyon işleminin bir parçası olarak, belirli bir nesneyi veya bölgeyi görüntü üzerinde izole etmek için kullanılan bir tekniktir. Maskeleme, belirli bir nesnenin arka plandan ayrılmasına yardımcı olur.\n", 37 | "\n", 38 | "### 2. **SAM Modelinin İşleyişi**\n", 39 | " - **Ön Eğitim ve Özellik Öğrenme:** SAM, genellikle büyük ve çeşitli veri kümesi üzerinde önceden eğitilmiş bir modeldir. Bu, modelin çeşitli nesneleri tanıma ve segmentasyon yapma yeteneğini geliştirir.\n", 40 | " - **Otomatik Maskeleme:** SAM, görüntüdeki her nesne için otomatik olarak bir maske oluşturur. Bu maskeler, nesnelerin sınırlarını belirler ve onları arka plandan ayırır.\n", 41 | " - **Kullanıcı Girdisi:** Bazı SAM modelleri, kullanıcıdan belirli bölgeleri işaretlemesini isteyebilir, ancak çoğu durumda bu tür girdiler gerekmez. Model, kendi başına görüntüdeki nesneleri tespit edip maskeleyebilir." 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "id": "c0b71431", 47 | "metadata": { 48 | "id": "c0b71431" 49 | }, 50 | "source": [ 51 | "## Set-up" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "id": "0685a2f5", 58 | "metadata": { 59 | "colab": { 60 | "base_uri": "https://localhost:8080/" 61 | }, 62 | "id": "0685a2f5", 63 | "outputId": "603427a3-b9b5-4b62-94f2-79a44cbc68a5" 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "import torch\n", 68 | "import torchvision\n", 69 | "print(\"PyTorch version:\", torch.__version__)\n", 70 | "print(\"Torchvision version:\", torchvision.__version__)\n", 71 | "print(\"CUDA is available:\", torch.cuda.is_available())\n", 72 | "import sys\n", 73 | "!{sys.executable} -m pip install opencv-python matplotlib\n", 74 | "!{sys.executable} -m pip install 'git+https://github.com/facebookresearch/segment-anything.git'\n", 75 | "\n", 76 | "!mkdir images\n", 77 | "!wget -P images https://raw.githubusercontent.com/facebookresearch/segment-anything/main/notebooks/images/dog.jpg\n", 78 | "\n", 79 | "!wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth\n" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "id": "fd2bc687", 85 | "metadata": { 86 | "id": "fd2bc687" 87 | }, 88 | "source": [ 89 | "## Gerekli kütüphanelerin kurulması" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "id": "560725a2", 96 | "metadata": { 97 | "id": "560725a2" 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "import numpy as np\n", 102 | "import torch\n", 103 | "import matplotlib.pyplot as plt\n", 104 | "import cv2" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "id": "3dVBoupe34wj", 110 | "metadata": { 111 | "id": "3dVBoupe34wj" 112 | }, 113 | "source": [ 114 | "# Maske gösterim fonksiyonu\n", 115 | "Bu Python kodu, matplotlib kütüphanesi ve NumPy kullanarak bir görüntü üzerinde segmentasyon sonuçlarını (maskeleri) görselleştirmek için yazılmış bir fonksiyondur. Fonksiyon, segmentasyon maskelerini renkli olarak gösterir ve her maskeye rastgele bir renk atar.\n", 116 | "\n", 117 | "### Fonksiyonun Açıklaması\n", 118 | "\n", 119 | "\n", 120 | "\n", 121 | "\n", 122 | "1. **Boş Liste Kontrolü:**\n", 123 | " ```python\n", 124 | " if len(anns) == 0:\n", 125 | " return\n", 126 | " ```\n", 127 | " - Eğer `anns` (annotation'lar) listesi boşsa, fonksiyon hiçbir şey yapmadan döner. Bu, segmentasyon sonuçları olmadığında fonksiyonun çalışmasını önler.\n", 128 | "\n", 129 | "2. **Anotasyonları Alanlarına Göre Sıralama:**\n", 130 | " ```python\n", 131 | " sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)\n", 132 | " ```\n", 133 | " - `anns` listesindeki her bir anotasyon `area` (alan) özelliğine göre azalan sırayla sıralanır. Bu, en büyük maskelerin önce gösterilmesini sağlar.\n", 134 | "\n", 135 | "3. **Matplotlib Eksenlerini Alma:**\n", 136 | " ```python\n", 137 | " ax = plt.gca()\n", 138 | " ax.set_autoscale_on(False)\n", 139 | " ```\n", 140 | " - Mevcut bir matplotlib ekseni (`ax`) alınır ve otomatik ölçekleme kapatılır. Bu, görüntü üzerinde sabit bir ölçekleme sağlar.\n", 141 | "\n", 142 | "4. **Boş Bir Görüntü Oluşturma:**\n", 143 | " ```python\n", 144 | " img = np.ones((sorted_anns[0]['segmentation'].shape[0], sorted_anns[0]['segmentation'].shape[1], 4))\n", 145 | " img[:,:,3] = 0\n", 146 | " ```\n", 147 | " - Segmentasyon maskelerinin boyutlarıyla uyumlu olarak tamamen beyaz (her pikselin değeri 1 olan) bir görüntü (`img`) oluşturulur. `4` kanalı, RGBA (Kırmızı, Yeşil, Mavi, Alfa) değerleri için kullanılır.\n", 148 | " - `img[:,:,3] = 0` ifadesi, görüntüdeki alfa kanalını sıfır yaparak görüntüyü başlangıçta tamamen şeffaf hale getirir.\n", 149 | "\n", 150 | "5. **Maskeleri Renkli Olarak Görselleştirme:**\n", 151 | " ```python\n", 152 | " for ann in sorted_anns:\n", 153 | " m = ann['segmentation']\n", 154 | " color_mask = np.concatenate([np.random.random(3), [0.35]])\n", 155 | " img[m] = color_mask\n", 156 | " ```\n", 157 | " - Her anotasyon (`ann`) için maskeyi (`m`) alır. Maskenin içindeki piksellerin, renkli maskenin değerleriyle güncellenmesi sağlanır.\n", 158 | " - `np.random.random(3)` ifadesi, RGB renk değerleri için rastgele bir renk oluşturur. `[0.35]` ise alfa (şeffaflık) kanalının değeridir. Bu, maskeyi hafif şeffaf hale getirir.\n", 159 | " - `color_mask` bu iki parçayı birleştirir ve `img[m] = color_mask` ifadesi, maskenin piksellerini bu renk ile günceller.\n", 160 | "\n", 161 | "6. **Görüntüyü Gösterme:**\n", 162 | " ```python\n", 163 | " ax.imshow(img)\n", 164 | " ```\n", 165 | " - `img` görüntüsü, matplotlib ekseninde (`ax`) görüntülenir. Bu, segmentasyon maskelerini renkli olarak gösterir.\n" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "id": "74b6e5f0", 172 | "metadata": { 173 | "id": "74b6e5f0" 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "def show_anns(anns):\n", 178 | " if len(anns) == 0:\n", 179 | " return\n", 180 | " sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)\n", 181 | " ax = plt.gca()\n", 182 | " ax.set_autoscale_on(False)\n", 183 | "\n", 184 | " img = np.ones((sorted_anns[0]['segmentation'].shape[0], sorted_anns[0]['segmentation'].shape[1], 4))\n", 185 | " img[:,:,3] = 0\n", 186 | " for ann in sorted_anns:\n", 187 | " m = ann['segmentation']\n", 188 | " color_mask = np.concatenate([np.random.random(3), [0.35]])\n", 189 | " img[m] = color_mask\n", 190 | " ax.imshow(img)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "id": "27c41445", 196 | "metadata": { 197 | "id": "27c41445" 198 | }, 199 | "source": [ 200 | "## Örnek görüntü" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "id": "ad354922", 207 | "metadata": { 208 | "id": "ad354922", 209 | "scrolled": false 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "image = cv2.imread('images/dog.jpg')\n", 214 | "image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "id": "e0ac8c67", 221 | "metadata": { 222 | "colab": { 223 | "base_uri": "https://localhost:8080/", 224 | "height": 764 225 | }, 226 | "id": "e0ac8c67", 227 | "outputId": "bf82cbd9-424b-405a-bcf3-35f629f1871c" 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "plt.figure(figsize=(10,10))\n", 232 | "plt.imshow(image)\n", 233 | "plt.axis('off')\n", 234 | "plt.show()" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "id": "b8c2824a", 240 | "metadata": { 241 | "id": "b8c2824a" 242 | }, 243 | "source": [ 244 | "## Otomatik maske üretme" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "id": "d9ef74c5", 250 | "metadata": { 251 | "id": "d9ef74c5" 252 | }, 253 | "source": [ 254 | "### **Modül ve Sınıfları İçe Aktarmak**\n", 255 | "\n", 256 | "```python\n", 257 | "from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor\n", 258 | "```\n", 259 | "- Bu satır, `segment_anything` adlı modülden gerekli sınıfları içe aktarır:\n", 260 | " - `sam_model_registry`: SAM modelinin kayıtlı türlerinin bulunduğu bir sözlük veya liste.\n", 261 | " - `SamAutomaticMaskGenerator`: Otomatik maskeleme işlemini gerçekleştiren sınıf.\n", 262 | " - `SamPredictor`: SAM modelinin tahmin yapabilen bir sınıf\n", 263 | "\n", 264 | "### 3. **Model ve Aygıt Ayarları**\n", 265 | "\n", 266 | "```python\n", 267 | "sam_checkpoint = \"sam_vit_h_4b8939.pth\"\n", 268 | "model_type = \"vit_h\"\n", 269 | "device = \"cuda\"\n", 270 | "```\n", 271 | "- `sam_checkpoint`: Eğitimli SAM modelinin ağırlık dosyasının adı.\n", 272 | "- `model_type`: Kullanılacak modelin türü. Bu örnekte `vit_h` (Vision Transformer, büyük boyutlu).\n", 273 | "- `device`: Modelin çalışacağı cihaz. `cuda` ifadesi, GPU (CUDA uyumlu) kullanmak anlamına gelir.\n", 274 | "\n", 275 | "### 4. **Modeli Yükleme ve Aygıta Gönderme**\n", 276 | "\n", 277 | "```python\n", 278 | "sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)\n", 279 | "sam.to(device=device)\n", 280 | "```\n", 281 | "- `sam_model_registry[model_type]`: Model türüne göre (`vit_h` burada) modelin sınıfını seçer ve oluşturur. `checkpoint` parametresi, modelin önceden eğitilmiş ağırlıklarını yükler.\n", 282 | "- `sam.to(device=device)`: Modeli belirtilen cihaz (`cuda`) üzerine taşır. Bu, modelin GPU üzerinde çalışmasını sağlar.\n", 283 | "\n", 284 | "### 5. **Otomatik Maske Üreticisi Oluşturma**\n", 285 | "\n", 286 | "```python\n", 287 | "mask_generator = SamAutomaticMaskGenerator(sam)\n", 288 | "```\n", 289 | "- `SamAutomaticMaskGenerator(sam)`: `sam` modelini kullanarak otomatik maskeler oluşturacak bir `SamAutomaticMaskGenerator` örneği oluşturur. Bu sınıf, modelin segmentasyon işlemlerini otomatik olarak yapmasını sağlar.\n" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "id": "1848a108", 296 | "metadata": { 297 | "id": "1848a108" 298 | }, 299 | "outputs": [], 300 | "source": [ 301 | "import sys\n", 302 | "sys.path.append(\"..\")\n", 303 | "from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor\n", 304 | "\n", 305 | "sam_checkpoint = \"sam_vit_h_4b8939.pth\"\n", 306 | "model_type = \"vit_h\"\n", 307 | "\n", 308 | "device = \"cuda\"\n", 309 | "\n", 310 | "sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)\n", 311 | "sam.to(device=device)\n", 312 | "\n", 313 | "mask_generator = SamAutomaticMaskGenerator(sam)" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "id": "391771c1", 320 | "metadata": { 321 | "id": "391771c1" 322 | }, 323 | "outputs": [], 324 | "source": [ 325 | "masks = mask_generator.generate(image)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "id": "e36a1a39", 331 | "metadata": { 332 | "id": "e36a1a39" 333 | }, 334 | "source": [ 335 | "Maske oluşturma, her maskenin maske hakkında çeşitli veriler içeren bir sözlük olduğu maskeler üzerinde bir liste döndürür. Bu anahtarlar şunlardır:\n", 336 | "\n", 337 | "- segmentation : maske\n", 338 | "- area : maskenin piksel cinsinden alanı\n", 339 | "- bbox : maskenin XYWH biçimindeki sınır kutusu\n", 340 | "- predicted_iou : maskenin kalitesi için modelin kendi tahmini\n", 341 | "- point_coords : bu maskeyi oluşturan örneklenmiş giriş noktası\n", 342 | "- stability_score : maske kalitesinin ek bir ölçüsü\n", 343 | "- crop_box : bu maskeyi XYWH biçiminde oluşturmak için kullanılan görüntünün kırpılması" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "id": "4fae8d66", 350 | "metadata": { 351 | "colab": { 352 | "base_uri": "https://localhost:8080/" 353 | }, 354 | "id": "4fae8d66", 355 | "outputId": "60c73ee1-02ad-4382-b484-3406c66ae12a" 356 | }, 357 | "outputs": [], 358 | "source": [ 359 | "print(len(masks))\n", 360 | "print(masks[0].keys())" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "id": "53009a1f", 366 | "metadata": { 367 | "id": "53009a1f" 368 | }, 369 | "source": [ 370 | "## Tüm maskeleri görüntüle" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "id": "77ac29c5", 377 | "metadata": { 378 | "colab": { 379 | "base_uri": "https://localhost:8080/", 380 | "height": 764 381 | }, 382 | "id": "77ac29c5", 383 | "outputId": "a5e9c453-5905-4d49-d093-dde1d70d44d1", 384 | "scrolled": false 385 | }, 386 | "outputs": [], 387 | "source": [ 388 | "plt.figure(figsize=(20,20))\n", 389 | "plt.imshow(image)\n", 390 | "show_anns(masks)\n", 391 | "plt.axis('off')\n", 392 | "plt.show()" 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "id": "00b3d6b2", 398 | "metadata": { 399 | "id": "00b3d6b2" 400 | }, 401 | "source": [ 402 | "## Otomatik maske oluşturma seçenekleri" 403 | ] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "id": "183de84e", 408 | "metadata": { 409 | "id": "183de84e" 410 | }, 411 | "source": [ 412 | "\n", 413 | "### Parametreler ve Anlamları\n", 414 | "\n", 415 | "1. **`model=sam`**\n", 416 | " - Bu parametre, `SamAutomaticMaskGenerator` sınıfına, segmentasyon işlemlerini gerçekleştirecek SAM modelinin örneğini sağlar. Bu, daha önce tanımladığınız `sam` modelini kullanır.\n", 417 | "\n", 418 | "2. **`points_per_side=32`**\n", 419 | " - Bu parametre, her bir kenarın üzerinde kaç tane nokta kullanılacağını belirtir. Yüksek bir değer, daha detaylı ve hassas maskeler oluşturabilir. Genellikle, bu değer segmentasyonun hassasiyetini etkiler.\n", 420 | "\n", 421 | "3. **`pred_iou_thresh=0.86`**\n", 422 | " - Bu, modelin tahmin ettiği maskelerin doğruluğunu belirlemek için kullanılan bir eşik değeridir.\n", 423 | " - `IOU` (Intersection Over Union) skoru, tahmin edilen maske ile gerçek maske arasındaki örtüşmeyi ölçer. Bu eşik değer, sadece belirli bir IOU skoru veya daha yüksek olan maskeleri kabul eder.\n", 424 | "\n", 425 | "4. **`stability_score_thresh=0.92`**\n", 426 | " - Bu eşik değeri, segmentasyon maskelerinin kararlılığını değerlendirir.\n", 427 | " - Daha yüksek bir `stability_score_thresh`, maskelerin daha tutarlı ve güvenilir olmasını sağlar. Maskelerin bu eşik değerinin altında kalanları yoksayar.\n", 428 | "\n", 429 | "5. **`crop_n_layers=1`**\n", 430 | " - Maskelerin kesilmesinde kullanılacak katman sayısını belirler. Daha fazla katman, daha iyi ayrıntı ve kesinlik sağlayabilir, ancak işlem süresini uzatabilir.\n", 431 | "\n", 432 | "6. **`crop_n_points_downscale_factor=2`**\n", 433 | " - Bu faktör, maskelerin kesilmesinde kullanılan nokta sayısının ölçeklendirme faktörüdür. Düşük bir faktör, daha fazla nokta kullanır, bu da daha hassas maskeler oluşturur, ancak işlem süresini artırabilir.\n", 434 | "\n", 435 | "7. **`min_mask_region_area=100`**\n", 436 | " - Bu, maskelenmiş bölgelerin minimum alanını belirler. Bu değerin altında kalan bölgeler yoksayılır. Bu, küçük, önemsiz bölgelerin maskelenmesini engeller." 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": null, 442 | "id": "68364513", 443 | "metadata": { 444 | "id": "68364513" 445 | }, 446 | "outputs": [], 447 | "source": [ 448 | "mask_generator_2 = SamAutomaticMaskGenerator(\n", 449 | " model=sam,\n", 450 | " points_per_side=32,\n", 451 | " pred_iou_thresh=0.86,\n", 452 | " stability_score_thresh=0.92,\n", 453 | " crop_n_layers=1,\n", 454 | " crop_n_points_downscale_factor=2,\n", 455 | " min_mask_region_area=100, # Requires open-cv to run post-processing\n", 456 | ")" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "id": "bebcdaf1", 463 | "metadata": { 464 | "id": "bebcdaf1" 465 | }, 466 | "outputs": [], 467 | "source": [ 468 | "masks2 = mask_generator_2.generate(image)" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": null, 474 | "id": "b8473f3c", 475 | "metadata": { 476 | "colab": { 477 | "base_uri": "https://localhost:8080/" 478 | }, 479 | "id": "b8473f3c", 480 | "outputId": "0113fad6-debf-44f0-8752-97579a1dd69d" 481 | }, 482 | "outputs": [], 483 | "source": [ 484 | "len(masks2)" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": null, 490 | "id": "fb702ae3", 491 | "metadata": { 492 | "colab": { 493 | "base_uri": "https://localhost:8080/", 494 | "height": 764 495 | }, 496 | "id": "fb702ae3", 497 | "outputId": "957a3d9a-c087-426f-a6d9-98c37624cc36" 498 | }, 499 | "outputs": [], 500 | "source": [ 501 | "plt.figure(figsize=(20,20))\n", 502 | "plt.imshow(image)\n", 503 | "show_anns(masks2)\n", 504 | "plt.axis('off')\n", 505 | "plt.show()" 506 | ] 507 | } 508 | ], 509 | "metadata": { 510 | "accelerator": "GPU", 511 | "colab": { 512 | "gpuType": "T4", 513 | "provenance": [], 514 | "toc_visible": true 515 | }, 516 | "kernelspec": { 517 | "display_name": "Python 3 (ipykernel)", 518 | "language": "python", 519 | "name": "python3" 520 | }, 521 | "language_info": { 522 | "codemirror_mode": { 523 | "name": "ipython", 524 | "version": 3 525 | }, 526 | "file_extension": ".py", 527 | "mimetype": "text/x-python", 528 | "name": "python", 529 | "nbconvert_exporter": "python", 530 | "pygments_lexer": "ipython3", 531 | "version": "3.10.12" 532 | } 533 | }, 534 | "nbformat": 4, 535 | "nbformat_minor": 5 536 | } 537 | -------------------------------------------------------------------------------- /Tematik_Egitimler/Makine_Ogrenmesi_Mayis_2024/Kumeleme_Regresyon/data/experience_salary_dataset: -------------------------------------------------------------------------------- 1 | YearsExperience,Salary 2 | 1.1,39343 3 | 1.3,46205 4 | 1.5,37731 5 | 2,43525 6 | 2.2,39891 7 | 2.9,56642 8 | 3,60150 9 | 3.2,54445 10 | 3.2,64445 11 | 3.7,57189 12 | 3.9,63218 13 | 4,55794 14 | 4,56957 15 | 4.1,57081 16 | 4.5,61111 17 | 4.9,67938 18 | 5.1,66029 19 | 5.3,83088 20 | 5.9,81363 21 | 6,93940 22 | 6.8,91738 23 | 7.1,98273 24 | 7.9,101302 25 | 8.2,113812 26 | 8.7,109431 27 | 9,105582 28 | 9.5,116969 29 | 9.6,112635 30 | 10.3,122391 31 | 10.5,121872 -------------------------------------------------------------------------------- /Tematik_Egitimler/Makine_Ogrenmesi_Mayis_2024/Kumeleme_Regresyon/data/experience_sale_dataset: -------------------------------------------------------------------------------- 1 | YearsExperience,HouseSales 2 | 3.2,27 3 | 8.2,30 4 | 5.7,31 5 | 7.5,36 6 | 1.5,16 7 | 1.1,15 8 | 6.75,36 9 | 8.5,31 10 | 3.2,19 11 | 3.9,22 12 | 0.1,9 13 | 1.0,12 14 | 0.4,6 15 | 4.4,31 16 | 1.4,19 -------------------------------------------------------------------------------- /Tematik_Egitimler/Makine_Ogrenmesi_Mayis_2024/Uygulama_1/data/iris.csv: -------------------------------------------------------------------------------- 1 | Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species 2 | 1,5.1,3.5,1.4,0.2,Iris-setosa 3 | 2,4.9,3.0,1.4,0.2,Iris-setosa 4 | 3,4.7,3.2,1.3,0.2,Iris-setosa 5 | 4,4.6,3.1,1.5,0.2,Iris-setosa 6 | 5,5.0,3.6,1.4,0.2,Iris-setosa 7 | 6,5.4,3.9,1.7,0.4,Iris-setosa 8 | 7,4.6,3.4,1.4,0.3,Iris-setosa 9 | 8,5.0,3.4,1.5,0.2,Iris-setosa 10 | 9,4.4,2.9,1.4,0.2,Iris-setosa 11 | 10,4.9,3.1,1.5,0.1,Iris-setosa 12 | 11,5.4,3.7,1.5,0.2,Iris-setosa 13 | 12,4.8,3.4,1.6,0.2,Iris-setosa 14 | 13,4.8,3.0,1.4,0.1,Iris-setosa 15 | 14,4.3,3.0,1.1,0.1,Iris-setosa 16 | 15,5.8,4.0,1.2,0.2,Iris-setosa 17 | 16,5.7,4.4,1.5,0.4,Iris-setosa 18 | 17,5.4,3.9,1.3,0.4,Iris-setosa 19 | 18,5.1,3.5,1.4,0.3,Iris-setosa 20 | 19,5.7,3.8,1.7,0.3,Iris-setosa 21 | 20,5.1,3.8,1.5,0.3,Iris-setosa 22 | 21,5.4,3.4,1.7,0.2,Iris-setosa 23 | 22,5.1,3.7,1.5,0.4,Iris-setosa 24 | 23,4.6,3.6,1.0,0.2,Iris-setosa 25 | 24,5.1,3.3,1.7,0.5,Iris-setosa 26 | 25,4.8,3.4,1.9,0.2,Iris-setosa 27 | 26,5.0,3.0,1.6,0.2,Iris-setosa 28 | 27,5.0,3.4,1.6,0.4,Iris-setosa 29 | 28,5.2,3.5,1.5,0.2,Iris-setosa 30 | 29,5.2,3.4,1.4,0.2,Iris-setosa 31 | 30,4.7,3.2,1.6,0.2,Iris-setosa 32 | 31,4.8,3.1,1.6,0.2,Iris-setosa 33 | 32,5.4,3.4,1.5,0.4,Iris-setosa 34 | 33,5.2,4.1,1.5,0.1,Iris-setosa 35 | 34,5.5,4.2,1.4,0.2,Iris-setosa 36 | 35,4.9,3.1,1.5,0.1,Iris-setosa 37 | 36,5.0,3.2,1.2,0.2,Iris-setosa 38 | 37,5.5,3.5,1.3,0.2,Iris-setosa 39 | 38,4.9,3.1,1.5,0.1,Iris-setosa 40 | 39,4.4,3.0,1.3,0.2,Iris-setosa 41 | 40,5.1,3.4,1.5,0.2,Iris-setosa 42 | 41,5.0,3.5,1.3,0.3,Iris-setosa 43 | 42,4.5,2.3,1.3,0.3,Iris-setosa 44 | 43,4.4,3.2,1.3,0.2,Iris-setosa 45 | 44,5.0,3.5,1.6,0.6,Iris-setosa 46 | 45,5.1,3.8,1.9,0.4,Iris-setosa 47 | 46,4.8,3.0,1.4,0.3,Iris-setosa 48 | 47,5.1,3.8,1.6,0.2,Iris-setosa 49 | 48,4.6,3.2,1.4,0.2,Iris-setosa 50 | 49,5.3,3.7,1.5,0.2,Iris-setosa 51 | 50,5.0,3.3,1.4,0.2,Iris-setosa 52 | 51,7.0,3.2,4.7,1.4,Iris-versicolor 53 | 52,6.4,3.2,4.5,1.5,Iris-versicolor 54 | 53,6.9,3.1,4.9,1.5,Iris-versicolor 55 | 54,5.5,2.3,4.0,1.3,Iris-versicolor 56 | 55,6.5,2.8,4.6,1.5,Iris-versicolor 57 | 56,5.7,2.8,4.5,1.3,Iris-versicolor 58 | 57,6.3,3.3,4.7,1.6,Iris-versicolor 59 | 58,4.9,2.4,3.3,1.0,Iris-versicolor 60 | 59,6.6,2.9,4.6,1.3,Iris-versicolor 61 | 60,5.2,2.7,3.9,1.4,Iris-versicolor 62 | 61,5.0,2.0,3.5,1.0,Iris-versicolor 63 | 62,5.9,3.0,4.2,1.5,Iris-versicolor 64 | 63,6.0,2.2,4.0,1.0,Iris-versicolor 65 | 64,6.1,2.9,4.7,1.4,Iris-versicolor 66 | 65,5.6,2.9,3.6,1.3,Iris-versicolor 67 | 66,6.7,3.1,4.4,1.4,Iris-versicolor 68 | 67,5.6,3.0,4.5,1.5,Iris-versicolor 69 | 68,5.8,2.7,4.1,1.0,Iris-versicolor 70 | 69,6.2,2.2,4.5,1.5,Iris-versicolor 71 | 70,5.6,2.5,3.9,1.1,Iris-versicolor 72 | 71,5.9,3.2,4.8,1.8,Iris-versicolor 73 | 72,6.1,2.8,4.0,1.3,Iris-versicolor 74 | 73,6.3,2.5,4.9,1.5,Iris-versicolor 75 | 74,6.1,2.8,4.7,1.2,Iris-versicolor 76 | 75,6.4,2.9,4.3,1.3,Iris-versicolor 77 | 76,6.6,3.0,4.4,1.4,Iris-versicolor 78 | 77,6.8,2.8,4.8,1.4,Iris-versicolor 79 | 78,6.7,3.0,5.0,1.7,Iris-versicolor 80 | 79,6.0,2.9,4.5,1.5,Iris-versicolor 81 | 80,5.7,2.6,3.5,1.0,Iris-versicolor 82 | 81,5.5,2.4,3.8,1.1,Iris-versicolor 83 | 82,5.5,2.4,3.7,1.0,Iris-versicolor 84 | 83,5.8,2.7,3.9,1.2,Iris-versicolor 85 | 84,6.0,2.7,5.1,1.6,Iris-versicolor 86 | 85,5.4,3.0,4.5,1.5,Iris-versicolor 87 | 86,6.0,3.4,4.5,1.6,Iris-versicolor 88 | 87,6.7,3.1,4.7,1.5,Iris-versicolor 89 | 88,6.3,2.3,4.4,1.3,Iris-versicolor 90 | 89,5.6,3.0,4.1,1.3,Iris-versicolor 91 | 90,5.5,2.5,4.0,1.3,Iris-versicolor 92 | 91,5.5,2.6,4.4,1.2,Iris-versicolor 93 | 92,6.1,3.0,4.6,1.4,Iris-versicolor 94 | 93,5.8,2.6,4.0,1.2,Iris-versicolor 95 | 94,5.0,2.3,3.3,1.0,Iris-versicolor 96 | 95,5.6,2.7,4.2,1.3,Iris-versicolor 97 | 96,5.7,3.0,4.2,1.2,Iris-versicolor 98 | 97,5.7,2.9,4.2,1.3,Iris-versicolor 99 | 98,6.2,2.9,4.3,1.3,Iris-versicolor 100 | 99,5.1,2.5,3.0,1.1,Iris-versicolor 101 | 100,5.7,2.8,4.1,1.3,Iris-versicolor 102 | 101,6.3,3.3,6.0,2.5,Iris-virginica 103 | 102,5.8,2.7,5.1,1.9,Iris-virginica 104 | 103,7.1,3.0,5.9,2.1,Iris-virginica 105 | 104,6.3,2.9,5.6,1.8,Iris-virginica 106 | 105,6.5,3.0,5.8,2.2,Iris-virginica 107 | 106,7.6,3.0,6.6,2.1,Iris-virginica 108 | 107,4.9,2.5,4.5,1.7,Iris-virginica 109 | 108,7.3,2.9,6.3,1.8,Iris-virginica 110 | 109,6.7,2.5,5.8,1.8,Iris-virginica 111 | 110,7.2,3.6,6.1,2.5,Iris-virginica 112 | 111,6.5,3.2,5.1,2.0,Iris-virginica 113 | 112,6.4,2.7,5.3,1.9,Iris-virginica 114 | 113,6.8,3.0,5.5,2.1,Iris-virginica 115 | 114,5.7,2.5,5.0,2.0,Iris-virginica 116 | 115,5.8,2.8,5.1,2.4,Iris-virginica 117 | 116,6.4,3.2,5.3,2.3,Iris-virginica 118 | 117,6.5,3.0,5.5,1.8,Iris-virginica 119 | 118,7.7,3.8,6.7,2.2,Iris-virginica 120 | 119,7.7,2.6,6.9,2.3,Iris-virginica 121 | 120,6.0,2.2,5.0,1.5,Iris-virginica 122 | 121,6.9,3.2,5.7,2.3,Iris-virginica 123 | 122,5.6,2.8,4.9,2.0,Iris-virginica 124 | 123,7.7,2.8,6.7,2.0,Iris-virginica 125 | 124,6.3,2.7,4.9,1.8,Iris-virginica 126 | 125,6.7,3.3,5.7,2.1,Iris-virginica 127 | 126,7.2,3.2,6.0,1.8,Iris-virginica 128 | 127,6.2,2.8,4.8,1.8,Iris-virginica 129 | 128,6.1,3.0,4.9,1.8,Iris-virginica 130 | 129,6.4,2.8,5.6,2.1,Iris-virginica 131 | 130,7.2,3.0,5.8,1.6,Iris-virginica 132 | 131,7.4,2.8,6.1,1.9,Iris-virginica 133 | 132,7.9,3.8,6.4,2.0,Iris-virginica 134 | 133,6.4,2.8,5.6,2.2,Iris-virginica 135 | 134,6.3,2.8,5.1,1.5,Iris-virginica 136 | 135,6.1,2.6,5.6,1.4,Iris-virginica 137 | 136,7.7,3.0,6.1,2.3,Iris-virginica 138 | 137,6.3,3.4,5.6,2.4,Iris-virginica 139 | 138,6.4,3.1,5.5,1.8,Iris-virginica 140 | 139,6.0,3.0,4.8,1.8,Iris-virginica 141 | 140,6.9,3.1,5.4,2.1,Iris-virginica 142 | 141,6.7,3.1,5.6,2.4,Iris-virginica 143 | 142,6.9,3.1,5.1,2.3,Iris-virginica 144 | 143,5.8,2.7,5.1,1.9,Iris-virginica 145 | 144,6.8,3.2,5.9,2.3,Iris-virginica 146 | 145,6.7,3.3,5.7,2.5,Iris-virginica 147 | 146,6.7,3.0,5.2,2.3,Iris-virginica 148 | 147,6.3,2.5,5.0,1.9,Iris-virginica 149 | 148,6.5,3.0,5.2,2.0,Iris-virginica 150 | 149,6.2,3.4,5.4,2.3,Iris-virginica 151 | 150,5.9,3.0,5.1,1.8,Iris-virginica 152 | -------------------------------------------------------------------------------- /Tematik_Egitimler/Makine_Ogrenmesi_Mayis_2024/Uygulama_1/encoding_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "id": "initial_id", 6 | "metadata": { 7 | "collapsed": true, 8 | "ExecuteTime": { 9 | "end_time": "2024-05-29T13:47:07.658506Z", 10 | "start_time": "2024-05-29T13:47:07.643712Z" 11 | } 12 | }, 13 | "source": [ 14 | "import random\n", 15 | "from pandas import DataFrame\n", 16 | "\n", 17 | "size = 1000\n", 18 | "\n", 19 | "months = [\"January\",\n", 20 | " \"February\",\n", 21 | " \"March\",\n", 22 | " \"April\",\n", 23 | " \"May\",\n", 24 | " \"June\",\n", 25 | " \"July\",\n", 26 | " \"August\",\n", 27 | " \"September\",\n", 28 | " \"October\",\n", 29 | " \"November\",\n", 30 | " \"December\"]\n", 31 | "\n", 32 | "df = DataFrame({\"X\": [random.randint(0, 100) for x in range(size)],\n", 33 | " \"Y\": [random.choice(months) for y in range(size)]})\n", 34 | "\n", 35 | "df.head(10)" 36 | ], 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | " X Y\n", 42 | "0 44 June\n", 43 | "1 3 September\n", 44 | "2 98 July\n", 45 | "3 60 May\n", 46 | "4 84 July\n", 47 | "5 63 May\n", 48 | "6 62 July\n", 49 | "7 19 February\n", 50 | "8 37 October\n", 51 | "9 37 March" 52 | ], 53 | "text/html": [ 54 | "
\n", 55 | "\n", 68 | "\n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | "
XY
044June
13September
298July
360May
484July
563May
662July
719February
837October
937March
\n", 129 | "
" 130 | ] 131 | }, 132 | "execution_count": 1, 133 | "metadata": {}, 134 | "output_type": "execute_result" 135 | } 136 | ], 137 | "execution_count": 1 138 | }, 139 | { 140 | "cell_type": "code", 141 | "source": [ 142 | "from utils_encoding import ordinal_encoding, encode_one_hot" 143 | ], 144 | "metadata": { 145 | "collapsed": false, 146 | "ExecuteTime": { 147 | "end_time": "2024-05-29T13:47:07.855628Z", 148 | "start_time": "2024-05-29T13:47:07.659867Z" 149 | } 150 | }, 151 | "id": "de3f437b7860f219", 152 | "outputs": [], 153 | "execution_count": 2 154 | }, 155 | { 156 | "cell_type": "code", 157 | "source": [ 158 | "df2 = ordinal_encoding(df, column=\"Y\")" 159 | ], 160 | "metadata": { 161 | "collapsed": false, 162 | "ExecuteTime": { 163 | "end_time": "2024-05-29T13:47:07.969953Z", 164 | "start_time": "2024-05-29T13:47:07.858338Z" 165 | } 166 | }, 167 | "id": "240fe879972f6387", 168 | "outputs": [], 169 | "execution_count": 3 170 | }, 171 | { 172 | "cell_type": "code", 173 | "source": [ 174 | "df2.head(100)" 175 | ], 176 | "metadata": { 177 | "collapsed": false, 178 | "ExecuteTime": { 179 | "end_time": "2024-05-29T13:47:08.109668Z", 180 | "start_time": "2024-05-29T13:47:07.971338Z" 181 | } 182 | }, 183 | "id": "8cf0a58a72d9b559", 184 | "outputs": [ 185 | { 186 | "data": { 187 | "text/plain": [ 188 | " X Y YOrdT\n", 189 | "0 44 June 6.0\n", 190 | "1 3 September 11.0\n", 191 | "2 98 July 5.0\n", 192 | "3 60 May 8.0\n", 193 | "4 84 July 5.0\n", 194 | ".. .. ... ...\n", 195 | "95 82 July 5.0\n", 196 | "96 13 August 1.0\n", 197 | "97 40 May 8.0\n", 198 | "98 58 December 2.0\n", 199 | "99 53 May 8.0\n", 200 | "\n", 201 | "[100 rows x 3 columns]" 202 | ], 203 | "text/html": [ 204 | "
\n", 205 | "\n", 218 | "\n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | "
XYYOrdT
044June6.0
13September11.0
298July5.0
360May8.0
484July5.0
............
9582July5.0
9613August1.0
9740May8.0
9858December2.0
9953May8.0
\n", 296 | "

100 rows × 3 columns

\n", 297 | "
" 298 | ] 299 | }, 300 | "execution_count": 4, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "execution_count": 4 306 | }, 307 | { 308 | "cell_type": "code", 309 | "source": [ 310 | "df3 = encode_one_hot(df, column=\"Y\")" 311 | ], 312 | "metadata": { 313 | "collapsed": false, 314 | "ExecuteTime": { 315 | "end_time": "2024-05-29T13:47:08.319424Z", 316 | "start_time": "2024-05-29T13:47:08.111099Z" 317 | } 318 | }, 319 | "id": "58e5ac11d389e16f", 320 | "outputs": [], 321 | "execution_count": 5 322 | }, 323 | { 324 | "cell_type": "code", 325 | "source": [ 326 | "df3.head(100)" 327 | ], 328 | "metadata": { 329 | "collapsed": false, 330 | "ExecuteTime": { 331 | "end_time": "2024-05-29T13:47:08.490888Z", 332 | "start_time": "2024-05-29T13:47:08.320899Z" 333 | } 334 | }, 335 | "id": "b8ad526557ea1f10", 336 | "outputs": [ 337 | { 338 | "data": { 339 | "text/plain": [ 340 | " X Y YOrdT Y_T0 Y_T1 Y_T2 Y_T3 Y_T4 Y_T5 Y_T6 Y_T7 \\\n", 341 | "0 44 June 6.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", 342 | "1 3 September 11.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 343 | "2 98 July 5.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", 344 | "3 60 May 8.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 345 | "4 84 July 5.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", 346 | ".. .. ... ... ... ... ... ... ... ... ... ... \n", 347 | "95 82 July 5.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", 348 | "96 13 August 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 349 | "97 40 May 8.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 350 | "98 58 December 2.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", 351 | "99 53 May 8.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", 352 | "\n", 353 | " Y_T8 Y_T9 Y_T10 Y_T11 \n", 354 | "0 0.0 0.0 0.0 0.0 \n", 355 | "1 0.0 0.0 0.0 1.0 \n", 356 | "2 0.0 0.0 0.0 0.0 \n", 357 | "3 1.0 0.0 0.0 0.0 \n", 358 | "4 0.0 0.0 0.0 0.0 \n", 359 | ".. ... ... ... ... \n", 360 | "95 0.0 0.0 0.0 0.0 \n", 361 | "96 0.0 0.0 0.0 0.0 \n", 362 | "97 1.0 0.0 0.0 0.0 \n", 363 | "98 0.0 0.0 0.0 0.0 \n", 364 | "99 1.0 0.0 0.0 0.0 \n", 365 | "\n", 366 | "[100 rows x 15 columns]" 367 | ], 368 | "text/html": [ 369 | "
\n", 370 | "\n", 383 | "\n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | "
XYYOrdTY_T0Y_T1Y_T2Y_T3Y_T4Y_T5Y_T6Y_T7Y_T8Y_T9Y_T10Y_T11
044June6.00.00.00.00.00.00.01.00.00.00.00.00.0
13September11.00.00.00.00.00.00.00.00.00.00.00.01.0
298July5.00.00.00.00.00.01.00.00.00.00.00.00.0
360May8.00.00.00.00.00.00.00.00.01.00.00.00.0
484July5.00.00.00.00.00.01.00.00.00.00.00.00.0
................................................
9582July5.00.00.00.00.00.01.00.00.00.00.00.00.0
9613August1.00.01.00.00.00.00.00.00.00.00.00.00.0
9740May8.00.00.00.00.00.00.00.00.01.00.00.00.0
9858December2.00.00.01.00.00.00.00.00.00.00.00.00.0
9953May8.00.00.00.00.00.00.00.00.01.00.00.00.0
\n", 605 | "

100 rows × 15 columns

\n", 606 | "
" 607 | ] 608 | }, 609 | "execution_count": 6, 610 | "metadata": {}, 611 | "output_type": "execute_result" 612 | } 613 | ], 614 | "execution_count": 6 615 | }, 616 | { 617 | "cell_type": "code", 618 | "source": [], 619 | "metadata": { 620 | "collapsed": false, 621 | "ExecuteTime": { 622 | "end_time": "2024-05-29T13:47:08.494934Z", 623 | "start_time": "2024-05-29T13:47:08.492459Z" 624 | } 625 | }, 626 | "id": "a439ae06155a80c4", 627 | "outputs": [], 628 | "execution_count": 6 629 | } 630 | ], 631 | "metadata": { 632 | "kernelspec": { 633 | "display_name": "Python 3", 634 | "language": "python", 635 | "name": "python3" 636 | }, 637 | "language_info": { 638 | "codemirror_mode": { 639 | "name": "ipython", 640 | "version": 2 641 | }, 642 | "file_extension": ".py", 643 | "mimetype": "text/x-python", 644 | "name": "python", 645 | "nbconvert_exporter": "python", 646 | "pygments_lexer": "ipython2", 647 | "version": "2.7.6" 648 | } 649 | }, 650 | "nbformat": 4, 651 | "nbformat_minor": 5 652 | } 653 | -------------------------------------------------------------------------------- /Tematik_Egitimler/Makine_Ogrenmesi_Mayis_2024/Uygulama_1/transformations.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pandas import DataFrame 4 | from sklearn.preprocessing import PowerTransformer, StandardScaler 5 | 6 | 7 | def standardize_dataset(df: DataFrame): 8 | scaler = StandardScaler().fit(df) 9 | df2 = DataFrame(data=scaler.transform(df), columns=df.columns) 10 | return df2 11 | 12 | 13 | def normalize_dataset(df: DataFrame, 14 | all_pos=False, 15 | min_val: float = 0.000001, 16 | exclude: List[str] = []): 17 | df2 = df.copy() 18 | for c in df.columns: 19 | if c not in exclude: 20 | if all_pos: 21 | df2[c] = min_val + (df[c] - df[c].min()) / (df[c].max() - df[c].min() - min_val) 22 | else: 23 | df2[c] = (df[c] - df[c].min()) / (df[c].max() - df[c].min()) 24 | return df2 25 | 26 | 27 | def box_cox(data, 28 | normalize=False, 29 | standardize=False): 30 | fitter = PowerTransformer(method="box-cox", 31 | standardize=standardize) 32 | dt = fitter.fit_transform(data) 33 | if normalize: 34 | dt = (dt - dt.min()) / (dt.max() - dt.min()) 35 | return dt 36 | 37 | 38 | def yeo_johnson(data, 39 | normalize=False, 40 | standardize=False): 41 | fitter = PowerTransformer(method="yeo-johnson", 42 | standardize=standardize) 43 | dt = fitter.fit_transform(data) 44 | if normalize: 45 | dt = (dt - dt.min()) / (dt.max() - dt.min()) 46 | return dt 47 | -------------------------------------------------------------------------------- /Tematik_Egitimler/Makine_Ogrenmesi_Mayis_2024/Uygulama_1/utils_encoding.py: -------------------------------------------------------------------------------- 1 | from pandas import DataFrame 2 | from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder 3 | 4 | 5 | def encode_one_hot(df: DataFrame, 6 | column: str) -> DataFrame: 7 | encoder = OneHotEncoder(handle_unknown='ignore', 8 | sparse_output=False) 9 | df2 = DataFrame(data=df[column], 10 | columns=[column]) 11 | encoder.fit(df2) 12 | transformed = encoder.transform(df2) 13 | for column_counter in range(transformed.shape[1]): 14 | column_name = column + "_T" + str(column_counter) 15 | df[column_name] = transformed[:, column_counter] 16 | return df 17 | 18 | 19 | def ordinal_encoding(df: DataFrame, 20 | column: str) -> DataFrame: 21 | encoder = OrdinalEncoder() 22 | df2 = DataFrame(data=df[column], 23 | columns=[column]) 24 | encoder.fit(df2) 25 | df[column + "OrdT"] = encoder.transform(df2) 26 | return df 27 | -------------------------------------------------------------------------------- /Tematik_Egitimler/Makine_Ogrenmesi_Mayis_2024/Uygulama_1/utils_missing_data.py: -------------------------------------------------------------------------------- 1 | import pandas, numpy, random, copy 2 | 3 | from sklearn.svm import SVC 4 | from sklearn.pipeline import make_pipeline 5 | from sklearn.preprocessing import StandardScaler 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.svm import SVR 8 | from sklearn.metrics import r2_score, root_mean_squared_error, mean_squared_error 9 | from sklearn.metrics import accuracy_score, f1_score 10 | from matplotlib import pyplot as plotter 11 | from typing import List 12 | 13 | # Basic Functionalities 14 | 15 | # Show Feature Completeness 16 | # Show Line Completeness 17 | 18 | 19 | # Show Overall Completeness 20 | 21 | def completeness_overall(df: pandas.DataFrame): 22 | if df.size > 0: 23 | return 100 - (df.isna().sum().sum() / df.size * 100) 24 | return 0 25 | 26 | 27 | def completeness_feature(df: pandas.DataFrame): 28 | report = {} 29 | for colum_name in df.columns: 30 | report[colum_name] = completeness_overall(df[colum_name]) 31 | return report 32 | 33 | 34 | def completeness_data_point(df: pandas.DataFrame): 35 | return df.notna().all(axis=1).sum(), df.notna().all(axis=1).sum() / df.shape[0] * 100 36 | 37 | 38 | def remove_columns(df: pandas.DataFrame, 39 | completeness_limit: float): 40 | comp_report = completeness_feature(df) 41 | to_drop = [] 42 | for feature_name in comp_report: 43 | if comp_report[feature_name] < completeness_limit: 44 | to_drop.append(feature_name) 45 | print(str(len(to_drop)) + " columns dropped") 46 | return df.drop(columns=to_drop) 47 | 48 | 49 | def parse_date(df: pandas.DataFrame, 50 | one_hot_encoded: bool = False): 51 | # Date Format: 27.01.2023 05:00:56 52 | df["month"] = pandas.Series(dtype=int) 53 | df["hour"] = pandas.Series(dtype=int) 54 | for index, row in df.iterrows(): 55 | time_stamp = row['DATE'] 56 | parsed_ts = time_stamp.split(" ") 57 | date = parsed_ts[0] 58 | time = parsed_ts[1] 59 | parsed_date = date.split(".") 60 | M = parsed_date[1] 61 | hour = time.split(":")[0] 62 | df.at[index, "month"] = M 63 | df.at[index, "hour"] = hour 64 | if not one_hot_encoded: 65 | return df 66 | df = pandas.get_dummies(df, 67 | columns=['month', 'hour']) 68 | return df 69 | 70 | 71 | def introduce_nans(df: pandas.DataFrame, 72 | row_ratio: float, 73 | ratio: float, 74 | border: float = None): 75 | # Add NaN values to dataframe 76 | # Ratio_rows: Ratio of rows that will be nanned 77 | # Ratio: Ratio of the number of nans that will be added to each row 78 | # Borders: The variance between min and max number of nan for each row 79 | 80 | if border is None: 81 | border = 0 82 | for index, row in df.iterrows(): 83 | if random.random() < row_ratio: 84 | no_of_nans = int((border + ratio) * df.shape[1]) 85 | nan_locs = random.sample(range(0, df.shape[1]), no_of_nans) 86 | for nan_loc in nan_locs: 87 | df.at[index, df.columns[nan_loc]] = numpy.nan 88 | return df 89 | 90 | 91 | def report_completeness(df: pandas.DataFrame, 92 | detail: bool = False): 93 | comp_row, compp_row_perc = completeness_data_point(df) 94 | if detail: 95 | print("Overall completeness: " + str(completeness_overall(df))) 96 | comps = completeness_feature(df) 97 | 98 | for d_key in comps: 99 | print(d_key + " - " + str(comps[d_key])) 100 | 101 | print("COMP ROW COUNT: " + str(comp_row)) 102 | print("COMP ROW PERC%: " + str(compp_row_perc)) 103 | else: 104 | print("Overall: " + str(completeness_overall(df)) + " - COMP ROW: " + str(comp_row) + " - COMP ROW PERC%: " + str(compp_row_perc)) 105 | 106 | 107 | def sub_dataframe(df: pandas.DataFrame, 108 | pollutant: str = None, 109 | station: str = None, 110 | data_completeness: float = None, 111 | year: int = None): 112 | if pollutant is None and station is None and data_completeness is None and year is None: 113 | print("No selection criteria!") 114 | return df 115 | 116 | neo_df = copy.deepcopy(df) 117 | 118 | # Filter Pollutants 119 | if pollutant is not None: 120 | for column_name in neo_df.columns: 121 | if pollutant not in column_name and column_name != 'DATE': 122 | neo_df = neo_df.drop([column_name], axis=1) 123 | 124 | # Filter Stations 125 | if station is not None: 126 | for column_name in neo_df.columns: 127 | if str(station + "_") not in column_name and column_name != 'DATE': 128 | neo_df = neo_df.drop([column_name], axis=1) 129 | 130 | # Filter Data Completeness 131 | if data_completeness is not None: 132 | for column_name in neo_df.columns: 133 | if completeness_feature(neo_df[column_name]) < data_completeness and column_name != 'DATE': 134 | neo_df = neo_df.drop([column_name], axis=1) 135 | 136 | # Filter Year 137 | if year is not None: 138 | neo_df = neo_df[neo_df['DATE'].str.contains(str(year))] 139 | 140 | return neo_df 141 | 142 | 143 | # Air Quality 144 | 145 | 146 | pols = ["NO2", "PM10", "O3", "PM25"] 147 | pol_levels = {"NO2": [50, 100, 200, 400], 148 | "PM10": [25, 50, 90, 180], 149 | "O3": [60, 120, 180, 240], 150 | "PM25": [15, 30, 55, 110]} 151 | 152 | 153 | def polcon2AQI(pol_name: str, val:float): 154 | global pols 155 | registered_pollutant = False 156 | pol_id = None 157 | for pol in pols: 158 | if pol in pol_name: 159 | registered_pollutant = True 160 | pol_id = pol 161 | if not registered_pollutant: 162 | return 0 163 | if val < pol_levels[pol_id][0]: 164 | return 1 165 | elif val < pol_levels[pol_id][1]: 166 | return 2 167 | elif val < pol_levels[pol_id][2]: 168 | return 3 169 | elif val < pol_levels[pol_id][3]: 170 | return 4 171 | return 5 172 | 173 | 174 | def generate_class_label(row: pandas.Series): 175 | vals = [] 176 | for key in row.keys(): 177 | vals.append(polcon2AQI(pol_name=key, 178 | val=row[key])) 179 | return max(vals) 180 | 181 | 182 | def generate_class_labels(df: pandas.DataFrame): 183 | AQIs = [] 184 | for index, row in df.iterrows(): 185 | AQIs.append(generate_class_label(row=dict(row))) 186 | AQIs.append(0) 187 | df.loc[:, "OUT"] = AQIs[1:] 188 | return df 189 | 190 | 191 | # Classification 192 | 193 | 194 | def test_data_classification(df: pandas.DataFrame, 195 | df_org: pandas.DataFrame, 196 | k: int = 5, 197 | data_out: bool = False): 198 | # df -> Imputed Dataset 199 | # df_org -> Original Dataset 200 | # k -> Cross Fold K 201 | 202 | if data_out: 203 | _data_out(df, df_org) 204 | 205 | test_size = 1.0 / k 206 | accuracies = [] 207 | 208 | for cv_iter in range(k): 209 | imp_data_out = df["OUT"].to_numpy() 210 | imp_data_in = df.iloc[:, :-1].to_numpy() 211 | org_data_out = df_org["OUT"].to_numpy() 212 | org_data_in = df_org.iloc[:, :-1].to_numpy() 213 | 214 | test_indexes = [int((cv_iter * test_size) * df.shape[0]), int(((cv_iter + 1) * test_size) * df.shape[0])] 215 | data_in_test = org_data_in[test_indexes[0]: test_indexes[1],:] 216 | data_out_test = org_data_out[test_indexes[0]: test_indexes[1]] 217 | data_in_train = numpy.delete(imp_data_in, range(test_indexes[0], test_indexes[1]), 0) 218 | data_out_train = numpy.delete(imp_data_out, range(test_indexes[0], test_indexes[1]), 0) 219 | 220 | clf = make_pipeline(StandardScaler(), SVC(gamma='auto')) 221 | clf.fit(data_in_train, data_out_train) 222 | data_out_pred = clf.predict(data_in_test) 223 | 224 | accuracies.append(accuracy_score(data_out_test, data_out_pred)) 225 | 226 | return sum(accuracies) / len(accuracies) 227 | 228 | 229 | def test_data_regression(df:pandas.DataFrame, df_org:pandas.DataFrame, 230 | k:int = 5, 231 | kernel:str = "linear", 232 | data_out=False): 233 | # df -> Imputed Dataset 234 | # df_org -> Orginal Dataset 235 | # k -> Cross Fold K 236 | 237 | if data_out: 238 | _data_out(df, df_org) 239 | 240 | test_size = 1.0 / k 241 | 242 | accuracies_rmse = [] 243 | accuracies_r2 = [] 244 | accuracies_mse = [] 245 | 246 | for cv_iter in range(k): 247 | imp_data_out = df["OUT"].to_numpy() 248 | imp_data_in = df.iloc[:, :-1].to_numpy() 249 | org_data_out = df_org["OUT"].to_numpy() 250 | org_data_in = df_org.iloc[:, :-1].to_numpy() 251 | 252 | test_indexes = [int((cv_iter * test_size) * df.shape[0]), int(((cv_iter + 1) * test_size) * df.shape[0])] 253 | data_in_test = org_data_in[test_indexes[0]: test_indexes[1],:] 254 | data_out_test = org_data_out[test_indexes[0]: test_indexes[1]] 255 | data_in_train = numpy.delete(imp_data_in, range(test_indexes[0], test_indexes[1]), 0) 256 | data_out_train = numpy.delete(imp_data_out, range(test_indexes[0], test_indexes[1]), 0) 257 | 258 | regr = make_pipeline(StandardScaler(), SVR(C=1.0, epsilon=0.2)) 259 | regr.fit(data_in_train, data_out_train) 260 | data_out_pred = regr.predict(data_in_test) 261 | 262 | accuracies_rmse.append(root_mean_squared_error(data_out_test, data_out_pred)) 263 | accuracies_mse.append(mean_squared_error(data_out_test, data_out_pred)) 264 | accuracies_r2.append(r2_score(data_out_test, data_out_pred)) 265 | 266 | return (sum(accuracies_rmse) / len(accuracies_rmse), 267 | sum(accuracies_mse) / len(accuracies_mse), 268 | sum(accuracies_r2) / len(accuracies_r2)) 269 | 270 | 271 | DATA_OUT_COUNTER = 0 272 | 273 | 274 | def _data_out(df: pandas.DataFrame, 275 | df_org: pandas.DataFrame): 276 | global DATA_OUT_COUNTER 277 | df.to_csv("temp/nanned" + str(DATA_OUT_COUNTER) + ".csv") 278 | df_org.to_csv("temp/org" + str(DATA_OUT_COUNTER) + ".csv") 279 | 280 | from typing import List 281 | 282 | def sum_results(res_list: List[dict]): 283 | summed_dict = {} 284 | for ress in res_list: 285 | for re_dict_key in ress.keys(): 286 | if re_dict_key not in summed_dict.keys(): 287 | summed_dict[re_dict_key] = [ress[re_dict_key]] 288 | else: 289 | summed_dict[re_dict_key].append(ress[re_dict_key]) 290 | 291 | for keyy in summed_dict.keys(): 292 | summed_dict[keyy] = sum(summed_dict[keyy]) / len(summed_dict[keyy]) 293 | return summed_dict 294 | 295 | def plot_results(data_in: List[dict]): 296 | data = sum_results(data_in) 297 | 298 | names = list(data.keys()) 299 | values = list(data.values()) 300 | 301 | plotter.bar(range(len(data)), values, tick_label=names) 302 | plotter.show() -------------------------------------------------------------------------------- /Tematik_Egitimler/Makine_Ogrenmesi_Mayis_2024/Uygulama_2/data/car_prices.csv: -------------------------------------------------------------------------------- 1 | car_ID,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,carwidth,carheight,curbweight,enginetype,cylindernumber,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price 2 | 1,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,48.8,2548,dohc,four,130,mpfi,3.47,2.68,9,111,5000,21,27,13495 3 | 2,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,48.8,2548,dohc,four,130,mpfi,3.47,2.68,9,111,5000,21,27,16500 4 | 3,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,171.2,65.5,52.4,2823,ohcv,six,152,mpfi,2.68,3.47,9,154,5000,19,26,16500 5 | 4,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,176.6,66.2,54.3,2337,ohc,four,109,mpfi,3.19,3.4,10,102,5500,24,30,13950 6 | 5,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,176.6,66.4,54.3,2824,ohc,five,136,mpfi,3.19,3.4,8,115,5500,18,22,17450 7 | 6,2,audi fox,gas,std,two,sedan,fwd,front,99.8,177.3,66.3,53.1,2507,ohc,five,136,mpfi,3.19,3.4,8.5,110,5500,19,25,15250 8 | 7,1,audi 100ls,gas,std,four,sedan,fwd,front,105.8,192.7,71.4,55.7,2844,ohc,five,136,mpfi,3.19,3.4,8.5,110,5500,19,25,17710 9 | 8,1,audi 5000,gas,std,four,wagon,fwd,front,105.8,192.7,71.4,55.7,2954,ohc,five,136,mpfi,3.19,3.4,8.5,110,5500,19,25,18920 10 | 9,1,audi 4000,gas,turbo,four,sedan,fwd,front,105.8,192.7,71.4,55.9,3086,ohc,five,131,mpfi,3.13,3.4,8.3,140,5500,17,20,23875 11 | 10,0,audi 5000s (diesel),gas,turbo,two,hatchback,4wd,front,99.5,178.2,67.9,52,3053,ohc,five,131,mpfi,3.13,3.4,7,160,5500,16,22,17859.167 12 | 11,2,bmw 320i,gas,std,two,sedan,rwd,front,101.2,176.8,64.8,54.3,2395,ohc,four,108,mpfi,3.5,2.8,8.8,101,5800,23,29,16430 13 | 12,0,bmw 320i,gas,std,four,sedan,rwd,front,101.2,176.8,64.8,54.3,2395,ohc,four,108,mpfi,3.5,2.8,8.8,101,5800,23,29,16925 14 | 13,0,bmw x1,gas,std,two,sedan,rwd,front,101.2,176.8,64.8,54.3,2710,ohc,six,164,mpfi,3.31,3.19,9,121,4250,21,28,20970 15 | 14,0,bmw x3,gas,std,four,sedan,rwd,front,101.2,176.8,64.8,54.3,2765,ohc,six,164,mpfi,3.31,3.19,9,121,4250,21,28,21105 16 | 15,1,bmw z4,gas,std,four,sedan,rwd,front,103.5,189,66.9,55.7,3055,ohc,six,164,mpfi,3.31,3.19,9,121,4250,20,25,24565 17 | 16,0,bmw x4,gas,std,four,sedan,rwd,front,103.5,189,66.9,55.7,3230,ohc,six,209,mpfi,3.62,3.39,8,182,5400,16,22,30760 18 | 17,0,bmw x5,gas,std,two,sedan,rwd,front,103.5,193.8,67.9,53.7,3380,ohc,six,209,mpfi,3.62,3.39,8,182,5400,16,22,41315 19 | 18,0,bmw x3,gas,std,four,sedan,rwd,front,110,197,70.9,56.3,3505,ohc,six,209,mpfi,3.62,3.39,8,182,5400,15,20,36880 20 | 19,2,chevrolet impala,gas,std,two,hatchback,fwd,front,88.4,141.1,60.3,53.2,1488,l,three,61,2bbl,2.91,3.03,9.5,48,5100,47,53,5151 21 | 20,1,chevrolet monte carlo,gas,std,two,hatchback,fwd,front,94.5,155.9,63.6,52,1874,ohc,four,90,2bbl,3.03,3.11,9.6,70,5400,38,43,6295 22 | 21,0,chevrolet vega 2300,gas,std,four,sedan,fwd,front,94.5,158.8,63.6,52,1909,ohc,four,90,2bbl,3.03,3.11,9.6,70,5400,38,43,6575 23 | 22,1,dodge rampage,gas,std,two,hatchback,fwd,front,93.7,157.3,63.8,50.8,1876,ohc,four,90,2bbl,2.97,3.23,9.41,68,5500,37,41,5572 24 | 23,1,dodge challenger se,gas,std,two,hatchback,fwd,front,93.7,157.3,63.8,50.8,1876,ohc,four,90,2bbl,2.97,3.23,9.4,68,5500,31,38,6377 25 | 24,1,dodge d200,gas,turbo,two,hatchback,fwd,front,93.7,157.3,63.8,50.8,2128,ohc,four,98,mpfi,3.03,3.39,7.6,102,5500,24,30,7957 26 | 25,1,dodge monaco (sw),gas,std,four,hatchback,fwd,front,93.7,157.3,63.8,50.6,1967,ohc,four,90,2bbl,2.97,3.23,9.4,68,5500,31,38,6229 27 | 26,1,dodge colt hardtop,gas,std,four,sedan,fwd,front,93.7,157.3,63.8,50.6,1989,ohc,four,90,2bbl,2.97,3.23,9.4,68,5500,31,38,6692 28 | 27,1,dodge colt (sw),gas,std,four,sedan,fwd,front,93.7,157.3,63.8,50.6,1989,ohc,four,90,2bbl,2.97,3.23,9.4,68,5500,31,38,7609 29 | 28,1,dodge coronet custom,gas,turbo,two,sedan,fwd,front,93.7,157.3,63.8,50.6,2191,ohc,four,98,mpfi,3.03,3.39,7.6,102,5500,24,30,8558 30 | 29,-1,dodge dart custom,gas,std,four,wagon,fwd,front,103.3,174.6,64.6,59.8,2535,ohc,four,122,2bbl,3.34,3.46,8.5,88,5000,24,30,8921 31 | 30,3,dodge coronet custom (sw),gas,turbo,two,hatchback,fwd,front,95.9,173.2,66.3,50.2,2811,ohc,four,156,mfi,3.6,3.9,7,145,5000,19,24,12964 32 | 31,2,honda civic,gas,std,two,hatchback,fwd,front,86.6,144.6,63.9,50.8,1713,ohc,four,92,1bbl,2.91,3.41,9.6,58,4800,49,54,6479 33 | 32,2,honda civic cvcc,gas,std,two,hatchback,fwd,front,86.6,144.6,63.9,50.8,1819,ohc,four,92,1bbl,2.91,3.41,9.2,76,6000,31,38,6855 34 | 33,1,honda civic,gas,std,two,hatchback,fwd,front,93.7,150,64,52.6,1837,ohc,four,79,1bbl,2.91,3.07,10.1,60,5500,38,42,5399 35 | 34,1,honda accord cvcc,gas,std,two,hatchback,fwd,front,93.7,150,64,52.6,1940,ohc,four,92,1bbl,2.91,3.41,9.2,76,6000,30,34,6529 36 | 35,1,honda civic cvcc,gas,std,two,hatchback,fwd,front,93.7,150,64,52.6,1956,ohc,four,92,1bbl,2.91,3.41,9.2,76,6000,30,34,7129 37 | 36,0,honda accord lx,gas,std,four,sedan,fwd,front,96.5,163.4,64,54.5,2010,ohc,four,92,1bbl,2.91,3.41,9.2,76,6000,30,34,7295 38 | 37,0,honda civic 1500 gl,gas,std,four,wagon,fwd,front,96.5,157.1,63.9,58.3,2024,ohc,four,92,1bbl,2.92,3.41,9.2,76,6000,30,34,7295 39 | 38,0,honda accord,gas,std,two,hatchback,fwd,front,96.5,167.5,65.2,53.3,2236,ohc,four,110,1bbl,3.15,3.58,9,86,5800,27,33,7895 40 | 39,0,honda civic 1300,gas,std,two,hatchback,fwd,front,96.5,167.5,65.2,53.3,2289,ohc,four,110,1bbl,3.15,3.58,9,86,5800,27,33,9095 41 | 40,0,honda prelude,gas,std,four,sedan,fwd,front,96.5,175.4,65.2,54.1,2304,ohc,four,110,1bbl,3.15,3.58,9,86,5800,27,33,8845 42 | 41,0,honda accord,gas,std,four,sedan,fwd,front,96.5,175.4,62.5,54.1,2372,ohc,four,110,1bbl,3.15,3.58,9,86,5800,27,33,10295 43 | 42,0,honda civic,gas,std,four,sedan,fwd,front,96.5,175.4,65.2,54.1,2465,ohc,four,110,mpfi,3.15,3.58,9,101,5800,24,28,12945 44 | 43,1,honda civic (auto),gas,std,two,sedan,fwd,front,96.5,169.1,66,51,2293,ohc,four,110,2bbl,3.15,3.58,9.1,100,5500,25,31,10345 45 | 44,0,isuzu MU-X,gas,std,four,sedan,rwd,front,94.3,170.7,61.8,53.5,2337,ohc,four,111,2bbl,3.31,3.23,8.5,78,4800,24,29,6785 46 | 45,1,isuzu D-Max ,gas,std,two,sedan,fwd,front,94.5,155.9,63.6,52,1874,ohc,four,90,2bbl,3.03,3.11,9.6,70,5400,38,43,8916.5 47 | 46,0,isuzu D-Max V-Cross,gas,std,four,sedan,fwd,front,94.5,155.9,63.6,52,1909,ohc,four,90,2bbl,3.03,3.11,9.6,70,5400,38,43,8916.5 48 | 47,2,isuzu D-Max ,gas,std,two,hatchback,rwd,front,96,172.6,65.2,51.4,2734,ohc,four,119,spfi,3.43,3.23,9.2,90,5000,24,29,11048 49 | 48,0,jaguar xj,gas,std,four,sedan,rwd,front,113,199.6,69.6,52.8,4066,dohc,six,258,mpfi,3.63,4.17,8.1,176,4750,15,19,32250 50 | 49,0,jaguar xf,gas,std,four,sedan,rwd,front,113,199.6,69.6,52.8,4066,dohc,six,258,mpfi,3.63,4.17,8.1,176,4750,15,19,35550 51 | 50,0,jaguar xk,gas,std,two,sedan,rwd,front,102,191.7,70.6,47.8,3950,ohcv,twelve,326,mpfi,3.54,2.76,11.5,262,5000,13,17,36000 52 | 51,1,maxda rx3,gas,std,two,hatchback,fwd,front,93.1,159.1,64.2,54.1,1890,ohc,four,91,2bbl,3.03,3.15,9,68,5000,30,31,5195 53 | 52,1,maxda glc deluxe,gas,std,two,hatchback,fwd,front,93.1,159.1,64.2,54.1,1900,ohc,four,91,2bbl,3.03,3.15,9,68,5000,31,38,6095 54 | 53,1,mazda rx2 coupe,gas,std,two,hatchback,fwd,front,93.1,159.1,64.2,54.1,1905,ohc,four,91,2bbl,3.03,3.15,9,68,5000,31,38,6795 55 | 54,1,mazda rx-4,gas,std,four,sedan,fwd,front,93.1,166.8,64.2,54.1,1945,ohc,four,91,2bbl,3.03,3.15,9,68,5000,31,38,6695 56 | 55,1,mazda glc deluxe,gas,std,four,sedan,fwd,front,93.1,166.8,64.2,54.1,1950,ohc,four,91,2bbl,3.08,3.15,9,68,5000,31,38,7395 57 | 56,3,mazda 626,gas,std,two,hatchback,rwd,front,95.3,169,65.7,49.6,2380,rotor,two,70,4bbl,3.33,3.255,9.4,101,6000,17,23,10945 58 | 57,3,mazda glc,gas,std,two,hatchback,rwd,front,95.3,169,65.7,49.6,2380,rotor,two,70,4bbl,3.33,3.255,9.4,101,6000,17,23,11845 59 | 58,3,mazda rx-7 gs,gas,std,two,hatchback,rwd,front,95.3,169,65.7,49.6,2385,rotor,two,70,4bbl,3.33,3.255,9.4,101,6000,17,23,13645 60 | 59,3,mazda glc 4,gas,std,two,hatchback,rwd,front,95.3,169,65.7,49.6,2500,rotor,two,80,mpfi,3.33,3.255,9.4,135,6000,16,23,15645 61 | 60,1,mazda 626,gas,std,two,hatchback,fwd,front,98.8,177.8,66.5,53.7,2385,ohc,four,122,2bbl,3.39,3.39,8.6,84,4800,26,32,8845 62 | 61,0,mazda glc custom l,gas,std,four,sedan,fwd,front,98.8,177.8,66.5,55.5,2410,ohc,four,122,2bbl,3.39,3.39,8.6,84,4800,26,32,8495 63 | 62,1,mazda glc custom,gas,std,two,hatchback,fwd,front,98.8,177.8,66.5,53.7,2385,ohc,four,122,2bbl,3.39,3.39,8.6,84,4800,26,32,10595 64 | 63,0,mazda rx-4,gas,std,four,sedan,fwd,front,98.8,177.8,66.5,55.5,2410,ohc,four,122,2bbl,3.39,3.39,8.6,84,4800,26,32,10245 65 | 64,0,mazda glc deluxe,diesel,std,four,sedan,fwd,front,98.8,177.8,66.5,55.5,2443,ohc,four,122,idi,3.39,3.39,22.7,64,4650,36,42,10795 66 | 65,0,mazda 626,gas,std,four,hatchback,fwd,front,98.8,177.8,66.5,55.5,2425,ohc,four,122,2bbl,3.39,3.39,8.6,84,4800,26,32,11245 67 | 66,0,mazda glc,gas,std,four,sedan,rwd,front,104.9,175,66.1,54.4,2670,ohc,four,140,mpfi,3.76,3.16,8,120,5000,19,27,18280 68 | 67,0,mazda rx-7 gs,diesel,std,four,sedan,rwd,front,104.9,175,66.1,54.4,2700,ohc,four,134,idi,3.43,3.64,22,72,4200,31,39,18344 69 | 68,-1,buick electra 225 custom,diesel,turbo,four,sedan,rwd,front,110,190.9,70.3,56.5,3515,ohc,five,183,idi,3.58,3.64,21.5,123,4350,22,25,25552 70 | 69,-1,buick century luxus (sw),diesel,turbo,four,wagon,rwd,front,110,190.9,70.3,58.7,3750,ohc,five,183,idi,3.58,3.64,21.5,123,4350,22,25,28248 71 | 70,0,buick century,diesel,turbo,two,hardtop,rwd,front,106.7,187.5,70.3,54.9,3495,ohc,five,183,idi,3.58,3.64,21.5,123,4350,22,25,28176 72 | 71,-1,buick skyhawk,diesel,turbo,four,sedan,rwd,front,115.6,202.6,71.7,56.3,3770,ohc,five,183,idi,3.58,3.64,21.5,123,4350,22,25,31600 73 | 72,-1,buick opel isuzu deluxe,gas,std,four,sedan,rwd,front,115.6,202.6,71.7,56.5,3740,ohcv,eight,234,mpfi,3.46,3.1,8.3,155,4750,16,18,34184 74 | 73,3,buick skylark,gas,std,two,convertible,rwd,front,96.6,180.3,70.5,50.8,3685,ohcv,eight,234,mpfi,3.46,3.1,8.3,155,4750,16,18,35056 75 | 74,0,buick century special,gas,std,four,sedan,rwd,front,120.9,208.1,71.7,56.7,3900,ohcv,eight,308,mpfi,3.8,3.35,8,184,4500,14,16,40960 76 | 75,1,buick regal sport coupe (turbo),gas,std,two,hardtop,rwd,front,112,199.2,72,55.4,3715,ohcv,eight,304,mpfi,3.8,3.35,8,184,4500,14,16,45400 77 | 76,1,mercury cougar,gas,turbo,two,hatchback,rwd,front,102.7,178.4,68,54.8,2910,ohc,four,140,mpfi,3.78,3.12,8,175,5000,19,24,16503 78 | 77,2,mitsubishi mirage,gas,std,two,hatchback,fwd,front,93.7,157.3,64.4,50.8,1918,ohc,four,92,2bbl,2.97,3.23,9.4,68,5500,37,41,5389 79 | 78,2,mitsubishi lancer,gas,std,two,hatchback,fwd,front,93.7,157.3,64.4,50.8,1944,ohc,four,92,2bbl,2.97,3.23,9.4,68,5500,31,38,6189 80 | 79,2,mitsubishi outlander,gas,std,two,hatchback,fwd,front,93.7,157.3,64.4,50.8,2004,ohc,four,92,2bbl,2.97,3.23,9.4,68,5500,31,38,6669 81 | 80,1,mitsubishi g4,gas,turbo,two,hatchback,fwd,front,93,157.3,63.8,50.8,2145,ohc,four,98,spdi,3.03,3.39,7.6,102,5500,24,30,7689 82 | 81,3,mitsubishi mirage g4,gas,turbo,two,hatchback,fwd,front,96.3,173,65.4,49.4,2370,ohc,four,110,spdi,3.17,3.46,7.5,116,5500,23,30,9959 83 | 82,3,mitsubishi g4,gas,std,two,hatchback,fwd,front,96.3,173,65.4,49.4,2328,ohc,four,122,2bbl,3.35,3.46,8.5,88,5000,25,32,8499 84 | 83,3,mitsubishi outlander,gas,turbo,two,hatchback,fwd,front,95.9,173.2,66.3,50.2,2833,ohc,four,156,spdi,3.58,3.86,7,145,5000,19,24,12629 85 | 84,3,mitsubishi g4,gas,turbo,two,hatchback,fwd,front,95.9,173.2,66.3,50.2,2921,ohc,four,156,spdi,3.59,3.86,7,145,5000,19,24,14869 86 | 85,3,mitsubishi mirage g4,gas,turbo,two,hatchback,fwd,front,95.9,173.2,66.3,50.2,2926,ohc,four,156,spdi,3.59,3.86,7,145,5000,19,24,14489 87 | 86,1,mitsubishi montero,gas,std,four,sedan,fwd,front,96.3,172.4,65.4,51.6,2365,ohc,four,122,2bbl,3.35,3.46,8.5,88,5000,25,32,6989 88 | 87,1,mitsubishi pajero,gas,std,four,sedan,fwd,front,96.3,172.4,65.4,51.6,2405,ohc,four,122,2bbl,3.35,3.46,8.5,88,5000,25,32,8189 89 | 88,1,mitsubishi outlander,gas,turbo,four,sedan,fwd,front,96.3,172.4,65.4,51.6,2403,ohc,four,110,spdi,3.17,3.46,7.5,116,5500,23,30,9279 90 | 89,-1,mitsubishi mirage g4,gas,std,four,sedan,fwd,front,96.3,172.4,65.4,51.6,2403,ohc,four,110,spdi,3.17,3.46,7.5,116,5500,23,30,9279 91 | 90,1,Nissan versa,gas,std,two,sedan,fwd,front,94.5,165.3,63.8,54.5,1889,ohc,four,97,2bbl,3.15,3.29,9.4,69,5200,31,37,5499 92 | 91,1,nissan gt-r,diesel,std,two,sedan,fwd,front,94.5,165.3,63.8,54.5,2017,ohc,four,103,idi,2.99,3.47,21.9,55,4800,45,50,7099 93 | 92,1,nissan rogue,gas,std,two,sedan,fwd,front,94.5,165.3,63.8,54.5,1918,ohc,four,97,2bbl,3.15,3.29,9.4,69,5200,31,37,6649 94 | 93,1,nissan latio,gas,std,four,sedan,fwd,front,94.5,165.3,63.8,54.5,1938,ohc,four,97,2bbl,3.15,3.29,9.4,69,5200,31,37,6849 95 | 94,1,nissan titan,gas,std,four,wagon,fwd,front,94.5,170.2,63.8,53.5,2024,ohc,four,97,2bbl,3.15,3.29,9.4,69,5200,31,37,7349 96 | 95,1,nissan leaf,gas,std,two,sedan,fwd,front,94.5,165.3,63.8,54.5,1951,ohc,four,97,2bbl,3.15,3.29,9.4,69,5200,31,37,7299 97 | 96,1,nissan juke,gas,std,two,hatchback,fwd,front,94.5,165.6,63.8,53.3,2028,ohc,four,97,2bbl,3.15,3.29,9.4,69,5200,31,37,7799 98 | 97,1,nissan latio,gas,std,four,sedan,fwd,front,94.5,165.3,63.8,54.5,1971,ohc,four,97,2bbl,3.15,3.29,9.4,69,5200,31,37,7499 99 | 98,1,nissan note,gas,std,four,wagon,fwd,front,94.5,170.2,63.8,53.5,2037,ohc,four,97,2bbl,3.15,3.29,9.4,69,5200,31,37,7999 100 | 99,2,nissan clipper,gas,std,two,hardtop,fwd,front,95.1,162.4,63.8,53.3,2008,ohc,four,97,2bbl,3.15,3.29,9.4,69,5200,31,37,8249 101 | 100,0,nissan rogue,gas,std,four,hatchback,fwd,front,97.2,173.4,65.2,54.7,2324,ohc,four,120,2bbl,3.33,3.47,8.5,97,5200,27,34,8949 102 | 101,0,nissan nv200,gas,std,four,sedan,fwd,front,97.2,173.4,65.2,54.7,2302,ohc,four,120,2bbl,3.33,3.47,8.5,97,5200,27,34,9549 103 | 102,0,nissan dayz,gas,std,four,sedan,fwd,front,100.4,181.7,66.5,55.1,3095,ohcv,six,181,mpfi,3.43,3.27,9,152,5200,17,22,13499 104 | 103,0,nissan fuga,gas,std,four,wagon,fwd,front,100.4,184.6,66.5,56.1,3296,ohcv,six,181,mpfi,3.43,3.27,9,152,5200,17,22,14399 105 | 104,0,nissan otti,gas,std,four,sedan,fwd,front,100.4,184.6,66.5,55.1,3060,ohcv,six,181,mpfi,3.43,3.27,9,152,5200,19,25,13499 106 | 105,3,nissan teana,gas,std,two,hatchback,rwd,front,91.3,170.7,67.9,49.7,3071,ohcv,six,181,mpfi,3.43,3.27,9,160,5200,19,25,17199 107 | 106,3,nissan kicks,gas,turbo,two,hatchback,rwd,front,91.3,170.7,67.9,49.7,3139,ohcv,six,181,mpfi,3.43,3.27,7.8,200,5200,17,23,19699 108 | 107,1,nissan clipper,gas,std,two,hatchback,rwd,front,99.2,178.5,67.9,49.7,3139,ohcv,six,181,mpfi,3.43,3.27,9,160,5200,19,25,18399 109 | 108,0,peugeot 504,gas,std,four,sedan,rwd,front,107.9,186.7,68.4,56.7,3020,l,four,120,mpfi,3.46,3.19,8.4,97,5000,19,24,11900 110 | 109,0,peugeot 304,diesel,turbo,four,sedan,rwd,front,107.9,186.7,68.4,56.7,3197,l,four,152,idi,3.7,3.52,21,95,4150,28,33,13200 111 | 110,0,peugeot 504 (sw),gas,std,four,wagon,rwd,front,114.2,198.9,68.4,58.7,3230,l,four,120,mpfi,3.46,3.19,8.4,97,5000,19,24,12440 112 | 111,0,peugeot 504,diesel,turbo,four,wagon,rwd,front,114.2,198.9,68.4,58.7,3430,l,four,152,idi,3.7,3.52,21,95,4150,25,25,13860 113 | 112,0,peugeot 504,gas,std,four,sedan,rwd,front,107.9,186.7,68.4,56.7,3075,l,four,120,mpfi,3.46,2.19,8.4,95,5000,19,24,15580 114 | 113,0,peugeot 604sl,diesel,turbo,four,sedan,rwd,front,107.9,186.7,68.4,56.7,3252,l,four,152,idi,3.7,3.52,21,95,4150,28,33,16900 115 | 114,0,peugeot 504,gas,std,four,wagon,rwd,front,114.2,198.9,68.4,56.7,3285,l,four,120,mpfi,3.46,2.19,8.4,95,5000,19,24,16695 116 | 115,0,peugeot 505s turbo diesel,diesel,turbo,four,wagon,rwd,front,114.2,198.9,68.4,58.7,3485,l,four,152,idi,3.7,3.52,21,95,4150,25,25,17075 117 | 116,0,peugeot 504,gas,std,four,sedan,rwd,front,107.9,186.7,68.4,56.7,3075,l,four,120,mpfi,3.46,3.19,8.4,97,5000,19,24,16630 118 | 117,0,peugeot 504,diesel,turbo,four,sedan,rwd,front,107.9,186.7,68.4,56.7,3252,l,four,152,idi,3.7,3.52,21,95,4150,28,33,17950 119 | 118,0,peugeot 604sl,gas,turbo,four,sedan,rwd,front,108,186.7,68.3,56,3130,l,four,134,mpfi,3.61,3.21,7,142,5600,18,24,18150 120 | 119,1,plymouth fury iii,gas,std,two,hatchback,fwd,front,93.7,157.3,63.8,50.8,1918,ohc,four,90,2bbl,2.97,3.23,9.4,68,5500,37,41,5572 121 | 120,1,plymouth cricket,gas,turbo,two,hatchback,fwd,front,93.7,157.3,63.8,50.8,2128,ohc,four,98,spdi,3.03,3.39,7.6,102,5500,24,30,7957 122 | 121,1,plymouth fury iii,gas,std,four,hatchback,fwd,front,93.7,157.3,63.8,50.6,1967,ohc,four,90,2bbl,2.97,3.23,9.4,68,5500,31,38,6229 123 | 122,1,plymouth satellite custom (sw),gas,std,four,sedan,fwd,front,93.7,167.3,63.8,50.8,1989,ohc,four,90,2bbl,2.97,3.23,9.4,68,5500,31,38,6692 124 | 123,1,plymouth fury gran sedan,gas,std,four,sedan,fwd,front,93.7,167.3,63.8,50.8,2191,ohc,four,98,2bbl,2.97,3.23,9.4,68,5500,31,38,7609 125 | 124,-1,plymouth valiant,gas,std,four,wagon,fwd,front,103.3,174.6,64.6,59.8,2535,ohc,four,122,2bbl,3.35,3.46,8.5,88,5000,24,30,8921 126 | 125,3,plymouth duster,gas,turbo,two,hatchback,rwd,front,95.9,173.2,66.3,50.2,2818,ohc,four,156,spdi,3.59,3.86,7,145,5000,19,24,12764 127 | 126,3,porsche macan,gas,std,two,hatchback,rwd,front,94.5,168.9,68.3,50.2,2778,ohc,four,151,mpfi,3.94,3.11,9.5,143,5500,19,27,22018 128 | 127,3,porcshce panamera,gas,std,two,hardtop,rwd,rear,89.5,168.9,65,51.6,2756,ohcf,six,194,mpfi,3.74,2.9,9.5,207,5900,17,25,32528 129 | 128,3,porsche cayenne,gas,std,two,hardtop,rwd,rear,89.5,168.9,65,51.6,2756,ohcf,six,194,mpfi,3.74,2.9,9.5,207,5900,17,25,34028 130 | 129,3,porsche boxter,gas,std,two,convertible,rwd,rear,89.5,168.9,65,51.6,2800,ohcf,six,194,mpfi,3.74,2.9,9.5,207,5900,17,25,37028 131 | 130,1,porsche cayenne,gas,std,two,hatchback,rwd,front,98.4,175.7,72.3,50.5,3366,dohcv,eight,203,mpfi,3.94,3.11,10,288,5750,17,28,31400.5 132 | 131,0,renault 12tl,gas,std,four,wagon,fwd,front,96.1,181.5,66.5,55.2,2579,ohc,four,132,mpfi,3.46,3.9,8.7,90,5100,23,31,9295 133 | 132,2,renault 5 gtl,gas,std,two,hatchback,fwd,front,96.1,176.8,66.6,50.5,2460,ohc,four,132,mpfi,3.46,3.9,8.7,90,5100,23,31,9895 134 | 133,3,saab 99e,gas,std,two,hatchback,fwd,front,99.1,186.6,66.5,56.1,2658,ohc,four,121,mpfi,3.54,3.07,9.31,110,5250,21,28,11850 135 | 134,2,saab 99le,gas,std,four,sedan,fwd,front,99.1,186.6,66.5,56.1,2695,ohc,four,121,mpfi,3.54,3.07,9.3,110,5250,21,28,12170 136 | 135,3,saab 99le,gas,std,two,hatchback,fwd,front,99.1,186.6,66.5,56.1,2707,ohc,four,121,mpfi,2.54,2.07,9.3,110,5250,21,28,15040 137 | 136,2,saab 99gle,gas,std,four,sedan,fwd,front,99.1,186.6,66.5,56.1,2758,ohc,four,121,mpfi,3.54,3.07,9.3,110,5250,21,28,15510 138 | 137,3,saab 99gle,gas,turbo,two,hatchback,fwd,front,99.1,186.6,66.5,56.1,2808,dohc,four,121,mpfi,3.54,3.07,9,160,5500,19,26,18150 139 | 138,2,saab 99e,gas,turbo,four,sedan,fwd,front,99.1,186.6,66.5,56.1,2847,dohc,four,121,mpfi,3.54,3.07,9,160,5500,19,26,18620 140 | 139,2,subaru,gas,std,two,hatchback,fwd,front,93.7,156.9,63.4,53.7,2050,ohcf,four,97,2bbl,3.62,2.36,9,69,4900,31,36,5118 141 | 140,2,subaru dl,gas,std,two,hatchback,fwd,front,93.7,157.9,63.6,53.7,2120,ohcf,four,108,2bbl,3.62,2.64,8.7,73,4400,26,31,7053 142 | 141,2,subaru dl,gas,std,two,hatchback,4wd,front,93.3,157.3,63.8,55.7,2240,ohcf,four,108,2bbl,3.62,2.64,8.7,73,4400,26,31,7603 143 | 142,0,subaru,gas,std,four,sedan,fwd,front,97.2,172,65.4,52.5,2145,ohcf,four,108,2bbl,3.62,2.64,9.5,82,4800,32,37,7126 144 | 143,0,subaru brz,gas,std,four,sedan,fwd,front,97.2,172,65.4,52.5,2190,ohcf,four,108,2bbl,3.62,2.64,9.5,82,4400,28,33,7775 145 | 144,0,subaru baja,gas,std,four,sedan,fwd,front,97.2,172,65.4,52.5,2340,ohcf,four,108,mpfi,3.62,2.64,9,94,5200,26,32,9960 146 | 145,0,subaru r1,gas,std,four,sedan,4wd,front,97,172,65.4,54.3,2385,ohcf,four,108,2bbl,3.62,2.64,9,82,4800,24,25,9233 147 | 146,0,subaru r2,gas,turbo,four,sedan,4wd,front,97,172,65.4,54.3,2510,ohcf,four,108,mpfi,3.62,2.64,7.7,111,4800,24,29,11259 148 | 147,0,subaru trezia,gas,std,four,wagon,fwd,front,97,173.5,65.4,53,2290,ohcf,four,108,2bbl,3.62,2.64,9,82,4800,28,32,7463 149 | 148,0,subaru tribeca,gas,std,four,wagon,fwd,front,97,173.5,65.4,53,2455,ohcf,four,108,mpfi,3.62,2.64,9,94,5200,25,31,10198 150 | 149,0,subaru dl,gas,std,four,wagon,4wd,front,96.9,173.6,65.4,54.9,2420,ohcf,four,108,2bbl,3.62,2.64,9,82,4800,23,29,8013 151 | 150,0,subaru dl,gas,turbo,four,wagon,4wd,front,96.9,173.6,65.4,54.9,2650,ohcf,four,108,mpfi,3.62,2.64,7.7,111,4800,23,23,11694 152 | 151,1,toyota corona mark ii,gas,std,two,hatchback,fwd,front,95.7,158.7,63.6,54.5,1985,ohc,four,92,2bbl,3.05,3.03,9,62,4800,35,39,5348 153 | 152,1,toyota corona,gas,std,two,hatchback,fwd,front,95.7,158.7,63.6,54.5,2040,ohc,four,92,2bbl,3.05,3.03,9,62,4800,31,38,6338 154 | 153,1,toyota corolla 1200,gas,std,four,hatchback,fwd,front,95.7,158.7,63.6,54.5,2015,ohc,four,92,2bbl,3.05,3.03,9,62,4800,31,38,6488 155 | 154,0,toyota corona hardtop,gas,std,four,wagon,fwd,front,95.7,169.7,63.6,59.1,2280,ohc,four,92,2bbl,3.05,3.03,9,62,4800,31,37,6918 156 | 155,0,toyota corolla 1600 (sw),gas,std,four,wagon,4wd,front,95.7,169.7,63.6,59.1,2290,ohc,four,92,2bbl,3.05,3.03,9,62,4800,27,32,7898 157 | 156,0,toyota carina,gas,std,four,wagon,4wd,front,95.7,169.7,63.6,59.1,3110,ohc,four,92,2bbl,3.05,3.03,9,62,4800,27,32,8778 158 | 157,0,toyota mark ii,gas,std,four,sedan,fwd,front,95.7,166.3,64.4,53,2081,ohc,four,98,2bbl,3.19,3.03,9,70,4800,30,37,6938 159 | 158,0,toyota corolla 1200,gas,std,four,hatchback,fwd,front,95.7,166.3,64.4,52.8,2109,ohc,four,98,2bbl,3.19,3.03,9,70,4800,30,37,7198 160 | 159,0,toyota corona,diesel,std,four,sedan,fwd,front,95.7,166.3,64.4,53,2275,ohc,four,110,idi,3.27,3.35,22.5,56,4500,34,36,7898 161 | 160,0,toyota corolla,diesel,std,four,hatchback,fwd,front,95.7,166.3,64.4,52.8,2275,ohc,four,110,idi,3.27,3.35,22.5,56,4500,38,47,7788 162 | 161,0,toyota corona,gas,std,four,sedan,fwd,front,95.7,166.3,64.4,53,2094,ohc,four,98,2bbl,3.19,3.03,9,70,4800,38,47,7738 163 | 162,0,toyota corolla,gas,std,four,hatchback,fwd,front,95.7,166.3,64.4,52.8,2122,ohc,four,98,2bbl,3.19,3.03,9,70,4800,28,34,8358 164 | 163,0,toyota mark ii,gas,std,four,sedan,fwd,front,95.7,166.3,64.4,52.8,2140,ohc,four,98,2bbl,3.19,3.03,9,70,4800,28,34,9258 165 | 164,1,toyota corolla liftback,gas,std,two,sedan,rwd,front,94.5,168.7,64,52.6,2169,ohc,four,98,2bbl,3.19,3.03,9,70,4800,29,34,8058 166 | 165,1,toyota corona,gas,std,two,hatchback,rwd,front,94.5,168.7,64,52.6,2204,ohc,four,98,2bbl,3.19,3.03,9,70,4800,29,34,8238 167 | 166,1,toyota celica gt liftback,gas,std,two,sedan,rwd,front,94.5,168.7,64,52.6,2265,dohc,four,98,mpfi,3.24,3.08,9.4,112,6600,26,29,9298 168 | 167,1,toyota corolla tercel,gas,std,two,hatchback,rwd,front,94.5,168.7,64,52.6,2300,dohc,four,98,mpfi,3.24,3.08,9.4,112,6600,26,29,9538 169 | 168,2,toyota corona liftback,gas,std,two,hardtop,rwd,front,98.4,176.2,65.6,52,2540,ohc,four,146,mpfi,3.62,3.5,9.3,116,4800,24,30,8449 170 | 169,2,toyota corolla,gas,std,two,hardtop,rwd,front,98.4,176.2,65.6,52,2536,ohc,four,146,mpfi,3.62,3.5,9.3,116,4800,24,30,9639 171 | 170,2,toyota starlet,gas,std,two,hatchback,rwd,front,98.4,176.2,65.6,52,2551,ohc,four,146,mpfi,3.62,3.5,9.3,116,4800,24,30,9989 172 | 171,2,toyota tercel,gas,std,two,hardtop,rwd,front,98.4,176.2,65.6,52,2679,ohc,four,146,mpfi,3.62,3.5,9.3,116,4800,24,30,11199 173 | 172,2,toyota corolla,gas,std,two,hatchback,rwd,front,98.4,176.2,65.6,52,2714,ohc,four,146,mpfi,3.62,3.5,9.3,116,4800,24,30,11549 174 | 173,2,toyota cressida,gas,std,two,convertible,rwd,front,98.4,176.2,65.6,53,2975,ohc,four,146,mpfi,3.62,3.5,9.3,116,4800,24,30,17669 175 | 174,-1,toyota corolla,gas,std,four,sedan,fwd,front,102.4,175.6,66.5,54.9,2326,ohc,four,122,mpfi,3.31,3.54,8.7,92,4200,29,34,8948 176 | 175,-1,toyota celica gt,diesel,turbo,four,sedan,fwd,front,102.4,175.6,66.5,54.9,2480,ohc,four,110,idi,3.27,3.35,22.5,73,4500,30,33,10698 177 | 176,-1,toyota corona,gas,std,four,hatchback,fwd,front,102.4,175.6,66.5,53.9,2414,ohc,four,122,mpfi,3.31,3.54,8.7,92,4200,27,32,9988 178 | 177,-1,toyota corolla,gas,std,four,sedan,fwd,front,102.4,175.6,66.5,54.9,2414,ohc,four,122,mpfi,3.31,3.54,8.7,92,4200,27,32,10898 179 | 178,-1,toyota mark ii,gas,std,four,hatchback,fwd,front,102.4,175.6,66.5,53.9,2458,ohc,four,122,mpfi,3.31,3.54,8.7,92,4200,27,32,11248 180 | 179,3,toyota corolla liftback,gas,std,two,hatchback,rwd,front,102.9,183.5,67.7,52,2976,dohc,six,171,mpfi,3.27,3.35,9.3,161,5200,20,24,16558 181 | 180,3,toyota corona,gas,std,two,hatchback,rwd,front,102.9,183.5,67.7,52,3016,dohc,six,171,mpfi,3.27,3.35,9.3,161,5200,19,24,15998 182 | 181,-1,toyota starlet,gas,std,four,sedan,rwd,front,104.5,187.8,66.5,54.1,3131,dohc,six,171,mpfi,3.27,3.35,9.2,156,5200,20,24,15690 183 | 182,-1,toyouta tercel,gas,std,four,wagon,rwd,front,104.5,187.8,66.5,54.1,3151,dohc,six,161,mpfi,3.27,3.35,9.2,156,5200,19,24,15750 184 | 183,2,vokswagen rabbit,diesel,std,two,sedan,fwd,front,97.3,171.7,65.5,55.7,2261,ohc,four,97,idi,3.01,3.4,23,52,4800,37,46,7775 185 | 184,2,volkswagen 1131 deluxe sedan,gas,std,two,sedan,fwd,front,97.3,171.7,65.5,55.7,2209,ohc,four,109,mpfi,3.19,3.4,9,85,5250,27,34,7975 186 | 185,2,volkswagen model 111,diesel,std,four,sedan,fwd,front,97.3,171.7,65.5,55.7,2264,ohc,four,97,idi,3.01,3.4,23,52,4800,37,46,7995 187 | 186,2,volkswagen type 3,gas,std,four,sedan,fwd,front,97.3,171.7,65.5,55.7,2212,ohc,four,109,mpfi,3.19,3.4,9,85,5250,27,34,8195 188 | 187,2,volkswagen 411 (sw),gas,std,four,sedan,fwd,front,97.3,171.7,65.5,55.7,2275,ohc,four,109,mpfi,3.19,3.4,9,85,5250,27,34,8495 189 | 188,2,volkswagen super beetle,diesel,turbo,four,sedan,fwd,front,97.3,171.7,65.5,55.7,2319,ohc,four,97,idi,3.01,3.4,23,68,4500,37,42,9495 190 | 189,2,volkswagen dasher,gas,std,four,sedan,fwd,front,97.3,171.7,65.5,55.7,2300,ohc,four,109,mpfi,3.19,3.4,10,100,5500,26,32,9995 191 | 190,3,vw dasher,gas,std,two,convertible,fwd,front,94.5,159.3,64.2,55.6,2254,ohc,four,109,mpfi,3.19,3.4,8.5,90,5500,24,29,11595 192 | 191,3,vw rabbit,gas,std,two,hatchback,fwd,front,94.5,165.7,64,51.4,2221,ohc,four,109,mpfi,3.19,3.4,8.5,90,5500,24,29,9980 193 | 192,0,volkswagen rabbit,gas,std,four,sedan,fwd,front,100.4,180.2,66.9,55.1,2661,ohc,five,136,mpfi,3.19,3.4,8.5,110,5500,19,24,13295 194 | 193,0,volkswagen rabbit custom,diesel,turbo,four,sedan,fwd,front,100.4,180.2,66.9,55.1,2579,ohc,four,97,idi,3.01,3.4,23,68,4500,33,38,13845 195 | 194,0,volkswagen dasher,gas,std,four,wagon,fwd,front,100.4,183.1,66.9,55.1,2563,ohc,four,109,mpfi,3.19,3.4,9,88,5500,25,31,12290 196 | 195,-2,volvo 145e (sw),gas,std,four,sedan,rwd,front,104.3,188.8,67.2,56.2,2912,ohc,four,141,mpfi,3.78,3.15,9.5,114,5400,23,28,12940 197 | 196,-1,volvo 144ea,gas,std,four,wagon,rwd,front,104.3,188.8,67.2,57.5,3034,ohc,four,141,mpfi,3.78,3.15,9.5,114,5400,23,28,13415 198 | 197,-2,volvo 244dl,gas,std,four,sedan,rwd,front,104.3,188.8,67.2,56.2,2935,ohc,four,141,mpfi,3.78,3.15,9.5,114,5400,24,28,15985 199 | 198,-1,volvo 245,gas,std,four,wagon,rwd,front,104.3,188.8,67.2,57.5,3042,ohc,four,141,mpfi,3.78,3.15,9.5,114,5400,24,28,16515 200 | 199,-2,volvo 264gl,gas,turbo,four,sedan,rwd,front,104.3,188.8,67.2,56.2,3045,ohc,four,130,mpfi,3.62,3.15,7.5,162,5100,17,22,18420 201 | 200,-1,volvo diesel,gas,turbo,four,wagon,rwd,front,104.3,188.8,67.2,57.5,3157,ohc,four,130,mpfi,3.62,3.15,7.5,162,5100,17,22,18950 202 | 201,-1,volvo 145e (sw),gas,std,four,sedan,rwd,front,109.1,188.8,68.9,55.5,2952,ohc,four,141,mpfi,3.78,3.15,9.5,114,5400,23,28,16845 203 | 202,-1,volvo 144ea,gas,turbo,four,sedan,rwd,front,109.1,188.8,68.8,55.5,3049,ohc,four,141,mpfi,3.78,3.15,8.7,160,5300,19,25,19045 204 | 203,-1,volvo 244dl,gas,std,four,sedan,rwd,front,109.1,188.8,68.9,55.5,3012,ohcv,six,173,mpfi,3.58,2.87,8.8,134,5500,18,23,21485 205 | 204,-1,volvo 246,diesel,turbo,four,sedan,rwd,front,109.1,188.8,68.9,55.5,3217,ohc,six,145,idi,3.01,3.4,23,106,4800,26,27,22470 206 | 205,-1,volvo 264gl,gas,turbo,four,sedan,rwd,front,109.1,188.8,68.9,55.5,3062,ohc,four,141,mpfi,3.78,3.15,9.5,114,5400,19,25,22625 207 | -------------------------------------------------------------------------------- /Tematik_Egitimler/Makine_Ogrenmesi_Mayis_2024/Uygulama_2/reg_diamonds.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "id": "initial_id", 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "source": [ 10 | "import warnings\n", 11 | "warnings.filterwarnings('ignore')\n", 12 | "\n", 13 | "import numpy as np \n", 14 | "import pandas as pd \n", 15 | "import matplotlib.pyplot as plotter\n", 16 | "import seaborn as sns\n", 17 | "\n", 18 | "from sklearn.preprocessing import LabelEncoder\n", 19 | "from sklearn.model_selection import train_test_split\n", 20 | "from sklearn.preprocessing import StandardScaler\n", 21 | "from sklearn.pipeline import Pipeline\n", 22 | "\n", 23 | "from sklearn.linear_model import LinearRegression\n", 24 | "from sklearn. linear_model import Lasso\n", 25 | "from sklearn.tree import DecisionTreeRegressor\n", 26 | "from sklearn.ensemble import RandomForestRegressor\n", 27 | "from sklearn.neighbors import KNeighborsRegressor\n", 28 | "from xgboost import XGBRegressor\n", 29 | "\n", 30 | "from sklearn.model_selection import cross_val_score\n", 31 | "from sklearn.metrics import mean_squared_error\n", 32 | "from sklearn import metrics " 33 | ], 34 | "outputs": [], 35 | "execution_count": null 36 | }, 37 | { 38 | "cell_type": "code", 39 | "source": [ 40 | "data_df = pd.read_csv(\"./data/diamonds.csv\")\n", 41 | "data_df.sample(10)" 42 | ], 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "id": "743e17792969cf46", 47 | "outputs": [], 48 | "execution_count": null 49 | }, 50 | { 51 | "cell_type": "code", 52 | "source": [ 53 | "data_df.info()" 54 | ], 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "id": "7a5570159ae83d99", 59 | "outputs": [], 60 | "execution_count": null 61 | }, 62 | { 63 | "cell_type": "code", 64 | "source": [ 65 | "plotter.figure(figsize=(10,8))\n", 66 | "cols = [\"#A0522D\",\"#A52A2A\",\"#CD853F\",\"#F4A460\",\"#DEB887\"]\n", 67 | "ax = sns.violinplot(x=\"cut\",y=\"price\", data=data_df, palette=cols,scale= \"count\")\n", 68 | "ax.set_title(\"Diamond Cut for Price\", color=\"#774571\", fontsize = 20)\n", 69 | "ax.set_ylabel(\"Price\", color=\"#4e4c39\", fontsize = 15)\n", 70 | "ax.set_xlabel(\"Cut\", color=\"#4e4c39\", fontsize = 15)\n", 71 | "plotter.show()" 72 | ], 73 | "metadata": { 74 | "collapsed": false 75 | }, 76 | "id": "804aa2416a05940d", 77 | "outputs": [], 78 | "execution_count": null 79 | }, 80 | { 81 | "cell_type": "code", 82 | "source": [ 83 | "plotter.figure(figsize=(12,8))\n", 84 | "ax = sns.violinplot(x=\"color\",y=\"price\", data=data_df, palette=cols,scale= \"count\")\n", 85 | "ax.set_title(\"Diamond Colors for Price\", color=\"#774571\", fontsize = 20)\n", 86 | "ax.set_ylabel(\"Price\", color=\"#4e4c39\", fontsize = 15)\n", 87 | "ax.set_xlabel(\"Color\", color=\"#4e4c39\", fontsize = 15)\n", 88 | "plotter.show()" 89 | ], 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "id": "cb82efb874199a1c", 94 | "outputs": [], 95 | "execution_count": null 96 | }, 97 | { 98 | "cell_type": "code", 99 | "source": [ 100 | "plotter.figure(figsize=(13,8))\n", 101 | "ax = sns.violinplot(x=\"clarity\",y=\"price\", data=data_df, palette=cols,scale= \"count\")\n", 102 | "ax.set_title(\"Diamond Clarity for Price\", color=\"#774571\", fontsize = 20)\n", 103 | "ax.set_ylabel(\"Price\", color=\"#4e4c39\", fontsize = 15)\n", 104 | "ax.set_xlabel(\"Clarity\", color=\"#4e4c39\", fontsize = 15)\n", 105 | "plotter.show()" 106 | ], 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "id": "5a494793deff2c0d", 111 | "outputs": [], 112 | "execution_count": null 113 | }, 114 | { 115 | "metadata": {}, 116 | "cell_type": "code", 117 | "source": "data_df['clarity'].unique()", 118 | "id": "cfc20ae6a22560fc", 119 | "outputs": [], 120 | "execution_count": null 121 | }, 122 | { 123 | "metadata": {}, 124 | "cell_type": "code", 125 | "source": "data_df['color'].unique()", 126 | "id": "a1875cbf4612cca4", 127 | "outputs": [], 128 | "execution_count": null 129 | }, 130 | { 131 | "cell_type": "code", 132 | "source": [ 133 | "data_df.describe().T" 134 | ], 135 | "metadata": { 136 | "collapsed": false 137 | }, 138 | "id": "b739ccdc12c3e9b9", 139 | "outputs": [], 140 | "execution_count": null 141 | }, 142 | { 143 | "cell_type": "code", 144 | "source": [ 145 | "ax = sns.pairplot(data_df, hue= \"cut\", palette = cols)" 146 | ], 147 | "metadata": { 148 | "collapsed": false 149 | }, 150 | "id": "70a288d653feae6", 151 | "outputs": [], 152 | "execution_count": null 153 | }, 154 | { 155 | "cell_type": "code", 156 | "source": [ 157 | "lm = sns.lmplot(x=\"price\", y=\"y\", data=data_df, scatter_kws={\"color\": \"#BC8F8F\"}, line_kws={\"color\": \"#8B4513\"})\n", 158 | "plotter.title(\"Line Plot on Price vs 'y'\", color=\"#774571\", fontsize = 20)\n", 159 | "plotter.show()" 160 | ], 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "id": "e6a0d06087f8e577", 165 | "outputs": [], 166 | "execution_count": null 167 | }, 168 | { 169 | "cell_type": "code", 170 | "source": [ 171 | "lm = sns.lmplot(x=\"price\", y=\"z\", data=data_df, scatter_kws={\"color\": \"#BC8F8F\"}, line_kws={\"color\": \"#8B4513\"})\n", 172 | "plotter.title(\"Line Plot on Price vs 'z'\", color=\"#774571\", fontsize = 20)\n", 173 | "plotter.show()" 174 | ], 175 | "metadata": { 176 | "collapsed": false 177 | }, 178 | "id": "8c02ae3f0aadc1b5", 179 | "outputs": [], 180 | "execution_count": null 181 | }, 182 | { 183 | "cell_type": "code", 184 | "source": [ 185 | "lm = sns.lmplot(x=\"price\", y=\"depth\", data=data_df, scatter_kws={\"color\": \"#BC8F8F\"}, line_kws={\"color\": \"#8B4513\"})\n", 186 | "plotter.title(\"Line Plot on Price vs 'depth'\", color=\"#774571\", fontsize = 20)\n", 187 | "plotter.show()" 188 | ], 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "id": "65d4d6b27abe2b0e", 193 | "outputs": [], 194 | "execution_count": null 195 | }, 196 | { 197 | "cell_type": "code", 198 | "source": [ 199 | "lm = sns.lmplot(x=\"price\", y=\"table\", data=data_df, scatter_kws={\"color\": \"#BC8F8F\"}, line_kws={\"color\": \"#8B4513\"})\n", 200 | "plotter.title(\"Line Plot on Price vs 'Table'\", color=\"#774571\", fontsize = 20)\n", 201 | "plotter.show()" 202 | ], 203 | "metadata": { 204 | "collapsed": false 205 | }, 206 | "id": "10f925bdb5fb340d", 207 | "outputs": [], 208 | "execution_count": null 209 | }, 210 | { 211 | "cell_type": "code", 212 | "source": [ 213 | "# Removing the feature \"Unnamed\"\n", 214 | "data_df = data_df.drop([\"Unnamed: 0\"], axis=1)\n", 215 | "data_df.shape" 216 | ], 217 | "metadata": { 218 | "collapsed": false 219 | }, 220 | "id": "1b4632a6b9c76cfc", 221 | "outputs": [], 222 | "execution_count": null 223 | }, 224 | { 225 | "cell_type": "code", 226 | "source": [ 227 | "# Removing the datapoints having min 0 value in either x, y or z features \n", 228 | "data_df = data_df.drop(data_df[data_df[\"x\"]==0].index)\n", 229 | "data_df = data_df.drop(data_df[data_df[\"y\"]==0].index)\n", 230 | "data_df = data_df.drop(data_df[data_df[\"z\"]==0].index)\n", 231 | "data_df.shape" 232 | ], 233 | "metadata": { 234 | "collapsed": false 235 | }, 236 | "id": "de25a847981e7683", 237 | "outputs": [], 238 | "execution_count": null 239 | }, 240 | { 241 | "cell_type": "code", 242 | "source": [ 243 | "# Dropping the outliers (since we have huge dataset) by defining appropriate measures across features \n", 244 | "data_df = data_df[(data_df[\"depth\"]<75)&(data_df[\"depth\"]>45)]\n", 245 | "data_df = data_df[(data_df[\"table\"]<80)&(data_df[\"table\"]>40)]\n", 246 | "data_df = data_df[(data_df[\"x\"]<40)]\n", 247 | "data_df = data_df[(data_df[\"y\"]<40)]\n", 248 | "data_df = data_df[(data_df[\"z\"]<40)&(data_df[\"z\"]>2)]\n", 249 | "data_df.shape " 250 | ], 251 | "metadata": { 252 | "collapsed": false 253 | }, 254 | "id": "78357bd081f5f3f0", 255 | "outputs": [], 256 | "execution_count": null 257 | }, 258 | { 259 | "cell_type": "code", 260 | "source": [ 261 | "# Making a copy to keep original data in its form intact\n", 262 | "data1 = data_df.copy()\n", 263 | "\n", 264 | "# Applying label encoder to columns with categorical data\n", 265 | "columns = ['cut','color','clarity']\n", 266 | "label_encoder = LabelEncoder()\n", 267 | "for col in columns:\n", 268 | " data1[col] = label_encoder.fit_transform(data1[col])\n", 269 | "data1.describe()" 270 | ], 271 | "metadata": { 272 | "collapsed": false 273 | }, 274 | "id": "bf245268582bb6b4", 275 | "outputs": [], 276 | "execution_count": null 277 | }, 278 | { 279 | "cell_type": "code", 280 | "source": [ 281 | "cmap = sns.diverging_palette(205, 133, 63, as_cmap=True)\n", 282 | "cols = ([\"#682F2F\", \"#9E726F\", \"#D6B2B1\", \"#B9C0C9\", \"#9F8A78\", \"#F3AB60\"])\n", 283 | "corrmat= data1.corr()\n", 284 | "f, ax = plotter.subplots(figsize=(15,12))\n", 285 | "sns.heatmap(corrmat,cmap=cols,annot=True)" 286 | ], 287 | "metadata": { 288 | "collapsed": false 289 | }, 290 | "id": "a9d4e454535e619c", 291 | "outputs": [], 292 | "execution_count": null 293 | }, 294 | { 295 | "cell_type": "code", 296 | "source": [ 297 | "# Defining the independent and dependent variables\n", 298 | "X= data1.drop([\"price\"],axis =1)\n", 299 | "y= data1[\"price\"]\n", 300 | "X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.20, random_state=25)" 301 | ], 302 | "metadata": { 303 | "collapsed": false 304 | }, 305 | "id": "8b1b6f0cef045e37", 306 | "outputs": [], 307 | "execution_count": null 308 | }, 309 | { 310 | "cell_type": "code", 311 | "source": [ 312 | "# Building pipelins of standard scaler and model for various regressors.\n", 313 | "\n", 314 | "pipeline_lr=Pipeline([(\"scalar1\",StandardScaler()),\n", 315 | " (\"lr\",LinearRegression())])\n", 316 | "\n", 317 | "pipeline_lasso=Pipeline([(\"scalar2\", StandardScaler()),\n", 318 | " (\"lasso\",Lasso())])\n", 319 | "\n", 320 | "pipeline_dt=Pipeline([(\"scalar3\",StandardScaler()),\n", 321 | " (\"dt\",DecisionTreeRegressor())])\n", 322 | "\n", 323 | "pipeline_rf=Pipeline([(\"scalar4\",StandardScaler()),\n", 324 | " (\"rf\",RandomForestRegressor())])\n", 325 | "\n", 326 | "\n", 327 | "pipeline_kn=Pipeline([(\"scalar5\",StandardScaler()),\n", 328 | " (\"kn\",KNeighborsRegressor())])\n", 329 | "\n", 330 | "\n", 331 | "pipeline_xgb=Pipeline([(\"scalar6\",StandardScaler()),\n", 332 | " (\"xgb\",XGBRegressor())])\n", 333 | "\n", 334 | "# List of all the pipelines\n", 335 | "pipelines = [pipeline_lr, pipeline_lasso, pipeline_dt, pipeline_rf, pipeline_kn, pipeline_xgb]\n", 336 | "\n", 337 | "# Dictionary of pipelines and model types for ease of reference\n", 338 | "pipeline_dict = {0: \"LinearRegression\", 1: \"Lasso\", 2: \"DecisionTree\", 3: \"RandomForest\",4: \"KNeighbors\", 5: \"XGBRegressor\"}\n", 339 | "\n", 340 | "# Fit the pipelines\n", 341 | "for pipe in pipelines:\n", 342 | " pipe.fit(X_train, y_train)" 343 | ], 344 | "metadata": { 345 | "collapsed": false 346 | }, 347 | "id": "4edb211dfa94c038", 348 | "outputs": [], 349 | "execution_count": null 350 | }, 351 | { 352 | "cell_type": "code", 353 | "source": [ 354 | "cv_results_rms = []\n", 355 | "for i, model in enumerate(pipelines):\n", 356 | " cv_score = cross_val_score(model, X_train,y_train,scoring=\"neg_root_mean_squared_error\", cv=12)\n", 357 | " cv_results_rms.append(cv_score)\n", 358 | " print(\"%s: %f \" % (pipeline_dict[i], -1 * cv_score.mean()))" 359 | ], 360 | "metadata": { 361 | "collapsed": false 362 | }, 363 | "id": "f8ffbd831940335b", 364 | "outputs": [], 365 | "execution_count": null 366 | }, 367 | { 368 | "cell_type": "code", 369 | "source": [ 370 | "# Model prediction on test data with XGBClassifier which gave us the least RMSE \n", 371 | "pred = pipeline_xgb.predict(X_test)\n", 372 | "print(\"R^2:\",metrics.r2_score(y_test, pred))\n", 373 | "print(\"Adjusted R^2:\",1 - (1-metrics.r2_score(y_test, pred))*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1))" 374 | ], 375 | "metadata": { 376 | "collapsed": false 377 | }, 378 | "id": "501fb6bb0a11cb2e", 379 | "execution_count": null, 380 | "outputs": [] 381 | } 382 | ], 383 | "metadata": { 384 | "kernelspec": { 385 | "display_name": "Python 3", 386 | "language": "python", 387 | "name": "python3" 388 | }, 389 | "language_info": { 390 | "codemirror_mode": { 391 | "name": "ipython", 392 | "version": 2 393 | }, 394 | "file_extension": ".py", 395 | "mimetype": "text/x-python", 396 | "name": "python", 397 | "nbconvert_exporter": "python", 398 | "pygments_lexer": "ipython2", 399 | "version": "2.7.6" 400 | } 401 | }, 402 | "nbformat": 4, 403 | "nbformat_minor": 5 404 | } 405 | -------------------------------------------------------------------------------- /Tematik_Egitimler/Makine_Ogrenmesi_Temmuz_2024/Regresyon/data/experience_salary_dataset: -------------------------------------------------------------------------------- 1 | YearsExperience,Salary 2 | 1.1,39343 3 | 1.3,46205 4 | 1.5,37731 5 | 2,43525 6 | 2.2,39891 7 | 2.9,56642 8 | 3,60150 9 | 3.2,54445 10 | 3.2,64445 11 | 3.7,57189 12 | 3.9,63218 13 | 4,55794 14 | 4,56957 15 | 4.1,57081 16 | 4.5,61111 17 | 4.9,67938 18 | 5.1,66029 19 | 5.3,83088 20 | 5.9,81363 21 | 6,93940 22 | 6.8,91738 23 | 7.1,98273 24 | 7.9,101302 25 | 8.2,113812 26 | 8.7,109431 27 | 9,105582 28 | 9.5,116969 29 | 9.6,112635 30 | 10.3,122391 31 | 10.5,121872 -------------------------------------------------------------------------------- /Tematik_Egitimler/Makine_Ogrenmesi_Temmuz_2024/Regresyon/data/experience_sale_dataset: -------------------------------------------------------------------------------- 1 | YearsExperience,HouseSales 2 | 3.2,27 3 | 8.2,30 4 | 5.7,31 5 | 7.5,36 6 | 1.5,16 7 | 1.1,15 8 | 6.75,36 9 | 8.5,31 10 | 3.2,19 11 | 3.9,22 12 | 0.1,9 13 | 1.0,12 14 | 0.4,6 15 | 4.4,31 16 | 1.4,19 -------------------------------------------------------------------------------- /Webinar/Webinar-II-VeriOnisleme/data_generator.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy 3 | 4 | from typing import List 5 | from scipy.stats import skewnorm 6 | from sklearn.datasets import make_blobs 7 | 8 | 9 | def generate_skewed_data(location: float = 1.0, 10 | skewness: float = 0.5, 11 | size: int = 10000, 12 | scale: float = 1, 13 | floor: float = 0.0): 14 | gen_data = skewnorm.rvs(a=skewness, 15 | loc=location, 16 | size=size, 17 | scale=1.0) 18 | gen_data = gen_data - min(gen_data) 19 | gen_data = gen_data / max(gen_data) 20 | gen_data = gen_data * scale 21 | gen_data = gen_data + floor 22 | return gen_data 23 | 24 | 25 | def add_noise(data_point: float, 26 | gaussian: [float, float]): 27 | mean_shift = (random.random() * gaussian[0]) * random.choice([-1, 1]) 28 | return numpy.random.normal(loc=data_point + mean_shift, 29 | scale=gaussian[1]) 30 | 31 | 32 | def generate_skewed_data_left(size: int = 10000): 33 | return numpy.power(generate_skewed_data(100.0, 34 | skewness=-5, 35 | size=size), 36 | 0.75) 37 | 38 | 39 | def generate_skewed_data_right(size: int = 10000): 40 | return numpy.power(generate_skewed_data(100.0, 41 | skewness=5, 42 | size=size), 43 | 1.0) 44 | 45 | 46 | def generate_extreme_skewed_data_right(size: int = 10000, 47 | scale: int = 1): 48 | return numpy.power(generate_skewed_data(100.0, 49 | skewness=5, 50 | size=size, 51 | scale=scale), 52 | 3.0) 53 | 54 | 55 | def generate_uniform_dataset(n_data_points=100, 56 | feature_range: int = 1.0, 57 | random_state: int = None): 58 | if random_state is None: 59 | random_state = int(random.random() * 100) 60 | return numpy.random.RandomState(random_state).uniform(0, 61 | feature_range, 62 | size=(n_data_points, 2)) 63 | 64 | 65 | def generate_blobbed_dataset(n_data_points=100, 66 | feature_range: int = 1.0, 67 | n_blobs: int = 3, 68 | blob_std: float = None, 69 | blob_centers: List[List[float]] = None, 70 | size_blobs: List[int] = None, 71 | random_state: int = None): 72 | if random_state is None: 73 | random_state = int(random.random() * 100) 74 | if size_blobs is None: 75 | size_blobs = [int(random.random() * 10) for x in range(n_blobs)] 76 | if blob_std is None: 77 | blob_std = random.random() * 0.2 * feature_range 78 | if blob_centers is None: 79 | blob_centers = [[random.random() * feature_range, 80 | random.random() * feature_range] for x in range(n_blobs)] 81 | blob_samples = [] 82 | for blob_counter in range(len(size_blobs)): 83 | blob_samples.append(n_data_points * size_blobs[blob_counter] // sum(size_blobs)) 84 | return make_blobs(n_samples=blob_samples, 85 | cluster_std=blob_std, 86 | centers=blob_centers, 87 | random_state=random_state)[0] 88 | -------------------------------------------------------------------------------- /Webinar/Webinar-II-VeriOnisleme/discretization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "initial_id", 7 | "metadata": { 8 | "collapsed": true 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "import numpy\n", 13 | "\n", 14 | "import matplotlib.pyplot as plotter\n", 15 | "from sklearn.preprocessing import KBinsDiscretizer\n", 16 | "\n", 17 | "from data_generator import generate_uniform_dataset, generate_blobbed_dataset" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "outputs": [], 23 | "source": [ 24 | "data_points = 200" 25 | ], 26 | "metadata": { 27 | "collapsed": false 28 | }, 29 | "id": "562985a6fb87b0e1", 30 | "execution_count": null 31 | }, 32 | { 33 | "cell_type": "code", 34 | "outputs": [], 35 | "source": [ 36 | "uniform_dataset = generate_uniform_dataset(n_data_points=data_points)" 37 | ], 38 | "metadata": { 39 | "collapsed": false 40 | }, 41 | "id": "c2b711da5b2a7343", 42 | "execution_count": null 43 | }, 44 | { 45 | "cell_type": "code", 46 | "outputs": [], 47 | "source": [ 48 | "# Show Dataset\n", 49 | "\n", 50 | "print(uniform_dataset)" 51 | ], 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "id": "8881fe5a7e4bed79", 56 | "execution_count": null 57 | }, 58 | { 59 | "cell_type": "code", 60 | "outputs": [], 61 | "source": [ 62 | "arr_x = numpy.array(uniform_dataset[:, 0])\n", 63 | "arr_y = numpy.array(uniform_dataset[:, 1])\n", 64 | "\n", 65 | "plotter.scatter(x=arr_x, y = arr_y)" 66 | ], 67 | "metadata": { 68 | "collapsed": false 69 | }, 70 | "id": "1af8676fb37e4e04", 71 | "execution_count": null 72 | }, 73 | { 74 | "cell_type": "code", 75 | "outputs": [], 76 | "source": [ 77 | "blobbed_dataset = generate_blobbed_dataset(n_data_points=data_points,\n", 78 | " n_blobs=4,\n", 79 | " blob_std=0.1,\n", 80 | " blob_centers=[[0.7, 0.2], \n", 81 | " [0.2, 0.1], \n", 82 | " [0.3, 0.8], \n", 83 | " [0.9, 0.9]],\n", 84 | " size_blobs=[2, \n", 85 | " 3, \n", 86 | " 5, \n", 87 | " 7])" 88 | ], 89 | "metadata": { 90 | "collapsed": false 91 | }, 92 | "id": "1fbe4b965d08e77b", 93 | "execution_count": null 94 | }, 95 | { 96 | "cell_type": "code", 97 | "outputs": [], 98 | "source": [ 99 | "# Show Dataset\n", 100 | "\n", 101 | "print(blobbed_dataset)" 102 | ], 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "id": "f49208a53b300b58", 107 | "execution_count": null 108 | }, 109 | { 110 | "cell_type": "code", 111 | "outputs": [], 112 | "source": [ 113 | "arr_x = numpy.array(blobbed_dataset[:, 0])\n", 114 | "arr_y = numpy.array(blobbed_dataset[:, 1])\n", 115 | "\n", 116 | "plotter.scatter(x=arr_x, y = arr_y)" 117 | ], 118 | "metadata": { 119 | "collapsed": false 120 | }, 121 | "id": "e07f2fcf6cb88342", 122 | "execution_count": null 123 | }, 124 | { 125 | "cell_type": "code", 126 | "outputs": [], 127 | "source": [ 128 | "strategy_uniform = \"uniform\" \n", 129 | "strategy_quantile = \"quantile\" \n", 130 | "strategy_kmeans = \"kmeans\" \n", 131 | "\n", 132 | "# Use for quantile\n", 133 | "sub_sample_quantile = 200_000\n", 134 | "\n", 135 | "# Use for kmeans or uniform\n", 136 | "sub_sample_others = None" 137 | ], 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "id": "f5a88ccdb685e3d1", 142 | "execution_count": null 143 | }, 144 | { 145 | "cell_type": "code", 146 | "outputs": [], 147 | "source": [ 148 | "enc = KBinsDiscretizer(n_bins=4, \n", 149 | " encode=\"ordinal\", \n", 150 | " strategy=\"kmeans\", \n", 151 | " subsample=sub_sample_quantile)\n", 152 | "enc.fit(uniform_dataset)\n", 153 | "gd_data = enc.transform(uniform_dataset)" 154 | ], 155 | "metadata": { 156 | "collapsed": false 157 | }, 158 | "id": "9e0babbfe43b50d6", 159 | "execution_count": null 160 | }, 161 | { 162 | "cell_type": "code", 163 | "outputs": [], 164 | "source": [ 165 | "print(gd_data)" 166 | ], 167 | "metadata": { 168 | "collapsed": false 169 | }, 170 | "id": "32c31b300525ad0b", 171 | "execution_count": null 172 | }, 173 | { 174 | "cell_type": "code", 175 | "outputs": [], 176 | "source": [ 177 | "uniform_dataset = generate_uniform_dataset(n_data_points=100)\n", 178 | "\n", 179 | "blobbed_dataset_0 = generate_blobbed_dataset(n_data_points=data_points,\n", 180 | " n_blobs=2,\n", 181 | " blob_std=0.1,\n", 182 | " blob_centers=[[0.1, 0.1], \n", 183 | " [0.9, 0.1]],\n", 184 | " size_blobs=[3, \n", 185 | " 5])\n", 186 | "\n", 187 | "blobbed_dataset_1 = generate_blobbed_dataset(n_data_points=data_points,\n", 188 | " n_blobs=3,\n", 189 | " blob_std=0.15,\n", 190 | " blob_centers=[[0.7, 0.2], \n", 191 | " [0.2, 0.1], \n", 192 | " [0.3, 0.8]],\n", 193 | " size_blobs=[2, \n", 194 | " 3, \n", 195 | " 12])" 196 | ], 197 | "metadata": { 198 | "collapsed": false 199 | }, 200 | "id": "deaff0798e3ae2aa", 201 | "execution_count": null 202 | }, 203 | { 204 | "cell_type": "code", 205 | "outputs": [], 206 | "source": [ 207 | "datasets = [uniform_dataset, \n", 208 | " blobbed_dataset_0, \n", 209 | " blobbed_dataset_1]\n", 210 | "\n", 211 | "strategies = [strategy_uniform,\n", 212 | " strategy_quantile,\n", 213 | " strategy_kmeans]\n", 214 | "\n", 215 | "n_bins = 2" 216 | ], 217 | "metadata": { 218 | "collapsed": false 219 | }, 220 | "id": "2ee1fc69302563f0", 221 | "execution_count": null 222 | }, 223 | { 224 | "cell_type": "code", 225 | "outputs": [], 226 | "source": [ 227 | "figure = plotter.figure(figsize=(14, 9))\n", 228 | "i = 1\n", 229 | "for ds_cnt, X in enumerate(datasets):\n", 230 | " ax = plotter.subplot(len(datasets), len(strategies) + 1, i)\n", 231 | " ax.scatter(X[:, 0], X[:, 1], edgecolors=\"k\")\n", 232 | " if ds_cnt == 0:\n", 233 | " ax.set_title(\"Input data\", size=14)\n", 234 | "\n", 235 | " xx, yy = numpy.meshgrid(\n", 236 | " numpy.linspace(X[:, 0].min(), X[:, 0].max(), 300),\n", 237 | " numpy.linspace(X[:, 1].min(), X[:, 1].max(), 300),\n", 238 | " )\n", 239 | " grid = numpy.c_[xx.ravel(), yy.ravel()]\n", 240 | "\n", 241 | " ax.set_xlim(xx.min(), xx.max())\n", 242 | " ax.set_ylim(yy.min(), yy.max())\n", 243 | " ax.set_xticks(())\n", 244 | " ax.set_yticks(())\n", 245 | "\n", 246 | " i += 1\n", 247 | " # transform the dataset with KBinsDiscretizer\n", 248 | " for strategy in strategies:\n", 249 | " enc = KBinsDiscretizer(\n", 250 | " n_bins=n_bins, encode=\"ordinal\", strategy=strategy, subsample=200_000\n", 251 | " )\n", 252 | " enc.fit(X)\n", 253 | " grid_encoded = enc.transform(grid)\n", 254 | "\n", 255 | " ax = plotter.subplot(len(datasets), len(strategies) + 1, i)\n", 256 | "\n", 257 | " # horizontal stripes\n", 258 | " horizontal = grid_encoded[:, 0].reshape(xx.shape)\n", 259 | " ax.contourf(xx, yy, horizontal, alpha=0.5)\n", 260 | " # vertical stripes\n", 261 | " vertical = grid_encoded[:, 1].reshape(xx.shape)\n", 262 | " ax.contourf(xx, yy, vertical, alpha=0.5)\n", 263 | "\n", 264 | " ax.scatter(X[:, 0], X[:, 1], edgecolors=\"k\")\n", 265 | " ax.set_xlim(xx.min(), xx.max())\n", 266 | " ax.set_ylim(yy.min(), yy.max())\n", 267 | " ax.set_xticks(())\n", 268 | " ax.set_yticks(())\n", 269 | " if ds_cnt == 0:\n", 270 | " ax.set_title(\"strategy='%s'\" % (strategy,), size=14)\n", 271 | "\n", 272 | " i += 1\n", 273 | "\n", 274 | "plotter.tight_layout()\n", 275 | "plotter.show()\n", 276 | "\n", 277 | "# Source: https://scikit-learn.org/stable/auto_examples/preprocessing/plot_discretization_strategies.html#sphx-glr-auto-examples-preprocessing-plot-discretization-strategies-py" 278 | ], 279 | "metadata": { 280 | "collapsed": false 281 | }, 282 | "id": "17417ac6ac03eff9", 283 | "execution_count": null 284 | } 285 | ], 286 | "metadata": { 287 | "kernelspec": { 288 | "display_name": "Python 3", 289 | "language": "python", 290 | "name": "python3" 291 | }, 292 | "language_info": { 293 | "codemirror_mode": { 294 | "name": "ipython", 295 | "version": 2 296 | }, 297 | "file_extension": ".py", 298 | "mimetype": "text/x-python", 299 | "name": "python", 300 | "nbconvert_exporter": "python", 301 | "pygments_lexer": "ipython2", 302 | "version": "2.7.6" 303 | } 304 | }, 305 | "nbformat": 4, 306 | "nbformat_minor": 5 307 | } 308 | -------------------------------------------------------------------------------- /Webinar/Webinar-II-VeriOnisleme/transformations.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pandas import DataFrame 4 | from sklearn.preprocessing import PowerTransformer, StandardScaler 5 | 6 | 7 | def standardize_dataset(df: DataFrame): 8 | scaler = StandardScaler().fit(df) 9 | df2 = DataFrame(data=scaler.transform(df), columns=df.columns) 10 | return df2 11 | 12 | 13 | def normalize_dataset(df: DataFrame, 14 | all_pos=False, 15 | min_val: float = 0.000001, 16 | exclude: List[str] = []): 17 | df2 = df.copy() 18 | for c in df.columns: 19 | if c not in exclude: 20 | if all_pos: 21 | df2[c] = min_val + (df[c] - df[c].min()) / (df[c].max() - df[c].min() - min_val) 22 | else: 23 | df2[c] = (df[c] - df[c].min()) / (df[c].max() - df[c].min()) 24 | return df2 25 | 26 | 27 | def box_cox(data, 28 | normalize=False, 29 | standardize=False): 30 | fitter = PowerTransformer(method="box-cox", 31 | standardize=standardize) 32 | dt = fitter.fit_transform(data) 33 | if normalize: 34 | dt = (dt - dt.min()) / (dt.max() - dt.min()) 35 | return dt 36 | 37 | 38 | def yeo_johnson(data, 39 | normalize=False, 40 | standardize=False): 41 | fitter = PowerTransformer(method="yeo-johnson", 42 | standardize=standardize) 43 | dt = fitter.fit_transform(data) 44 | if normalize: 45 | dt = (dt - dt.min()) / (dt.max() - dt.min()) 46 | return dt 47 | -------------------------------------------------------------------------------- /Webinar/Webinar-II-VeriOnisleme/transformations_basic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "outputs": [], 6 | "source": [ 7 | "import numpy\n", 8 | "from pandas import DataFrame\n", 9 | "from matplotlib import pyplot as plotter\n", 10 | "\n", 11 | "from transformations import normalize_dataset\n", 12 | "from data_generator import generate_skewed_data_right, generate_skewed_data_left, generate_extreme_skewed_data_right" 13 | ], 14 | "metadata": { 15 | "collapsed": false 16 | }, 17 | "id": "9ebf22c3433966bd", 18 | "execution_count": null 19 | }, 20 | { 21 | "cell_type": "code", 22 | "outputs": [], 23 | "source": [ 24 | "feature_a = 'feature_a'\n", 25 | "feature_b = 'feature_b'\n", 26 | "feature_c = 'feature_c'\n", 27 | "\n", 28 | "sqrt = \"SQRT\"\n", 29 | "log = \"LOG\"\n", 30 | "reverse = \"1/N\"" 31 | ], 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "id": "bf29a585765aea02", 36 | "execution_count": null 37 | }, 38 | { 39 | "cell_type": "code", 40 | "outputs": [], 41 | "source": [ 42 | "# Variables\n", 43 | "\n", 44 | "plot_hist_color = 'cyan'\n", 45 | "plot_hist_alpha = 0.63\n", 46 | "plot_bar_count = 1000\n", 47 | "\n", 48 | "size = 100000" 49 | ], 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "id": "427d88adaff9081c", 54 | "execution_count": null 55 | }, 56 | { 57 | "cell_type": "code", 58 | "outputs": [], 59 | "source": [ 60 | "df_data = DataFrame({feature_a: generate_skewed_data_left(size=size),\n", 61 | " feature_b: generate_skewed_data_right(size=size),\n", 62 | " feature_c: generate_extreme_skewed_data_right(size=size)})\n", 63 | "\n", 64 | "df_data = normalize_dataset(df=df_data, \n", 65 | " all_pos=True)" 66 | ], 67 | "metadata": { 68 | "collapsed": false 69 | }, 70 | "id": "98953e5840227807", 71 | "execution_count": null 72 | }, 73 | { 74 | "cell_type": "code", 75 | "outputs": [], 76 | "source": [ 77 | "df_data.head()" 78 | ], 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "id": "fed5f9ffffaa76f8", 83 | "execution_count": null 84 | }, 85 | { 86 | "cell_type": "code", 87 | "outputs": [], 88 | "source": [ 89 | "# Plot Feature A\n", 90 | "\n", 91 | "plotter.hist(df_data['feature_a'], \n", 92 | " plot_bar_count, \n", 93 | " density=True, \n", 94 | " color=plot_hist_color, \n", 95 | " alpha=plot_hist_alpha);" 96 | ], 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "id": "fc881d581c387792", 101 | "execution_count": null 102 | }, 103 | { 104 | "cell_type": "code", 105 | "outputs": [], 106 | "source": [ 107 | "plotter.hist(df_data['feature_b'], \n", 108 | " plot_bar_count, \n", 109 | " density=True, \n", 110 | " color=plot_hist_color, \n", 111 | " alpha=plot_hist_alpha);" 112 | ], 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "id": "2410770500cfcf61", 117 | "execution_count": null 118 | }, 119 | { 120 | "cell_type": "code", 121 | "outputs": [], 122 | "source": [ 123 | "plotter.hist(df_data['feature_c'], \n", 124 | " plot_bar_count, \n", 125 | " density=True, \n", 126 | " color=plot_hist_color, \n", 127 | " alpha=plot_hist_alpha);" 128 | ], 129 | "metadata": { 130 | "collapsed": false 131 | }, 132 | "id": "6b12ca65a4f09a0e", 133 | "execution_count": null 134 | }, 135 | { 136 | "cell_type": "code", 137 | "outputs": [], 138 | "source": [ 139 | "# Apply Transformations on all Features\n", 140 | "# * Sqrt Transformation\n", 141 | "# * Logarithmic Transformation\n", 142 | "# * 1/N Transformation\n", 143 | "\n", 144 | "for column in df_data:\n", 145 | " df_data[column + \"_\" + sqrt] = numpy.sqrt(df_data[column])\n", 146 | " df_data[column + \"_\" + log] = numpy.log2(df_data[column])\n", 147 | " df_data[column + \"_\" + reverse] = numpy.power(df_data[column], -1.0) \n" 148 | ], 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "id": "160fcc5a67431248", 153 | "execution_count": null 154 | }, 155 | { 156 | "cell_type": "code", 157 | "outputs": [], 158 | "source": [ 159 | "# Show Plots\n", 160 | "# * Sqrt on Feature A \n", 161 | "\n", 162 | "plotter.hist(df_data[feature_a + \"_\" + sqrt], \n", 163 | " plot_bar_count, \n", 164 | " density=True, \n", 165 | " color=plot_hist_color, \n", 166 | " alpha=plot_hist_alpha);" 167 | ], 168 | "metadata": { 169 | "collapsed": false 170 | }, 171 | "id": "bbe9aec2ad944b51", 172 | "execution_count": null 173 | }, 174 | { 175 | "cell_type": "code", 176 | "outputs": [], 177 | "source": [ 178 | "# Show Plots\n", 179 | "# * Log on Feature A \n", 180 | "\n", 181 | "plotter.hist(df_data[feature_a + \"_\" + log], \n", 182 | " plot_bar_count, \n", 183 | " range=(-1.0, 0.0), \n", 184 | " density=True, \n", 185 | " color=plot_hist_color, \n", 186 | " alpha=plot_hist_alpha);" 187 | ], 188 | "metadata": { 189 | "collapsed": false 190 | }, 191 | "id": "27de24f18a0f56a5", 192 | "execution_count": null 193 | }, 194 | { 195 | "cell_type": "code", 196 | "outputs": [], 197 | "source": [ 198 | "# Show Plots\n", 199 | "# * Reverse on Feature A \n", 200 | "\n", 201 | "plotter.hist(df_data[feature_a + \"_\" + reverse], \n", 202 | " plot_bar_count, \n", 203 | " range=(1.0, 3.0), \n", 204 | " density=True, \n", 205 | " color=plot_hist_color, \n", 206 | " alpha=plot_hist_alpha);" 207 | ], 208 | "metadata": { 209 | "collapsed": false 210 | }, 211 | "id": "c4c0fe2f0e48b79f", 212 | "execution_count": null 213 | }, 214 | { 215 | "cell_type": "code", 216 | "outputs": [], 217 | "source": [ 218 | "df_data.head(1000)" 219 | ], 220 | "metadata": { 221 | "collapsed": false 222 | }, 223 | "id": "10be9a26bd812eac", 224 | "execution_count": null 225 | }, 226 | { 227 | "cell_type": "code", 228 | "outputs": [], 229 | "source": [ 230 | "# Show Plots\n", 231 | "# * Sqrt on Feature B \n", 232 | "\n", 233 | "plotter.hist(df_data[feature_b + \"_\" + sqrt], \n", 234 | " plot_bar_count, \n", 235 | " density=True, \n", 236 | " color=plot_hist_color, \n", 237 | " alpha=plot_hist_alpha);" 238 | ], 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "id": "347bf1dccb8b2526", 243 | "execution_count": null 244 | }, 245 | { 246 | "cell_type": "code", 247 | "outputs": [], 248 | "source": [ 249 | "# Show Plots\n", 250 | "# * Log on Feature B \n", 251 | "\n", 252 | "plotter.hist(df_data[feature_b + \"_\" + log], \n", 253 | " plot_bar_count, \n", 254 | " range= (-5.0, 0.0), \n", 255 | " density=True, \n", 256 | " color=plot_hist_color, \n", 257 | " alpha=plot_hist_alpha);" 258 | ], 259 | "metadata": { 260 | "collapsed": false 261 | }, 262 | "id": "f57b7d86bfc1e582", 263 | "execution_count": null 264 | }, 265 | { 266 | "cell_type": "code", 267 | "outputs": [], 268 | "source": [ 269 | "# Show Plots\n", 270 | "# * Reverse on Feature B \n", 271 | "\n", 272 | "plotter.hist(df_data[feature_b + \"_\" + reverse], \n", 273 | " plot_bar_count, \n", 274 | " range=(0.0, 20.0), \n", 275 | " density=True, \n", 276 | " color=plot_hist_color, \n", 277 | " alpha=plot_hist_alpha);" 278 | ], 279 | "metadata": { 280 | "collapsed": false 281 | }, 282 | "id": "64347cc4f9063cf4", 283 | "execution_count": null 284 | }, 285 | { 286 | "cell_type": "code", 287 | "outputs": [], 288 | "source": [ 289 | "# Show Plots\n", 290 | "# * Sqrt on Feature C \n", 291 | "\n", 292 | "plotter.hist(df_data[feature_c + \"_\" + sqrt], \n", 293 | " plot_bar_count, \n", 294 | " density=True, \n", 295 | " color=plot_hist_color, \n", 296 | " alpha=plot_hist_alpha);" 297 | ], 298 | "metadata": { 299 | "collapsed": false 300 | }, 301 | "id": "400db10aa4830224", 302 | "execution_count": null 303 | }, 304 | { 305 | "cell_type": "code", 306 | "outputs": [], 307 | "source": [ 308 | "# Show Plots\n", 309 | "# * Log on Feature C \n", 310 | "\n", 311 | "plotter.hist(df_data[feature_c + \"_\" + log], \n", 312 | " plot_bar_count, \n", 313 | " density=True, \n", 314 | " color=plot_hist_color, \n", 315 | " alpha=plot_hist_alpha);" 316 | ], 317 | "metadata": { 318 | "collapsed": false 319 | }, 320 | "id": "f239f4f69d389d9c", 321 | "execution_count": null 322 | }, 323 | { 324 | "cell_type": "code", 325 | "outputs": [], 326 | "source": [ 327 | "# Show Plots\n", 328 | "# * Reverse on Feature C \n", 329 | "\n", 330 | "plotter.hist(df_data[feature_c + \"_\" + reverse], \n", 331 | " plot_bar_count, \n", 332 | " range=(0.0, 1000.0), \n", 333 | " density=True, \n", 334 | " color=plot_hist_color, \n", 335 | " alpha=plot_hist_alpha);" 336 | ], 337 | "metadata": { 338 | "collapsed": false 339 | }, 340 | "id": "e457a440ed4faa81", 341 | "execution_count": null 342 | } 343 | ], 344 | "metadata": { 345 | "kernelspec": { 346 | "display_name": "Python 3", 347 | "language": "python", 348 | "name": "python3" 349 | }, 350 | "language_info": { 351 | "codemirror_mode": { 352 | "name": "ipython", 353 | "version": 2 354 | }, 355 | "file_extension": ".py", 356 | "mimetype": "text/x-python", 357 | "name": "python", 358 | "nbconvert_exporter": "python", 359 | "pygments_lexer": "ipython2", 360 | "version": "2.7.6" 361 | } 362 | }, 363 | "nbformat": 4, 364 | "nbformat_minor": 5 365 | } 366 | -------------------------------------------------------------------------------- /Webinar/Webinar-II-VeriOnisleme/transformations_bcyj.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "initial_id", 7 | "metadata": { 8 | "collapsed": true 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "import matplotlib.pyplot as plotter\n", 13 | "from transformations import normalize_dataset, standardize_dataset, box_cox, yeo_johnson\n", 14 | "from pandas import DataFrame, read_csv\n", 15 | "from scipy.stats import pearsonr" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "outputs": [], 21 | "source": [ 22 | "# Initialize Variables\n", 23 | "\n", 24 | "# Plotting variables\n", 25 | "size = 1000\n", 26 | "plot_hist_color = 'cyan'\n", 27 | "plot_hist_alpha = 0.63\n", 28 | "\n", 29 | "# Problem Specific Variables\n", 30 | "output_key = 'OUTPUT'\n", 31 | "milage_key = 'MIL'\n", 32 | "year_key = 'YEAR'\n", 33 | "fuel_consumption_key = 'FCON'\n", 34 | "condition_key = 'COND'\n", 35 | "\n", 36 | "# Data Path\n", 37 | "file_path = \"data/carprices.csv\"" 38 | ], 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "id": "dbdc7e93a8676816", 43 | "execution_count": null 44 | }, 45 | { 46 | "cell_type": "code", 47 | "outputs": [], 48 | "source": [ 49 | "chosen_key = fuel_consumption_key" 50 | ], 51 | "metadata": { 52 | "collapsed": false 53 | }, 54 | "id": "254e99d28bf8d0a6", 55 | "execution_count": null 56 | }, 57 | { 58 | "cell_type": "code", 59 | "outputs": [], 60 | "source": [ 61 | "# Load and Present Dataset\n", 62 | "\n", 63 | "# Load\n", 64 | "df_raw = read_csv(file_path)\n", 65 | "\n", 66 | "# Show\n", 67 | "df_raw.head()" 68 | ], 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "id": "a71f8a6f036e2277", 73 | "execution_count": null 74 | }, 75 | { 76 | "cell_type": "code", 77 | "outputs": [], 78 | "source": [ 79 | "# Stats, Raw Data\n", 80 | "\n", 81 | "print(\"Average: \" + str(df_raw[chosen_key].sum() / len(df_raw[chosen_key])))\n", 82 | "print(\"Min: \" + str(df_raw[chosen_key].min()))\n", 83 | "print(\"Max: \" + str(df_raw[chosen_key].max()))" 84 | ], 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "id": "78797d1af0b6023d", 89 | "execution_count": null 90 | }, 91 | { 92 | "cell_type": "code", 93 | "outputs": [], 94 | "source": [ 95 | "# Plot Raw Data Distribution\n", 96 | "\n", 97 | "plotter.hist(df_raw[chosen_key], \n", 98 | " 25, \n", 99 | " density=True, \n", 100 | " color=plot_hist_color, \n", 101 | " alpha=plot_hist_alpha);" 102 | ], 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "id": "820e6ef8b20966a5", 107 | "execution_count": null 108 | }, 109 | { 110 | "cell_type": "code", 111 | "outputs": [], 112 | "source": [ 113 | "# Normalize Dataset\n", 114 | "\n", 115 | "df_norm = normalize_dataset(df_raw, \n", 116 | " all_pos=True)" 117 | ], 118 | "metadata": { 119 | "collapsed": false 120 | }, 121 | "id": "d9828351fb1e3985", 122 | "execution_count": null 123 | }, 124 | { 125 | "cell_type": "code", 126 | "outputs": [], 127 | "source": [ 128 | "# Show Normalized Data\n", 129 | "\n", 130 | "df_norm.head()" 131 | ], 132 | "metadata": { 133 | "collapsed": false 134 | }, 135 | "id": "8cf7790b73cd08b4", 136 | "execution_count": null 137 | }, 138 | { 139 | "cell_type": "code", 140 | "outputs": [], 141 | "source": [ 142 | "# Stats, Normalized Data\n", 143 | "\n", 144 | "print(\"Average: \" + str(df_norm[chosen_key].sum() / len(df_norm[chosen_key])))\n", 145 | "print(\"Min: \" + str(df_norm[chosen_key].min()))\n", 146 | "print(\"Max: \" + str(df_norm[chosen_key].max()))" 147 | ], 148 | "metadata": { 149 | "collapsed": false 150 | }, 151 | "id": "1edadf28403380d8", 152 | "execution_count": null 153 | }, 154 | { 155 | "cell_type": "code", 156 | "outputs": [], 157 | "source": [ 158 | "# Plot Normalized Data Distribution\n", 159 | "\n", 160 | "plotter.hist(df_norm[chosen_key], \n", 161 | " 25, \n", 162 | " density=True, \n", 163 | " color=plot_hist_color, \n", 164 | " alpha=plot_hist_alpha);" 165 | ], 166 | "metadata": { 167 | "collapsed": false 168 | }, 169 | "id": "f1f20bc0178588b9", 170 | "execution_count": null 171 | }, 172 | { 173 | "cell_type": "code", 174 | "outputs": [], 175 | "source": [ 176 | "# Standardize Dataset\n", 177 | "\n", 178 | "df_stan = standardize_dataset(df_raw)" 179 | ], 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "id": "25d895415c88fbe7", 184 | "execution_count": null 185 | }, 186 | { 187 | "cell_type": "code", 188 | "outputs": [], 189 | "source": [ 190 | "# Show Standardized Data\n", 191 | "\n", 192 | "df_stan.head()" 193 | ], 194 | "metadata": { 195 | "collapsed": false 196 | }, 197 | "id": "7c41d4a6b528ca93", 198 | "execution_count": null 199 | }, 200 | { 201 | "cell_type": "code", 202 | "outputs": [], 203 | "source": [ 204 | "# Stats, Standardized Data\n", 205 | "\n", 206 | "print(\"Average: \" + str(df_stan[chosen_key].sum() / len(df_stan[chosen_key])))\n", 207 | "print(\"Min: \" + str(df_stan[chosen_key].min()))\n", 208 | "print(\"Max: \" + str(df_stan[chosen_key].max()))" 209 | ], 210 | "metadata": { 211 | "collapsed": false 212 | }, 213 | "id": "8d4de31e77af8991", 214 | "execution_count": null 215 | }, 216 | { 217 | "cell_type": "code", 218 | "outputs": [], 219 | "source": [ 220 | "# Plot Standardized Data Distribution\n", 221 | "\n", 222 | "plotter.hist(df_stan[chosen_key], \n", 223 | " 25, \n", 224 | " density=True, \n", 225 | " color=plot_hist_color, \n", 226 | " alpha=plot_hist_alpha);" 227 | ], 228 | "metadata": { 229 | "collapsed": false 230 | }, 231 | "id": "dc819b34f4f3de6e", 232 | "execution_count": null 233 | }, 234 | { 235 | "cell_type": "code", 236 | "outputs": [], 237 | "source": [ 238 | "# Box Cox, Raw out\n", 239 | "\n", 240 | "df_bc_raw = DataFrame(box_cox(DataFrame(df_norm[chosen_key], \n", 241 | " columns=[chosen_key])), \n", 242 | " columns=[chosen_key])\n", 243 | "\n", 244 | "plotter.hist(df_bc_raw[chosen_key], \n", 245 | " 25, \n", 246 | " density=True, \n", 247 | " color=plot_hist_color, \n", 248 | " alpha=plot_hist_alpha);" 249 | ], 250 | "metadata": { 251 | "collapsed": false 252 | }, 253 | "id": "3bc49bc7f7fb26", 254 | "execution_count": null 255 | }, 256 | { 257 | "cell_type": "code", 258 | "outputs": [], 259 | "source": [ 260 | "# Box Cox, Normalize After\n", 261 | "\n", 262 | "df_bc_norm = DataFrame(box_cox(DataFrame(df_norm[chosen_key], columns=[chosen_key]), normalize=True), columns=[chosen_key])\n", 263 | "\n", 264 | "plotter.hist(df_bc_norm[chosen_key], \n", 265 | " 25, \n", 266 | " density=True, \n", 267 | " color=plot_hist_color, \n", 268 | " alpha=plot_hist_alpha);" 269 | ], 270 | "metadata": { 271 | "collapsed": false 272 | }, 273 | "id": "a479128673e43c0a", 274 | "execution_count": null 275 | }, 276 | { 277 | "cell_type": "code", 278 | "outputs": [], 279 | "source": [ 280 | "# Box Cox, Standardize After\n", 281 | "\n", 282 | "df_bc_stan = DataFrame(box_cox(DataFrame(df_norm[chosen_key], \n", 283 | " columns=[chosen_key]), \n", 284 | " standardize=True), \n", 285 | " columns=[chosen_key])\n", 286 | "\n", 287 | "plotter.hist(df_bc_stan[chosen_key], \n", 288 | " 25, \n", 289 | " density=True, \n", 290 | " color=plot_hist_color, \n", 291 | " alpha=plot_hist_alpha);" 292 | ], 293 | "metadata": { 294 | "collapsed": false 295 | }, 296 | "id": "70168e817a6c8197", 297 | "execution_count": null 298 | }, 299 | { 300 | "cell_type": "code", 301 | "outputs": [], 302 | "source": [ 303 | "# Box Cox Data Inspection\n", 304 | "\n", 305 | "df_temp = DataFrame({'raw': df_raw[chosen_key],\n", 306 | " 'norm': df_norm[chosen_key],\n", 307 | " 'bc_raw': df_bc_raw[chosen_key],\n", 308 | " 'bc_norm': df_bc_norm[chosen_key],\n", 309 | " 'bc_stan': df_bc_stan[chosen_key]})\n", 310 | "\n", 311 | "df_temp.head()" 312 | ], 313 | "metadata": { 314 | "collapsed": false 315 | }, 316 | "id": "bdfd92a387b5aa1a", 317 | "execution_count": null 318 | }, 319 | { 320 | "cell_type": "code", 321 | "outputs": [], 322 | "source": [ 323 | "# Yeo Johnson, Raw\n", 324 | "\n", 325 | "df_yj_raw = DataFrame(yeo_johnson(DataFrame(df_norm[chosen_key], \n", 326 | " columns=[chosen_key])), \n", 327 | " columns=[chosen_key])\n", 328 | "\n", 329 | "plotter.hist(df_yj_raw[chosen_key], \n", 330 | " 25, \n", 331 | " density=True, \n", 332 | " color=plot_hist_color, \n", 333 | " alpha=plot_hist_alpha);" 334 | ], 335 | "metadata": { 336 | "collapsed": false 337 | }, 338 | "id": "a5b6ed83cc671769", 339 | "execution_count": null 340 | }, 341 | { 342 | "cell_type": "code", 343 | "outputs": [], 344 | "source": [ 345 | "df_yj_norm = DataFrame(yeo_johnson(DataFrame(df_norm[chosen_key], columns=[chosen_key]), normalize=True), columns=[chosen_key])\n", 346 | "\n", 347 | "plotter.hist(df_yj_norm[chosen_key], \n", 348 | " 25, \n", 349 | " density=True, \n", 350 | " color=plot_hist_color, \n", 351 | " alpha=plot_hist_alpha);" 352 | ], 353 | "metadata": { 354 | "collapsed": false 355 | }, 356 | "id": "156ea178d52d8059", 357 | "execution_count": null 358 | }, 359 | { 360 | "cell_type": "code", 361 | "outputs": [], 362 | "source": [ 363 | "df_yj_stan = DataFrame(yeo_johnson(DataFrame(df_norm[chosen_key], columns=[chosen_key]), standardize=True), columns=[chosen_key])\n", 364 | "\n", 365 | "plotter.hist(df_yj_stan[chosen_key], \n", 366 | " 25, \n", 367 | " density=True, \n", 368 | " color=plot_hist_color, \n", 369 | " alpha=plot_hist_alpha);" 370 | ], 371 | "metadata": { 372 | "collapsed": false 373 | }, 374 | "id": "9f2b586c127b8375", 375 | "execution_count": null 376 | }, 377 | { 378 | "cell_type": "code", 379 | "outputs": [], 380 | "source": [ 381 | "# Yeo Johnson Inspection\n", 382 | "\n", 383 | "df_temp = DataFrame({'raw': df_raw[chosen_key],\n", 384 | " 'norm': df_norm[chosen_key],\n", 385 | " 'bc_raw': df_bc_raw[chosen_key],\n", 386 | " 'bc_norm': df_bc_norm[chosen_key],\n", 387 | " 'bc_stan': df_bc_stan[chosen_key],\n", 388 | " 'yj_raw': df_yj_raw[chosen_key],\n", 389 | " 'yj_norm': df_yj_norm[chosen_key],\n", 390 | " 'yj_stan': df_yj_stan[chosen_key]})\n", 391 | "\n", 392 | "df_temp.head()" 393 | ], 394 | "metadata": { 395 | "collapsed": false 396 | }, 397 | "id": "722a766bf5de18ee", 398 | "execution_count": null 399 | }, 400 | { 401 | "cell_type": "code", 402 | "outputs": [], 403 | "source": [ 404 | "print(\"Correlation (RAW): \" + str(pearsonr(df_raw[chosen_key], df_raw[output_key])[0]))\n", 405 | "print(\"Correlation (Norm): \" + str(pearsonr(df_norm[chosen_key], df_norm[output_key])[0]))\n", 406 | "print(\"Correlation (Stan): \" + str(pearsonr(df_stan[chosen_key], df_stan[output_key])[0]))\n", 407 | "print(\"Box Cox Transformation (RAW)\" + str(str(pearsonr(df_bc_raw[chosen_key], df_norm[output_key])[0])))\n", 408 | "print(\"Box Cox Transformation (Norm)\" + str(str(pearsonr(df_bc_norm[chosen_key], df_norm[output_key])[0])))\n", 409 | "print(\"Box Cox Transformation (Stan)\" + str(str(pearsonr(df_bc_stan[chosen_key], df_norm[output_key])[0])))\n", 410 | "print(\"Yeo Johnson Transformation (RAW)\" + str(str(pearsonr(df_yj_raw[chosen_key], df_norm[output_key])[0])))\n", 411 | "print(\"Yeo Johnson Transformation (Norm)\" + str(str(pearsonr(df_yj_norm[chosen_key], df_norm[output_key])[0])))\n", 412 | "print(\"Yeo Johnson Transformation (Stan)\" + str(str(pearsonr(df_yj_stan[chosen_key], df_norm[output_key])[0])))" 413 | ], 414 | "metadata": { 415 | "collapsed": false 416 | }, 417 | "id": "f0657e00402ae422", 418 | "execution_count": null 419 | } 420 | ], 421 | "metadata": { 422 | "kernelspec": { 423 | "display_name": "Python 3", 424 | "language": "python", 425 | "name": "python3" 426 | }, 427 | "language_info": { 428 | "codemirror_mode": { 429 | "name": "ipython", 430 | "version": 2 431 | }, 432 | "file_extension": ".py", 433 | "mimetype": "text/x-python", 434 | "name": "python", 435 | "nbconvert_exporter": "python", 436 | "pygments_lexer": "ipython2", 437 | "version": "2.7.6" 438 | } 439 | }, 440 | "nbformat": 4, 441 | "nbformat_minor": 5 442 | } 443 | -------------------------------------------------------------------------------- /Webinar/Webinar-VII-YOLO ile Mamografi Görüntülerinden Kitle Tespit Uygulaması/data.yaml: -------------------------------------------------------------------------------- 1 | train: /workspace/notebooks/webinar/data/Fold0/train/images 2 | val: /workspace/notebooks/webinar/data/Fold0/validation/images 3 | nc: 1 4 | names: ["MASS"] -------------------------------------------------------------------------------- /Webinar/Webinar-VII-YOLO ile Mamografi Görüntülerinden Kitle Tespit Uygulaması/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os 3 | import glob 4 | import cv2 5 | import utils 6 | import pydicom as dicom 7 | import plistlib 8 | import pandas as pd 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | 12 | from PIL import Image 13 | from tqdm import tqdm 14 | from tqdm.notebook import tqdm_notebook 15 | from sklearn.model_selection import KFold 16 | from sklearn.model_selection import train_test_split 17 | 18 | 19 | def create_data_dir(folderPath): 20 | if not os.path.exists(folderPath): 21 | os.mkdir(folderPath) 22 | 23 | def draw_annotation(image, line): 24 | centerPointX = float(line[1]) * image.shape[1] 25 | centerPointY = float(line[2]) * image.shape[0] 26 | 27 | width = float(line[3]) * image.shape[1] 28 | height = float(line[4]) * image.shape[0] 29 | 30 | startPoint = (int(centerPointX-(width/2)), int(centerPointY-(height/2))) 31 | endPoint = (int(centerPointX+(width/2)), int(centerPointY+(height/2))) 32 | 33 | color = (255,0,0) 34 | thickness = 5 35 | 36 | image = cv2.rectangle(image, startPoint, endPoint, color, thickness) 37 | 38 | 39 | def saveImages(dicomFilePath, saveFolderPath): 40 | """ 41 | Saving dicom images as png 42 | 43 | Parameters 44 | ---------- 45 | dicomFilePath : str 46 | Dicom File Path 47 | saveFolderPath : str 48 | Image Save Folder Path 49 | 50 | Returns 51 | ------- 52 | 53 | imageFilePath: str 54 | Saved Image File Path 55 | 56 | """ 57 | 58 | dicomData = dicom.read_file(dicomFilePath) 59 | imgArray = dicomData.pixel_array 60 | imgArray = 255*((imgArray - imgArray.min()) / (imgArray.max() - imgArray.min())) 61 | imgArray = imgArray.astype(np.uint8) 62 | imgArray = Image.fromarray(imgArray).convert("L") 63 | 64 | dicomFileName = dicomFilePath.split(os.path.sep)[-1] 65 | patientID = '_'.join(dicomFileName.split("_")[:2]) 66 | view = ''.join(dicomFileName.split("_")[3:5]) 67 | imageFileName = '_'.join([patientID, view]) + '.png' 68 | imageFilePath = os.path.join(saveFolderPath, imageFileName) 69 | imgArray.save(imageFilePath) 70 | 71 | return imageFilePath 72 | 73 | 74 | def loadPoint(point_string): 75 | x, y = tuple([float(num) for num in point_string.strip('()').split(',')]) 76 | return y, x 77 | 78 | 79 | def saveMaskYOLOFormat(xmlFilePath, imageFilePath, saveMaskPath, biradsScore, imshape): 80 | 81 | """ 82 | Saving mask informations to text file 83 | 84 | Parameters 85 | ---------- 86 | xmlFilePath : str 87 | XML File Path 88 | imageFilePath : str 89 | Image File Path 90 | saveMaskPath : str 91 | Save Mask File Path 92 | biradsScore : int 93 | Malignancy-Benignance or Mass 94 | imshape : tupple 95 | Image Shape 96 | Returns 97 | ------- 98 | 99 | """ 100 | 101 | with open(xmlFilePath, 'rb') as maskFile: 102 | plistDict = plistlib.load(maskFile, fmt=plistlib.FMT_XML)['Images'][0] 103 | 104 | pngFileName = imageFilePath.split(os.path.sep)[-1] 105 | 106 | rois = plistDict['ROIs'] 107 | maskPoints = [] 108 | for roi in rois: 109 | numPoints = roi['NumberOfPoints'] 110 | if numPoints == 1: 111 | continue 112 | 113 | abnormality = roi["Name"] 114 | 115 | if abnormality =="Mass": 116 | 117 | points = roi['Point_px'] 118 | points = [loadPoint(point) for point in points] 119 | points = np.array(points) 120 | 121 | xmin = int(min(points[:,1])) 122 | ymin = int(min(points[:,0])) 123 | xmax = int(max(points[:,1])) 124 | ymax = int(max(points[:,0])) 125 | 126 | normalized_center_x = (xmin + (xmax-xmin)/2) / imshape[0] 127 | normalized_center_y = (ymin + (ymax-ymin)/2) / imshape[1] 128 | normalized_width = (xmax-xmin) / imshape[0] 129 | normalized_height = (ymax-ymin) / imshape[1] 130 | 131 | maskPoints.append(' '.join([str(biradsScore), str(normalized_center_x), str(normalized_center_y), str(normalized_width), str(normalized_height)])) 132 | 133 | with open(saveMaskPath, 'w') as textFile: 134 | textFile.write('\n'.join(maskPoints)) 135 | textFile.write('\n') -------------------------------------------------------------------------------- /docs/dijitalgencyze-icon.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbddobvyz/dijital-genc-yze/fa26926e4c8959359e89f4c2d6dee7936c7f90a6/docs/dijitalgencyze-icon.jpg --------------------------------------------------------------------------------