├── .gitattributes
├── README.md
├── Seminar 1
    └── itmo_ods_nlp_course_01_preprocessing.ipynb
├── Seminar 2
    └── itmo_ods_nlp_course_02_vectorization.ipynb
├── Seminar 3
    └── itmo_ods_nlp_course_03_word2vec.ipynb
├── Seminar 4
    ├── itmo_ods_nlp_course_cnn.ipynb
    └── itmo_ods_nlp_course_pytorch_basics.ipynb
├── Seminar 5
    ├── itmo_ods_nlp_course_ner.ipynb
    └── itmo_ods_nlp_course_nn_lm.ipynb
├── Seminar 6
    ├── itmo_ods_nlp_course_seq2seq.ipynb
    └── vocab.py
├── Seminar 7
    └── itmo_ods_nlp_course_PLMs.ipynb
├── Seminar 8
    ├── esenin.txt
    ├── itmo_ods_nlp_coures_yet_another_peft.ipynb
    ├── itmo_ods_nlp_course_LLMs.ipynb
    └── itmo_ods_nlp_course_poems_generation.ipynb
└── Seminar 9
    └── itmo_dl_nlp_course_rag.ipynb


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.zip filter=lfs diff=lfs merge=lfs -text
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ### Курс "Технологии обработки естественного языка" в ИТМО (весна 2025) - семинары
 2 | 
 3 | - [Семинар 1 - Инструменты предобработки текстовых данных (14.02.2025)](Seminar%201)
 4 | - [Семинар 2 - Базовые методы векторизации текстов (18.02.2025)](Seminar%202)
 5 | - [Семинар 3 - Word Embeddings (25.02.2025)](Seminar%203)
 6 | - [Семинар 4 - PyTorch basics, CNN (04.03.2025)](Seminar%204)
 7 | - [Семинар 5 - RNN, NER (11.03.2025)](Seminar%205)
 8 | - [Семинар 6 - Seq2Seq (18.03.2025)](Seminar%206)
 9 | - [Семинар 7 - PLMs (25.03.2025)](Seminar%207)
10 | - [Семинар 8 - LLMs (01.04.2025)](Seminar%208)
11 | - [Семинар 9 - RAG (15.04.2025)](Seminar%209)


--------------------------------------------------------------------------------
/Seminar 4/itmo_ods_nlp_course_pytorch_basics.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |   "cells": [
   3 |     {
   4 |       "cell_type": "markdown",
   5 |       "metadata": {
   6 |         "id": "YaFLTcQAHbSV"
   7 |       },
   8 |       "source": [
   9 |         "PyTorch - один из самых популярных фреймворков глубокого обучения для ML-специалистов. Фактически сегодня это еще и целая [экосистема](https://pytorch.org/ecosystem/) инструментов.\n",
  10 |         "\n",
  11 |         "В библиотеке есть четыре ключевых составляющих:\n",
  12 |         "\n",
  13 |         "- Развитый инструментарий для работы с тензорами. Он похож на numpy, но даёт дополнительные возможности по контролю выделяемой памяти, что важно при работе с большими моделями и данными.\n",
  14 |         "- Простое построение динамического вычислительного графа, позволяющего получать градиенты целевых функций от параметров модели.\n",
  15 |         "- Большой набор готовых слоёв для построения нейронных сетей произвольной архитектуры.\n",
  16 |         "- Возможность перенаправлять вычисления на графические процессоры GPU.\n",
  17 |         "\n"
  18 |       ]
  19 |     },
  20 |     {
  21 |       "cell_type": "code",
  22 |       "execution_count": null,
  23 |       "metadata": {
  24 |         "id": "3EpI8IEdHbSX"
  25 |       },
  26 |       "outputs": [],
  27 |       "source": [
  28 |         "import torch"
  29 |       ]
  30 |     },
  31 |     {
  32 |       "cell_type": "markdown",
  33 |       "metadata": {
  34 |         "id": "rRx-3BmvHbSW"
  35 |       },
  36 |       "source": [
  37 |         "### Тензоры\n"
  38 |       ]
  39 |     },
  40 |     {
  41 |       "cell_type": "markdown",
  42 |       "metadata": {
  43 |         "id": "8AE-Vw8iHbSZ"
  44 |       },
  45 |       "source": [
  46 |         "Создание тензора"
  47 |       ]
  48 |     },
  49 |     {
  50 |       "cell_type": "code",
  51 |       "execution_count": null,
  52 |       "metadata": {
  53 |         "colab": {
  54 |           "base_uri": "https://localhost:8080/"
  55 |         },
  56 |         "id": "Ed-Bg4JOHbSa",
  57 |         "outputId": "14d6d6a3-6072-4918-8bb7-665d88b4094c"
  58 |       },
  59 |       "outputs": [
  60 |         {
  61 |           "output_type": "stream",
  62 |           "name": "stdout",
  63 |           "text": [
  64 |             "tensor([[-6.7967e-07,  4.5666e-41, -7.3119e-07],\n",
  65 |             "        [ 4.5666e-41, -5.3389e-07,  4.5666e-41],\n",
  66 |             "        [-7.0882e-07,  4.5666e-41, -7.0877e-07],\n",
  67 |             "        [ 4.5666e-41, -7.2292e-07,  4.5666e-41],\n",
  68 |             "        [-7.2081e-07,  4.5666e-41, -6.8812e-07]])\n"
  69 |           ]
  70 |         }
  71 |       ],
  72 |       "source": [
  73 |         "x = torch.empty(5, 3)\n",
  74 |         "print(x)"
  75 |       ]
  76 |     },
  77 |     {
  78 |       "cell_type": "markdown",
  79 |       "metadata": {
  80 |         "id": "efjPGo2hHbSd"
  81 |       },
  82 |       "source": [
  83 |         "Случайная инициализация из равномерного распределения на полуинтервале [0; 1)"
  84 |       ]
  85 |     },
  86 |     {
  87 |       "cell_type": "code",
  88 |       "execution_count": null,
  89 |       "metadata": {
  90 |         "colab": {
  91 |           "base_uri": "https://localhost:8080/"
  92 |         },
  93 |         "id": "1mfaLeKTHbSe",
  94 |         "outputId": "dff4150c-ded1-49a3-8e5f-d2d4ffed2c9d"
  95 |       },
  96 |       "outputs": [
  97 |         {
  98 |           "output_type": "stream",
  99 |           "name": "stdout",
 100 |           "text": [
 101 |             "tensor([[0.8341, 0.0180, 0.0352],\n",
 102 |             "        [0.6997, 0.8201, 0.6471],\n",
 103 |             "        [0.9095, 0.4144, 0.7317],\n",
 104 |             "        [0.8288, 0.3143, 0.1800],\n",
 105 |             "        [0.3573, 0.5910, 0.7238]])\n"
 106 |           ]
 107 |         }
 108 |       ],
 109 |       "source": [
 110 |         "x = torch.rand(5, 3)\n",
 111 |         "print(x)"
 112 |       ]
 113 |     },
 114 |     {
 115 |       "cell_type": "markdown",
 116 |       "metadata": {
 117 |         "id": "sCS32yP5HbSf"
 118 |       },
 119 |       "source": [
 120 |         "Инициализация нулями"
 121 |       ]
 122 |     },
 123 |     {
 124 |       "cell_type": "code",
 125 |       "execution_count": null,
 126 |       "metadata": {
 127 |         "colab": {
 128 |           "base_uri": "https://localhost:8080/"
 129 |         },
 130 |         "id": "jV0vu4fWHbSf",
 131 |         "outputId": "fac04788-1e6d-48dc-a5cd-6217a5a7ca3b"
 132 |       },
 133 |       "outputs": [
 134 |         {
 135 |           "output_type": "stream",
 136 |           "name": "stdout",
 137 |           "text": [
 138 |             "tensor([[0, 0, 0],\n",
 139 |             "        [0, 0, 0],\n",
 140 |             "        [0, 0, 0],\n",
 141 |             "        [0, 0, 0],\n",
 142 |             "        [0, 0, 0]])\n"
 143 |           ]
 144 |         }
 145 |       ],
 146 |       "source": [
 147 |         "x = torch.zeros(5, 3, dtype=torch.long)\n",
 148 |         "print(x)"
 149 |       ]
 150 |     },
 151 |     {
 152 |       "cell_type": "markdown",
 153 |       "metadata": {
 154 |         "id": "cR5_fxQjHbSg"
 155 |       },
 156 |       "source": [
 157 |         "Создание непосредственно из данных"
 158 |       ]
 159 |     },
 160 |     {
 161 |       "cell_type": "code",
 162 |       "execution_count": null,
 163 |       "metadata": {
 164 |         "colab": {
 165 |           "base_uri": "https://localhost:8080/"
 166 |         },
 167 |         "id": "_XaCGIsrHbSg",
 168 |         "outputId": "7e4768e9-824b-47c0-9256-d42435c3fc1d"
 169 |       },
 170 |       "outputs": [
 171 |         {
 172 |           "output_type": "stream",
 173 |           "name": "stdout",
 174 |           "text": [
 175 |             "tensor([5.5000, 3.0000])\n"
 176 |           ]
 177 |         }
 178 |       ],
 179 |       "source": [
 180 |         "x = torch.tensor([5.5, 3])\n",
 181 |         "print(x)"
 182 |       ]
 183 |     },
 184 |     {
 185 |       "cell_type": "markdown",
 186 |       "metadata": {
 187 |         "id": "enXt3O2EHbSh"
 188 |       },
 189 |       "source": [
 190 |         "Из другого тензора"
 191 |       ]
 192 |     },
 193 |     {
 194 |       "cell_type": "code",
 195 |       "execution_count": null,
 196 |       "metadata": {
 197 |         "colab": {
 198 |           "base_uri": "https://localhost:8080/"
 199 |         },
 200 |         "id": "tfYa2XNtHbSh",
 201 |         "outputId": "4d8cd839-fd7e-4950-ba85-4dfef7f1b045"
 202 |       },
 203 |       "outputs": [
 204 |         {
 205 |           "output_type": "stream",
 206 |           "name": "stdout",
 207 |           "text": [
 208 |             "tensor([[1., 1., 1.],\n",
 209 |             "        [1., 1., 1.],\n",
 210 |             "        [1., 1., 1.],\n",
 211 |             "        [1., 1., 1.],\n",
 212 |             "        [1., 1., 1.]], dtype=torch.float64)\n",
 213 |             "tensor([[-0.4060, -0.0181, -0.6774],\n",
 214 |             "        [-0.4574, -0.8045,  1.3948],\n",
 215 |             "        [ 2.3655, -0.2676,  0.3849],\n",
 216 |             "        [ 1.0001, -1.4454,  0.8617],\n",
 217 |             "        [ 0.2797,  0.2810, -1.5000]])\n"
 218 |           ]
 219 |         }
 220 |       ],
 221 |       "source": [
 222 |         "x = x.new_ones(5, 3, dtype=torch.double)      # new_* methods take in sizes\n",
 223 |         "print(x)\n",
 224 |         "\n",
 225 |         "x = torch.randn_like(x, dtype=torch.float)    # the same size as input that is filled with random numbers from a normal distribution, override dtype!\n",
 226 |         "print(x)"
 227 |       ]
 228 |     },
 229 |     {
 230 |       "cell_type": "markdown",
 231 |       "source": [
 232 |         "При преобразовании типа (если он меняется) под данные выделяется новая память"
 233 |       ],
 234 |       "metadata": {
 235 |         "id": "L-oM7IO7U8X4"
 236 |       }
 237 |     },
 238 |     {
 239 |       "cell_type": "code",
 240 |       "source": [
 241 |         "x = torch.zeros(5, 3)  # initialised float32 tensor; the legacy torch.Tensor(5, 3) constructor returns uninitialised memory\n",
 242 |         "y = x.long()   # dtype changes -> a new tensor with its own memory is allocated\n",
 243 |         "y = x.float()  # dtype is already float32 -> the same tensor is returned, nothing is copied"
 244 |       ],
 245 |       "metadata": {
 246 |         "id": "gBkUz2tLU8Ea"
 247 |       },
 248 |       "execution_count": null,
 249 |       "outputs": []
 250 |     },
 251 |     {
 252 |       "cell_type": "markdown",
 253 |       "metadata": {
 254 |         "id": "SNENKpeeHbSi"
 255 |       },
 256 |       "source": [
 257 |         "Размер тензора"
 258 |       ]
 259 |     },
 260 |     {
 261 |       "cell_type": "code",
 262 |       "execution_count": null,
 263 |       "metadata": {
 264 |         "colab": {
 265 |           "base_uri": "https://localhost:8080/"
 266 |         },
 267 |         "id": "xbKtu1IjHbSi",
 268 |         "outputId": "93b7b6d0-bf1e-4241-fb92-095ce83adc60"
 269 |       },
 270 |       "outputs": [
 271 |         {
 272 |           "output_type": "execute_result",
 273 |           "data": {
 274 |             "text/plain": [
 275 |               "torch.Size([5, 3])"
 276 |             ]
 277 |           },
 278 |           "metadata": {},
 279 |           "execution_count": 8
 280 |         }
 281 |       ],
 282 |       "source": [
 283 |         "x.size()"
 284 |       ]
 285 |     },
 286 |     {
 287 |       "cell_type": "code",
 288 |       "source": [
 289 |         "x.shape"
 290 |       ],
 291 |       "metadata": {
 292 |         "colab": {
 293 |           "base_uri": "https://localhost:8080/"
 294 |         },
 295 |         "id": "_amAGT3oMrwy",
 296 |         "outputId": "7544958a-fb2e-4c27-9b7f-a154ccfe2951"
 297 |       },
 298 |       "execution_count": null,
 299 |       "outputs": [
 300 |         {
 301 |           "output_type": "execute_result",
 302 |           "data": {
 303 |             "text/plain": [
 304 |               "torch.Size([5, 3])"
 305 |             ]
 306 |           },
 307 |           "metadata": {},
 308 |           "execution_count": 9
 309 |         }
 310 |       ]
 311 |     },
 312 |     {
 313 |       "cell_type": "markdown",
 314 |       "metadata": {
 315 |         "id": "MjZS-Ga0HbSi"
 316 |       },
 317 |       "source": [
 318 |         "NB! torch.Size - подкласс tuple, поэтому поддерживаются те же операции, что и с кортежами"
 319 |       ]
 320 |     },
 321 |     {
 322 |       "cell_type": "markdown",
 323 |       "metadata": {
 324 |         "id": "G1WW3Ev7HbSj"
 325 |       },
 326 |       "source": [
 327 |         "### Операции с тензорами\n"
 328 |       ]
 329 |     },
 330 |     {
 331 |       "cell_type": "code",
 332 |       "execution_count": null,
 333 |       "metadata": {
 334 |         "colab": {
 335 |           "base_uri": "https://localhost:8080/"
 336 |         },
 337 |         "id": "6zpTGOKSHbSj",
 338 |         "outputId": "5f5d97b5-bb09-4377-cb60-9112886632df"
 339 |       },
 340 |       "outputs": [
 341 |         {
 342 |           "output_type": "stream",
 343 |           "name": "stdout",
 344 |           "text": [
 345 |             "tensor([[0.4233, 0.1546, 0.8740],\n",
 346 |             "        [0.1989, 0.2291, 0.6066],\n",
 347 |             "        [0.6610, 0.2598, 0.6111],\n",
 348 |             "        [0.5708, 0.1070, 0.4649],\n",
 349 |             "        [0.9717, 0.0444, 0.9573]])\n"
 350 |           ]
 351 |         }
 352 |       ],
 353 |       "source": [
 354 |         "y = torch.rand(5, 3)\n",
 355 |         "print(x + y)"
 356 |       ]
 357 |     },
 358 |     {
 359 |       "cell_type": "code",
 360 |       "execution_count": null,
 361 |       "metadata": {
 362 |         "colab": {
 363 |           "base_uri": "https://localhost:8080/"
 364 |         },
 365 |         "id": "KDC_fsaAHbSk",
 366 |         "outputId": "ca3d7087-c527-4255-ee12-f321037c8b2a"
 367 |       },
 368 |       "outputs": [
 369 |         {
 370 |           "output_type": "stream",
 371 |           "name": "stdout",
 372 |           "text": [
 373 |             "tensor([[0.4233, 0.1546, 0.8740],\n",
 374 |             "        [0.1989, 0.2291, 0.6066],\n",
 375 |             "        [0.6610, 0.2598, 0.6111],\n",
 376 |             "        [0.5708, 0.1070, 0.4649],\n",
 377 |             "        [0.9717, 0.0444, 0.9573]])\n"
 378 |           ]
 379 |         }
 380 |       ],
 381 |       "source": [
 382 |         "print(torch.add(x, y))"
 383 |       ]
 384 |     },
 385 |     {
 386 |       "cell_type": "markdown",
 387 |       "metadata": {
 388 |         "id": "RcWj30xGHbSl"
 389 |       },
 390 |       "source": [
 391 |         "Выходная переменная как параметр"
 392 |       ]
 393 |     },
 394 |     {
 395 |       "cell_type": "code",
 396 |       "execution_count": null,
 397 |       "metadata": {
 398 |         "colab": {
 399 |           "base_uri": "https://localhost:8080/"
 400 |         },
 401 |         "id": "zcAlCBb-HbSl",
 402 |         "outputId": "25c15f02-f9f7-473c-edf5-1ccdf06c53d6"
 403 |       },
 404 |       "outputs": [
 405 |         {
 406 |           "output_type": "stream",
 407 |           "name": "stdout",
 408 |           "text": [
 409 |             "tensor([[0.4233, 0.1546, 0.8740],\n",
 410 |             "        [0.1989, 0.2291, 0.6066],\n",
 411 |             "        [0.6610, 0.2598, 0.6111],\n",
 412 |             "        [0.5708, 0.1070, 0.4649],\n",
 413 |             "        [0.9717, 0.0444, 0.9573]])\n"
 414 |           ]
 415 |         }
 416 |       ],
 417 |       "source": [
 418 |         "result = torch.empty(5, 3)\n",
 419 |         "torch.add(x, y, out=result)\n",
 420 |         "print(result)"
 421 |       ]
 422 |     },
 423 |     {
 424 |       "cell_type": "markdown",
 425 |       "metadata": {
 426 |         "id": "MtQNzjG-HbSm"
 427 |       },
 428 |       "source": [
 429 |         "in-place операции записываются с суффиксом `_` (`add_` вместо `add`) и не выделяют новую память: `x.add(y)` возвращает новый тензор, а `x.add_(y)` изменяет сам `x`"
 430 |       ]
 431 |     },
 432 |     {
 433 |       "cell_type": "code",
 434 |       "execution_count": null,
 435 |       "metadata": {
 436 |         "colab": {
 437 |           "base_uri": "https://localhost:8080/"
 438 |         },
 439 |         "id": "HXRpa24sHbSm",
 440 |         "outputId": "130e664b-a1cf-43cb-a8bd-3f8c7eac21e0"
 441 |       },
 442 |       "outputs": [
 443 |         {
 444 |           "output_type": "execute_result",
 445 |           "data": {
 446 |             "text/plain": [
 447 |               "tensor([[0.4233, 0.1546, 0.8740],\n",
 448 |               "        [0.1989, 0.2291, 0.6066],\n",
 449 |               "        [0.6610, 0.2598, 0.6111],\n",
 450 |               "        [0.5708, 0.1070, 0.4649],\n",
 451 |               "        [0.9717, 0.0444, 0.9573]])"
 452 |             ]
 453 |           },
 454 |           "metadata": {},
 455 |           "execution_count": 13
 456 |         }
 457 |       ],
 458 |       "source": [
 459 |         "x.add(y)"
 460 |       ]
 461 |     },
 462 |     {
 463 |       "cell_type": "code",
 464 |       "execution_count": null,
 465 |       "metadata": {
 466 |         "colab": {
 467 |           "base_uri": "https://localhost:8080/"
 468 |         },
 469 |         "id": "OrZ1ScyaHbSn",
 470 |         "outputId": "e34869d6-c4b4-4767-f3e4-8e4adc3a3e7c"
 471 |       },
 472 |       "outputs": [
 473 |         {
 474 |           "output_type": "execute_result",
 475 |           "data": {
 476 |             "text/plain": [
 477 |               "tensor([[0.4233, 0.1546, 0.8740],\n",
 478 |               "        [0.1989, 0.2291, 0.6066],\n",
 479 |               "        [0.6610, 0.2598, 0.6111],\n",
 480 |               "        [0.5708, 0.1070, 0.4649],\n",
 481 |               "        [0.9717, 0.0444, 0.9573]])"
 482 |             ]
 483 |           },
 484 |           "metadata": {},
 485 |           "execution_count": 14
 486 |         }
 487 |       ],
 488 |       "source": [
 489 |         "x.add_(y)"
 490 |       ]
 491 |     },
 492 |     {
 493 |       "cell_type": "markdown",
 494 |       "metadata": {
 495 |         "id": "W7Lj2P0UHbSo"
 496 |       },
 497 |       "source": [
 498 |         "Синтаксический сахар NumPy индексации"
 499 |       ]
 500 |     },
 501 |     {
 502 |       "cell_type": "code",
 503 |       "execution_count": null,
 504 |       "metadata": {
 505 |         "colab": {
 506 |           "base_uri": "https://localhost:8080/"
 507 |         },
 508 |         "id": "KE5Fah0iHbSo",
 509 |         "outputId": "a4a50d13-300b-4d90-e07b-536e9a2c552d"
 510 |       },
 511 |       "outputs": [
 512 |         {
 513 |           "output_type": "stream",
 514 |           "name": "stdout",
 515 |           "text": [
 516 |             "tensor([0.1546, 0.2291, 0.2598, 0.1070, 0.0444])\n"
 517 |           ]
 518 |         }
 519 |       ],
 520 |       "source": [
 521 |         "print(x[:, 1])"
 522 |       ]
 523 |     },
 524 |     {
 525 |       "cell_type": "markdown",
 526 |       "metadata": {
 527 |         "id": "lvS7Y3YgHbSo"
 528 |       },
 529 |       "source": [
 530 |         "Форма тензора (число индексов и их размерности) меняется функциями `view` и `reshape`"
 531 |       ]
 532 |     },
 533 |     {
 534 |       "cell_type": "code",
 535 |       "execution_count": null,
 536 |       "metadata": {
 537 |         "colab": {
 538 |           "base_uri": "https://localhost:8080/"
 539 |         },
 540 |         "id": "mZF0GUhtHbSo",
 541 |         "outputId": "84961ca9-d735-4895-f17c-a4300563468e"
 542 |       },
 543 |       "outputs": [
 544 |         {
 545 |           "output_type": "stream",
 546 |           "name": "stdout",
 547 |           "text": [
 548 |             "torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])\n"
 549 |           ]
 550 |         }
 551 |       ],
 552 |       "source": [
 553 |         "x = torch.randn(4, 4)\n",
 554 |         "y = x.view(16)\n",
 555 |         "z = x.view(-1, 8) # Одна из размерностей может быть равна -1 и тогда она будет посчитана автоматически\n",
 556 |         "print(x.size(), y.size(), z.size())"
 557 |       ]
 558 |     },
 559 |     {
 560 |       "cell_type": "markdown",
 561 |       "source": [
 562 |         "`view` - создает другое представление исходного тензора: меняются только форма и шаги (`size()` и `stride()`), сами данные не копируются. Новый тензор всегда делит (share) данные с исходным тензором\n",
 563 |         "\n",
 564 |         "`reshape` не дает гарантии на шэринг данных: если новую форму можно получить без копирования, возвращается представление (как у `view`); если нельзя (например, тензор не `is_contiguous`), данные сначала копируются в новую память (как при вызове `contiguous`), и уже к копии применяется `view`"
 565 |       ],
 566 |       "metadata": {
 567 |         "id": "Td4AN9DEW_lV"
 568 |       }
 569 |     },
 570 |     {
 571 |       "cell_type": "code",
 572 |       "source": [
 573 |         "x = torch.randn(4, 4)\n",
 574 |         "y = x.reshape(16)\n",
 575 |         "z = x.reshape(-1, 8)\n",
 576 |         "print(x.size(), y.size(), z.size())"
 577 |       ],
 578 |       "metadata": {
 579 |         "colab": {
 580 |           "base_uri": "https://localhost:8080/"
 581 |         },
 582 |         "id": "o_lWRGR6V0A9",
 583 |         "outputId": "dfa2fa7e-0b43-4157-d4c0-bbbef60ed87d"
 584 |       },
 585 |       "execution_count": null,
 586 |       "outputs": [
 587 |         {
 588 |           "output_type": "stream",
 589 |           "name": "stdout",
 590 |           "text": [
 591 |             "torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])\n"
 592 |           ]
 593 |         }
 594 |       ]
 595 |     },
 596 |     {
 597 |       "cell_type": "markdown",
 598 |       "metadata": {
 599 |         "id": "iz551EmlHbSp"
 600 |       },
 601 |       "source": [
 602 |         "Получение значения тензора из одного элемента"
 603 |       ]
 604 |     },
 605 |     {
 606 |       "cell_type": "code",
 607 |       "execution_count": null,
 608 |       "metadata": {
 609 |         "colab": {
 610 |           "base_uri": "https://localhost:8080/"
 611 |         },
 612 |         "id": "gsAk9MudHbSp",
 613 |         "outputId": "38e2672e-2faa-4bb9-c871-148e53860ffd"
 614 |       },
 615 |       "outputs": [
 616 |         {
 617 |           "output_type": "stream",
 618 |           "name": "stdout",
 619 |           "text": [
 620 |             "tensor([1.9939])\n",
 621 |             "1.993883490562439\n"
 622 |           ]
 623 |         }
 624 |       ],
 625 |       "source": [
 626 |         "x = torch.randn(1)\n",
 627 |         "print(x)\n",
 628 |         "print(x.item())"
 629 |       ]
 630 |     },
 631 |     {
 632 |       "cell_type": "code",
 633 |       "execution_count": null,
 634 |       "metadata": {
 635 |         "colab": {
 636 |           "base_uri": "https://localhost:8080/"
 637 |         },
 638 |         "id": "6fY57dMyHbSp",
 639 |         "outputId": "f31e6942-6e53-4c4c-a47b-958b3869f2e0"
 640 |       },
 641 |       "outputs": [
 642 |         {
 643 |           "output_type": "execute_result",
 644 |           "data": {
 645 |             "text/plain": [
 646 |               "-0.02648058533668518"
 647 |             ]
 648 |           },
 649 |           "metadata": {},
 650 |           "execution_count": 19
 651 |         }
 652 |       ],
 653 |       "source": [
 654 |         "y[1].item()"
 655 |       ]
 656 |     },
 657 |     {
 658 |       "cell_type": "markdown",
 659 |       "metadata": {
 660 |         "id": "sPqMWMNoHbSq"
 661 |       },
 662 |       "source": [
 663 |         "Проверка доступности GPU и создание тензора на GPU"
 664 |       ]
 665 |     },
 666 |     {
 667 |       "cell_type": "code",
 668 |       "execution_count": null,
 669 |       "metadata": {
 670 |         "id": "gh929KmCHbSq",
 671 |         "colab": {
 672 |           "base_uri": "https://localhost:8080/"
 673 |         },
 674 |         "outputId": "f30d234f-9ad0-4b5a-d6ba-a36135e5993a"
 675 |       },
 676 |       "outputs": [
 677 |         {
 678 |           "output_type": "stream",
 679 |           "name": "stdout",
 680 |           "text": [
 681 |             "tensor([2.9939], device='cuda:0')\n",
 682 |             "tensor([2.9939], dtype=torch.float64)\n"
 683 |           ]
 684 |         }
 685 |       ],
 686 |       "source": [
 687 |         "if torch.cuda.is_available():\n",
 688 |         "    device = torch.device(\"cuda\")          # a CUDA device object\n",
 689 |         "    y = torch.ones_like(x, device=device)  # directly create a tensor on GPU\n",
 690 |         "    x = x.to(device)                       # or just use strings ``.to(\"cuda\")``\n",
 691 |         "    z = x + y\n",
 692 |         "    print(z)\n",
 693 |         "    print(z.to(\"cpu\", torch.double))"
 694 |       ]
 695 |     },
 696 |     {
 697 |       "cell_type": "markdown",
 698 |       "source": [
 699 |         "Чтобы не прописывать device руками"
 700 |       ],
 701 |       "metadata": {
 702 |         "id": "qClsNFnCZOLc"
 703 |       }
 704 |     },
 705 |     {
 706 |       "cell_type": "code",
 707 |       "source": [
 708 |         "if torch.cuda.is_available():\n",
 709 |         "  device = \"cuda:0\"\n",
 710 |         "else:\n",
 711 |         "  device = \"cpu\"\n",
 712 |         "device"
 713 |       ],
 714 |       "metadata": {
 715 |         "colab": {
 716 |           "base_uri": "https://localhost:8080/",
 717 |           "height": 35
 718 |         },
 719 |         "id": "14cp1ImnZKEI",
 720 |         "outputId": "888190df-4426-467c-817e-0cfd4e1ced61"
 721 |       },
 722 |       "execution_count": null,
 723 |       "outputs": [
 724 |         {
 725 |           "output_type": "execute_result",
 726 |           "data": {
 727 |             "text/plain": [
 728 |               "'cuda:0'"
 729 |             ],
 730 |             "application/vnd.google.colaboratory.intrinsic+json": {
 731 |               "type": "string"
 732 |             }
 733 |           },
 734 |           "metadata": {},
 735 |           "execution_count": 21
 736 |         }
 737 |       ]
 738 |     },
 739 |     {
 740 |       "cell_type": "markdown",
 741 |       "source": [
 742 |         "Выигрыш во времени от использования GPU"
 743 |       ],
 744 |       "metadata": {
 745 |         "id": "bUUTF9kdYyDj"
 746 |       }
 747 |     },
 748 |     {
 749 |       "cell_type": "code",
 750 |       "source": [
 751 |         "%%time\n",
 752 |         "\n",
 753 |         "x1 = torch.eye(10000)\n",
 754 |         "y1 = torch.eye(10000)\n",
 755 |         "z1 = x1.mm(y1)"
 756 |       ],
 757 |       "metadata": {
 758 |         "colab": {
 759 |           "base_uri": "https://localhost:8080/"
 760 |         },
 761 |         "id": "LBIKZWcOYnpn",
 762 |         "outputId": "35e139e4-a81f-4925-fb15-33b407f9f7d9"
 763 |       },
 764 |       "execution_count": null,
 765 |       "outputs": [
 766 |         {
 767 |           "output_type": "stream",
 768 |           "name": "stdout",
 769 |           "text": [
 770 |             "CPU times: user 27 s, sys: 709 ms, total: 27.7 s\n",
 771 |             "Wall time: 28.1 s\n"
 772 |           ]
 773 |         }
 774 |       ]
 775 |     },
 776 |     {
 777 |       "cell_type": "code",
 778 |       "source": [
 779 |         "%%time\n",
 780 |         "\n",
 781 |         "x1 = torch.eye(10000, device=device)\n",
 782 |         "y1 = torch.eye(10000, device=device)\n",
 783 |         "z1 = x1.mm(y1)\n# CUDA kernels are launched asynchronously: wait for them to finish so that the timing covers the multiplication itself, not only the kernel launch\nif torch.cuda.is_available():\n    torch.cuda.synchronize()"
 784 |       ],
 785 |       "metadata": {
 786 |         "colab": {
 787 |           "base_uri": "https://localhost:8080/"
 788 |         },
 789 |         "id": "-sBn0ix2YntI",
 790 |         "outputId": "5f5c5af9-ac15-44b7-a148-6907f243012f"
 791 |       },
 792 |       "execution_count": null,
 793 |       "outputs": [
 794 |         {
 795 |           "output_type": "stream",
 796 |           "name": "stdout",
 797 |           "text": [
 798 |             "CPU times: user 431 ms, sys: 265 ms, total: 696 ms\n",
 799 |             "Wall time: 2.78 s\n"
 800 |           ]
 801 |         }
 802 |       ]
 803 |     },
 804 |     {
 805 |       "cell_type": "markdown",
 806 |       "metadata": {
 807 |         "id": "9NSTCyMhHbSq"
 808 |       },
 809 |       "source": [
 810 |         "### Autograd - automatic differentiation engine\n",
 811 |         "\n",
 812 |         "[PyTorch 101, Part 1: Understanding Graphs, Automatic Differentiation and Autograd](https://blog.paperspace.com/pytorch-101-understanding-graphs-and-automatic-differentiation/)\n",
 813 |         "\n",
 814 |         "В центре большинства современных приемов машинного обучения лежит расчет градиентов. Это в особенности касается нейронных сетей, где для обновления весовых коэффициентов используется алгоритм обратного распространения\n",
 815 |         "\n",
 816 |         "Autograd предоставляет классы и функции, реализующие автоматическое дифференцирование произвольных скалярных функций. Это требует минимальных изменений в существующем коде - нужно только объявить Tensor, для которого должны вычисляться градиенты, с атрибутом `requires_grad=True`"
 817 |       ]
 818 |     },
 819 |     {
 820 |       "cell_type": "code",
 821 |       "execution_count": null,
 822 |       "metadata": {
 823 |         "colab": {
 824 |           "base_uri": "https://localhost:8080/"
 825 |         },
 826 |         "id": "JcdJCnP5HbSr",
 827 |         "outputId": "882b1d90-22d0-48f9-b1ab-3a82355db6e8"
 828 |       },
 829 |       "outputs": [
 830 |         {
 831 |           "output_type": "stream",
 832 |           "name": "stdout",
 833 |           "text": [
 834 |             "tensor([[1., 1.],\n",
 835 |             "        [1., 1.]], requires_grad=True)\n"
 836 |           ]
 837 |         }
 838 |       ],
 839 |       "source": [
 840 |         "x = torch.ones(2, 2, requires_grad=True)\n",
 841 |         "print(x)"
 842 |       ]
 843 |     },
 844 |     {
 845 |       "cell_type": "code",
 846 |       "source": [
 847 |         "x.grad is None  # identity check; no backward pass has run yet, so no gradient is stored"
 848 |       ],
 849 |       "metadata": {
 850 |         "colab": {
 851 |           "base_uri": "https://localhost:8080/"
 852 |         },
 853 |         "id": "JbgJ9x8CcFJ6",
 854 |         "outputId": "5c62cc95-6d76-4fb6-c7b4-9ce7aebfe3b4"
 855 |       },
 856 |       "execution_count": null,
 857 |       "outputs": [
 858 |         {
 859 |           "output_type": "execute_result",
 860 |           "data": {
 861 |             "text/plain": [
 862 |               "True"
 863 |             ]
 864 |           },
 865 |           "metadata": {},
 866 |           "execution_count": 25
 867 |         }
 868 |       ]
 869 |     },
 870 |     {
 871 |       "cell_type": "code",
 872 |       "source": [
 873 |         "x.grad_fn is None  # identity check; x was created directly, not produced by an operation"
 874 |       ],
 875 |       "metadata": {
 876 |         "colab": {
 877 |           "base_uri": "https://localhost:8080/"
 878 |         },
 879 |         "id": "b_i-OcPLb-Rv",
 880 |         "outputId": "b6dc6c9b-c8c0-43f8-b937-4ad516247c3b"
 881 |       },
 882 |       "execution_count": null,
 883 |       "outputs": [
 884 |         {
 885 |           "output_type": "execute_result",
 886 |           "data": {
 887 |             "text/plain": [
 888 |               "True"
 889 |             ]
 890 |           },
 891 |           "metadata": {},
 892 |           "execution_count": 26
 893 |         }
 894 |       ]
 895 |     },
 896 |     {
 897 |       "cell_type": "markdown",
 898 |       "metadata": {
 899 |         "id": "U0VM8aIlHbSr"
 900 |       },
 901 |       "source": [
 902 |         "После применения какой-либо операции к тензору у результата в атрибут `grad_fn` записывается объект `Function`, который добавляется в граф вычислений для обратного распространения градиента.\n",
 903 |         "\n"
 904 |       ]
 905 |     },
 906 |     {
 907 |       "cell_type": "code",
 908 |       "execution_count": null,
 909 |       "metadata": {
 910 |         "colab": {
 911 |           "base_uri": "https://localhost:8080/"
 912 |         },
 913 |         "id": "ZIcFl4AWHbSs",
 914 |         "outputId": "aa130744-e0d3-4027-83f1-e636be9f7def"
 915 |       },
 916 |       "outputs": [
 917 |         {
 918 |           "output_type": "stream",
 919 |           "name": "stdout",
 920 |           "text": [
 921 |             "tensor([[3., 3.],\n",
 922 |             "        [3., 3.]], grad_fn=<AddBackward0>)\n"
 923 |           ]
 924 |         }
 925 |       ],
 926 |       "source": [
 927 |         "y = x + 2\n",
 928 |         "print(y)"
 929 |       ]
 930 |     },
 931 |     {
 932 |       "cell_type": "code",
 933 |       "execution_count": null,
 934 |       "metadata": {
 935 |         "colab": {
 936 |           "base_uri": "https://localhost:8080/"
 937 |         },
 938 |         "id": "of4Eh18aHbSs",
 939 |         "outputId": "8e96be6c-a183-4992-f0a9-b2230e9be7f7"
 940 |       },
 941 |       "outputs": [
 942 |         {
 943 |           "output_type": "stream",
 944 |           "name": "stdout",
 945 |           "text": [
 946 |             "tensor([[27., 27.],\n",
 947 |             "        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)\n"
 948 |           ]
 949 |         }
 950 |       ],
 951 |       "source": [
 952 |         "z = y * y * 3\n",
 953 |         "out = z.mean()\n",
 954 |         "\n",
 955 |         "print(z, out)"
 956 |       ]
 957 |     },
 958 |     {
 959 |       "cell_type": "markdown",
 960 |       "metadata": {
 961 |         "id": "39gLEvmAHbSt"
 962 |       },
 963 |       "source": [
 964 |         "Флаг `requires_grad` можно менять \"на лету\" методом `requires_grad_`; после этого у результатов операций появляется `.grad_fn`"
 965 |       ]
 966 |     },
 967 |     {
 968 |       "cell_type": "code",
 969 |       "execution_count": null,
 970 |       "metadata": {
 971 |         "colab": {
 972 |           "base_uri": "https://localhost:8080/"
 973 |         },
 974 |         "id": "va5zd2xMHbSt",
 975 |         "outputId": "0bb8f68e-6a33-438e-b6ef-1cf5a2187481"
 976 |       },
 977 |       "outputs": [
 978 |         {
 979 |           "output_type": "stream",
 980 |           "name": "stdout",
 981 |           "text": [
 982 |             "False\n",
 983 |             "True\n",
 984 |             "<SumBackward0 object at 0x7f4bd545e430>\n"
 985 |           ]
 986 |         }
 987 |       ],
 988 |       "source": [
 989 |         "a = torch.randn(2, 2)\n",
 990 |         "a = ((a * 3) / (a - 1))\n",
 991 |         "print(a.requires_grad)\n",
 992 |         "a.requires_grad_(True)\n",
 993 |         "print(a.requires_grad)\n",
 994 |         "b = (a * a).sum()\n",
 995 |         "print(b.grad_fn)"
 996 |       ]
 997 |     },
 998 |     {
 999 |       "cell_type": "markdown",
1000 |       "metadata": {
1001 |         "id": "lZcXFmziHbSt"
1002 |       },
1003 |       "source": [
1004 |         "Метод `backward` корневого узла графа вычислений запускает процедуру вычисления градиентов в листовых (`is_leaf`) узлах, у которых `requires_grad=True`. Граф дифференцируется по цепочке (chain rule)."
1005 |       ]
1006 |     },
1007 |     {
1008 |       "cell_type": "code",
1009 |       "execution_count": null,
1010 |       "metadata": {
1011 |         "id": "yyNpdWrzHbSu"
1012 |       },
1013 |       "outputs": [],
1014 |       "source": [
1015 |         "out.backward()"
1016 |       ]
1017 |     },
1018 |     {
1019 |       "cell_type": "code",
1020 |       "execution_count": null,
1021 |       "metadata": {
1022 |         "colab": {
1023 |           "base_uri": "https://localhost:8080/"
1024 |         },
1025 |         "id": "bSpM1ECkHbSu",
1026 |         "outputId": "c4da2859-1a54-4d25-b93c-816e6638ddb4"
1027 |       },
1028 |       "outputs": [
1029 |         {
1030 |           "output_type": "stream",
1031 |           "name": "stdout",
1032 |           "text": [
1033 |             "tensor([[4.5000, 4.5000],\n",
1034 |             "        [4.5000, 4.5000]])\n"
1035 |           ]
1036 |         }
1037 |       ],
1038 |       "source": [
1039 |         "print(x.grad)"
1040 |       ]
1041 |     },
1042 |     {
1043 |       "cell_type": "markdown",
1044 |       "source": [
1045 |         "По умолчанию промежуточные (не листовые) узлы графа не хранят прошедшие через них градиенты."
1046 |       ],
1047 |       "metadata": {
1048 |         "id": "LpYFZQuze1u-"
1049 |       }
1050 |     },
1051 |     {
1052 |       "cell_type": "code",
1053 |       "source": [
1054 |         "print(y.grad)"
1055 |       ],
1056 |       "metadata": {
1057 |         "colab": {
1058 |           "base_uri": "https://localhost:8080/"
1059 |         },
1060 |         "id": "AEcfJluqe1eQ",
1061 |         "outputId": "2884609f-7ba2-4c94-df7b-83a5c50a0e73"
1062 |       },
1063 |       "execution_count": null,
1064 |       "outputs": [
1065 |         {
1066 |           "output_type": "stream",
1067 |           "name": "stdout",
1068 |           "text": [
1069 |             "None\n"
1070 |           ]
1071 |         },
1072 |         {
1073 |           "output_type": "stream",
1074 |           "name": "stderr",
1075 |           "text": [
1076 |             "<ipython-input-32-b81046718426>:1: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:486.)\n",
1077 |             "  print(y.grad)\n"
1078 |           ]
1079 |         }
1080 |       ]
1081 |     },
1082 |     {
1083 |       "cell_type": "markdown",
1084 |       "source": [
1085 |         "Эту ситуацию можно изменить, вызвав для конкретного узла метод `retain_grad`"
1086 |       ],
1087 |       "metadata": {
1088 |         "id": "6KEkSd3oe7g_"
1089 |       }
1090 |     },
1091 |     {
1092 |       "cell_type": "code",
1093 |       "source": [
1094 |         "x = torch.ones(2, 2, requires_grad=True)\n",
1095 |         "y = x + 2\n",
1096 |         "y.retain_grad()\n",
1097 |         "z = y * y * 3\n",
1098 |         "out = z.mean()\n",
1099 |         "out.backward()"
1100 |       ],
1101 |       "metadata": {
1102 |         "id": "19q2HQX8e7JW"
1103 |       },
1104 |       "execution_count": null,
1105 |       "outputs": []
1106 |     },
1107 |     {
1108 |       "cell_type": "code",
1109 |       "source": [
1110 |         "print(x.grad)"
1111 |       ],
1112 |       "metadata": {
1113 |         "colab": {
1114 |           "base_uri": "https://localhost:8080/"
1115 |         },
1116 |         "id": "LXSMmRATfHVO",
1117 |         "outputId": "2c371fdc-ebbf-4b0f-e233-3c216ef19239"
1118 |       },
1119 |       "execution_count": null,
1120 |       "outputs": [
1121 |         {
1122 |           "output_type": "stream",
1123 |           "name": "stdout",
1124 |           "text": [
1125 |             "tensor([[4.5000, 4.5000],\n",
1126 |             "        [4.5000, 4.5000]])\n"
1127 |           ]
1128 |         }
1129 |       ]
1130 |     },
1131 |     {
1132 |       "cell_type": "code",
1133 |       "source": [
1134 |         "print(y.grad)"
1135 |       ],
1136 |       "metadata": {
1137 |         "colab": {
1138 |           "base_uri": "https://localhost:8080/"
1139 |         },
1140 |         "id": "S9KDgTYLfEib",
1141 |         "outputId": "5ae8384e-39fc-46ab-9451-e019b7427fd1"
1142 |       },
1143 |       "execution_count": null,
1144 |       "outputs": [
1145 |         {
1146 |           "output_type": "stream",
1147 |           "name": "stdout",
1148 |           "text": [
1149 |             "tensor([[4.5000, 4.5000],\n",
1150 |             "        [4.5000, 4.5000]])\n"
1151 |           ]
1152 |         }
1153 |       ]
1154 |     },
1155 |     {
1156 |       "cell_type": "markdown",
1157 |       "metadata": {
1158 |         "id": "CD3WUBM3HbSw"
1159 |       },
1160 |       "source": [
1161 |         "Иногда с листовыми узлами необходимо проделать действия, не меняя при этом графа. Такие действия проводят, используя контекстный менеджер `no_grad`, который блокирует создание новых узлов графа"
1162 |       ]
1163 |     },
1164 |     {
1165 |       "cell_type": "code",
1166 |       "execution_count": null,
1167 |       "metadata": {
1168 |         "colab": {
1169 |           "base_uri": "https://localhost:8080/"
1170 |         },
1171 |         "id": "ueu6nMZQHbSw",
1172 |         "outputId": "413d2e51-5789-49ea-b405-1475da821c30"
1173 |       },
1174 |       "outputs": [
1175 |         {
1176 |           "output_type": "stream",
1177 |           "name": "stdout",
1178 |           "text": [
1179 |             "True\n",
1180 |             "True\n",
1181 |             "False\n"
1182 |           ]
1183 |         }
1184 |       ],
1185 |       "source": [
1186 |         "print(x.requires_grad)\n",
1187 |         "print((x ** 2).requires_grad)\n",
1188 |         "\n",
1189 |         "with torch.no_grad(): # потом можно включить вручную torch.enable_grad()\n",
1190 |         "    print((x ** 2).requires_grad)\n"
1191 |       ]
1192 |     },
1193 |     {
1194 |       "cell_type": "code",
1195 |       "source": [],
1196 |       "metadata": {
1197 |         "id": "YdrSm6g6M4DE"
1198 |       },
1199 |       "execution_count": null,
1200 |       "outputs": []
1201 |     }
1202 |   ],
1203 |   "metadata": {
1204 |     "kernelspec": {
1205 |       "display_name": "Python 3",
1206 |       "language": "python",
1207 |       "name": "python3"
1208 |     },
1209 |     "language_info": {
1210 |       "codemirror_mode": {
1211 |         "name": "ipython",
1212 |         "version": 3
1213 |       },
1214 |       "file_extension": ".py",
1215 |       "mimetype": "text/x-python",
1216 |       "name": "python",
1217 |       "nbconvert_exporter": "python",
1218 |       "pygments_lexer": "ipython3",
1219 |       "version": "3.7.4"
1220 |     },
1221 |     "colab": {
1222 |       "provenance": [],
1223 |       "toc_visible": true
1224 |     },
1225 |     "accelerator": "GPU",
1226 |     "gpuClass": "standard"
1227 |   },
1228 |   "nbformat": 4,
1229 |   "nbformat_minor": 0
1230 | }


--------------------------------------------------------------------------------
/Seminar 6/vocab.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import numpy as np
 3 | import torch
 4 | import torch.nn.functional as F
 5 | 
 6 | class Vocab:
 7 |     def __init__(self, tokens, bos="_BOS_", eos="_EOS_", unk='_UNK_'):
 8 |         """
 9 |         A special class that converts lines of tokens into matrices and backwards
10 |         """
11 |         assert all(tok in tokens for tok in (bos, eos, unk))
12 |         self.tokens = tokens
13 |         self.token_to_ix = {t:i for i, t in enumerate(tokens)}
14 |         self.bos, self.eos, self.unk = bos, eos, unk
15 |         self.bos_ix = self.token_to_ix[bos]
16 |         self.eos_ix = self.token_to_ix[eos]
17 |         self.unk_ix = self.token_to_ix[unk]
18 | 
19 |     def __len__(self):
20 |         return len(self.tokens)
21 | 
22 |     @staticmethod
23 |     def from_lines(lines, bos="_BOS_", eos="_EOS_", unk='_UNK_'):
24 |         flat_lines = '\n'.join(list(lines)).split()
25 |         tokens = sorted(set(flat_lines))
26 |         tokens = [t for t in tokens if t not in (bos, eos, unk) and len(t)]
27 |         tokens = [bos, eos, unk] + tokens
28 |         return Vocab(tokens, bos, eos, unk)
29 | 
30 |     def tokenize(self, string):
31 |         """converts string to a list of tokens"""
32 |         tokens = [tok if tok in self.token_to_ix else self.unk
33 |                   for tok in string.split()]
34 |         return [self.bos] + tokens + [self.eos]
35 | 
36 |     def to_matrix(self, lines, dtype=torch.int64, max_len=None):
37 |         """
38 |         convert variable length token sequences into  fixed size matrix
39 |         example usage:
40 |         >>>print(to_matrix(words[:3],source_to_ix))
41 |         [[15 22 21 28 27 13 -1 -1 -1 -1 -1]
42 |          [30 21 15 15 21 14 28 27 13 -1 -1]
43 |          [25 37 31 34 21 20 37 21 28 19 13]]
44 |         """
45 |         lines = list(map(self.tokenize, lines))
46 |         max_len = max_len or max(map(len, lines))
47 | 
48 |         matrix = torch.full((len(lines), max_len), self.eos_ix, dtype=dtype)
49 |         for i, seq in enumerate(lines):
50 |             row_ix = list(map(self.token_to_ix.get, seq))[:max_len]
51 |             matrix[i, :len(row_ix)] = torch.as_tensor(row_ix)
52 | 
53 |         return matrix
54 | 
55 |     def to_lines(self, matrix, crop=True):
56 |         """
57 |         Convert matrix of token ids into strings
58 |         :param matrix: matrix of tokens of int32, shape=[batch,time]
59 |         :param crop: if True, crops BOS and EOS from line
60 |         :return:
61 |         """
62 |         lines = []
63 |         for line_ix in map(list,matrix):
64 |             if crop:
65 |                 if line_ix[0] == self.bos_ix:
66 |                     line_ix = line_ix[1:]
67 |                 if self.eos_ix in line_ix:
68 |                     line_ix = line_ix[:line_ix.index(self.eos_ix)]
69 |             line = ' '.join(self.tokens[i] for i in line_ix)
70 |             lines.append(line)
71 |         return lines
72 |     
73 |     def compute_mask(self, input_ix):
74 |         """ compute a boolean mask that equals "1" until first EOS (including that EOS) """
75 |         return F.pad(torch.cumsum(input_ix == self.eos_ix, dim=-1)[..., :-1] < 1, pad=(1, 0, 0, 0), value=True)
76 | 
77 | 


--------------------------------------------------------------------------------
/Seminar 8/itmo_ods_nlp_course_poems_generation.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |   "nbformat": 4,
   3 |   "nbformat_minor": 0,
   4 |   "metadata": {
   5 |     "colab": {
   6 |       "provenance": [],
   7 |       "toc_visible": true
   8 |     },
   9 |     "kernelspec": {
  10 |       "name": "python3",
  11 |       "display_name": "Python 3"
  12 |     },
  13 |     "language_info": {
  14 |       "name": "python"
  15 |     },
  16 |     "accelerator": "GPU",
  17 |     "gpuClass": "standard",
  18 |     "widgets": {
  19 |       "application/vnd.jupyter.widget-state+json": {
  20 |         "c066b5c1bc90494daf341b7dd55b3af1": {
  21 |           "model_module": "@jupyter-widgets/controls",
  22 |           "model_name": "HBoxModel",
  23 |           "model_module_version": "1.5.0",
  24 |           "state": {
  25 |             "_dom_classes": [],
  26 |             "_model_module": "@jupyter-widgets/controls",
  27 |             "_model_module_version": "1.5.0",
  28 |             "_model_name": "HBoxModel",
  29 |             "_view_count": null,
  30 |             "_view_module": "@jupyter-widgets/controls",
  31 |             "_view_module_version": "1.5.0",
  32 |             "_view_name": "HBoxView",
  33 |             "box_style": "",
  34 |             "children": [
  35 |               "IPY_MODEL_259549e7870c4635a9d89cd61a102a4c",
  36 |               "IPY_MODEL_b565ee1cadcd460fadbfc08672f561a1",
  37 |               "IPY_MODEL_ca4e30cc5fdb45198a767e7d5cb7d343"
  38 |             ],
  39 |             "layout": "IPY_MODEL_2e8dc9683c1540efbbe29529c4d85662"
  40 |           }
  41 |         },
  42 |         "259549e7870c4635a9d89cd61a102a4c": {
  43 |           "model_module": "@jupyter-widgets/controls",
  44 |           "model_name": "HTMLModel",
  45 |           "model_module_version": "1.5.0",
  46 |           "state": {
  47 |             "_dom_classes": [],
  48 |             "_model_module": "@jupyter-widgets/controls",
  49 |             "_model_module_version": "1.5.0",
  50 |             "_model_name": "HTMLModel",
  51 |             "_view_count": null,
  52 |             "_view_module": "@jupyter-widgets/controls",
  53 |             "_view_module_version": "1.5.0",
  54 |             "_view_name": "HTMLView",
  55 |             "description": "",
  56 |             "description_tooltip": null,
  57 |             "layout": "IPY_MODEL_6a79248883774a399a323d4e630c4b77",
  58 |             "placeholder": "​",
  59 |             "style": "IPY_MODEL_db6a231b14e443bfa1ac0e6a590b4b11",
  60 |             "value": "Downloading (…)olve/main/vocab.json: 100%"
  61 |           }
  62 |         },
  63 |         "b565ee1cadcd460fadbfc08672f561a1": {
  64 |           "model_module": "@jupyter-widgets/controls",
  65 |           "model_name": "FloatProgressModel",
  66 |           "model_module_version": "1.5.0",
  67 |           "state": {
  68 |             "_dom_classes": [],
  69 |             "_model_module": "@jupyter-widgets/controls",
  70 |             "_model_module_version": "1.5.0",
  71 |             "_model_name": "FloatProgressModel",
  72 |             "_view_count": null,
  73 |             "_view_module": "@jupyter-widgets/controls",
  74 |             "_view_module_version": "1.5.0",
  75 |             "_view_name": "ProgressView",
  76 |             "bar_style": "success",
  77 |             "description": "",
  78 |             "description_tooltip": null,
  79 |             "layout": "IPY_MODEL_3cb811638bb644babb6d24bed0aede87",
  80 |             "max": 1612610,
  81 |             "min": 0,
  82 |             "orientation": "horizontal",
  83 |             "style": "IPY_MODEL_a07a27fddc3e47afb8f565281ce86074",
  84 |             "value": 1612610
  85 |           }
  86 |         },
  87 |         "ca4e30cc5fdb45198a767e7d5cb7d343": {
  88 |           "model_module": "@jupyter-widgets/controls",
  89 |           "model_name": "HTMLModel",
  90 |           "model_module_version": "1.5.0",
  91 |           "state": {
  92 |             "_dom_classes": [],
  93 |             "_model_module": "@jupyter-widgets/controls",
  94 |             "_model_module_version": "1.5.0",
  95 |             "_model_name": "HTMLModel",
  96 |             "_view_count": null,
  97 |             "_view_module": "@jupyter-widgets/controls",
  98 |             "_view_module_version": "1.5.0",
  99 |             "_view_name": "HTMLView",
 100 |             "description": "",
 101 |             "description_tooltip": null,
 102 |             "layout": "IPY_MODEL_be610141c1e14fff891bbcce0a22df3f",
 103 |             "placeholder": "​",
 104 |             "style": "IPY_MODEL_72d8408a7fc14213abf5c9cc9b620f28",
 105 |             "value": " 1.61M/1.61M [00:00&lt;00:00, 6.91MB/s]"
 106 |           }
 107 |         },
 108 |         "2e8dc9683c1540efbbe29529c4d85662": {
 109 |           "model_module": "@jupyter-widgets/base",
 110 |           "model_name": "LayoutModel",
 111 |           "model_module_version": "1.2.0",
 112 |           "state": {
 113 |             "_model_module": "@jupyter-widgets/base",
 114 |             "_model_module_version": "1.2.0",
 115 |             "_model_name": "LayoutModel",
 116 |             "_view_count": null,
 117 |             "_view_module": "@jupyter-widgets/base",
 118 |             "_view_module_version": "1.2.0",
 119 |             "_view_name": "LayoutView",
 120 |             "align_content": null,
 121 |             "align_items": null,
 122 |             "align_self": null,
 123 |             "border": null,
 124 |             "bottom": null,
 125 |             "display": null,
 126 |             "flex": null,
 127 |             "flex_flow": null,
 128 |             "grid_area": null,
 129 |             "grid_auto_columns": null,
 130 |             "grid_auto_flow": null,
 131 |             "grid_auto_rows": null,
 132 |             "grid_column": null,
 133 |             "grid_gap": null,
 134 |             "grid_row": null,
 135 |             "grid_template_areas": null,
 136 |             "grid_template_columns": null,
 137 |             "grid_template_rows": null,
 138 |             "height": null,
 139 |             "justify_content": null,
 140 |             "justify_items": null,
 141 |             "left": null,
 142 |             "margin": null,
 143 |             "max_height": null,
 144 |             "max_width": null,
 145 |             "min_height": null,
 146 |             "min_width": null,
 147 |             "object_fit": null,
 148 |             "object_position": null,
 149 |             "order": null,
 150 |             "overflow": null,
 151 |             "overflow_x": null,
 152 |             "overflow_y": null,
 153 |             "padding": null,
 154 |             "right": null,
 155 |             "top": null,
 156 |             "visibility": null,
 157 |             "width": null
 158 |           }
 159 |         },
 160 |         "6a79248883774a399a323d4e630c4b77": {
 161 |           "model_module": "@jupyter-widgets/base",
 162 |           "model_name": "LayoutModel",
 163 |           "model_module_version": "1.2.0",
 164 |           "state": {
 165 |             "_model_module": "@jupyter-widgets/base",
 166 |             "_model_module_version": "1.2.0",
 167 |             "_model_name": "LayoutModel",
 168 |             "_view_count": null,
 169 |             "_view_module": "@jupyter-widgets/base",
 170 |             "_view_module_version": "1.2.0",
 171 |             "_view_name": "LayoutView",
 172 |             "align_content": null,
 173 |             "align_items": null,
 174 |             "align_self": null,
 175 |             "border": null,
 176 |             "bottom": null,
 177 |             "display": null,
 178 |             "flex": null,
 179 |             "flex_flow": null,
 180 |             "grid_area": null,
 181 |             "grid_auto_columns": null,
 182 |             "grid_auto_flow": null,
 183 |             "grid_auto_rows": null,
 184 |             "grid_column": null,
 185 |             "grid_gap": null,
 186 |             "grid_row": null,
 187 |             "grid_template_areas": null,
 188 |             "grid_template_columns": null,
 189 |             "grid_template_rows": null,
 190 |             "height": null,
 191 |             "justify_content": null,
 192 |             "justify_items": null,
 193 |             "left": null,
 194 |             "margin": null,
 195 |             "max_height": null,
 196 |             "max_width": null,
 197 |             "min_height": null,
 198 |             "min_width": null,
 199 |             "object_fit": null,
 200 |             "object_position": null,
 201 |             "order": null,
 202 |             "overflow": null,
 203 |             "overflow_x": null,
 204 |             "overflow_y": null,
 205 |             "padding": null,
 206 |             "right": null,
 207 |             "top": null,
 208 |             "visibility": null,
 209 |             "width": null
 210 |           }
 211 |         },
 212 |         "db6a231b14e443bfa1ac0e6a590b4b11": {
 213 |           "model_module": "@jupyter-widgets/controls",
 214 |           "model_name": "DescriptionStyleModel",
 215 |           "model_module_version": "1.5.0",
 216 |           "state": {
 217 |             "_model_module": "@jupyter-widgets/controls",
 218 |             "_model_module_version": "1.5.0",
 219 |             "_model_name": "DescriptionStyleModel",
 220 |             "_view_count": null,
 221 |             "_view_module": "@jupyter-widgets/base",
 222 |             "_view_module_version": "1.2.0",
 223 |             "_view_name": "StyleView",
 224 |             "description_width": ""
 225 |           }
 226 |         },
 227 |         "3cb811638bb644babb6d24bed0aede87": {
 228 |           "model_module": "@jupyter-widgets/base",
 229 |           "model_name": "LayoutModel",
 230 |           "model_module_version": "1.2.0",
 231 |           "state": {
 232 |             "_model_module": "@jupyter-widgets/base",
 233 |             "_model_module_version": "1.2.0",
 234 |             "_model_name": "LayoutModel",
 235 |             "_view_count": null,
 236 |             "_view_module": "@jupyter-widgets/base",
 237 |             "_view_module_version": "1.2.0",
 238 |             "_view_name": "LayoutView",
 239 |             "align_content": null,
 240 |             "align_items": null,
 241 |             "align_self": null,
 242 |             "border": null,
 243 |             "bottom": null,
 244 |             "display": null,
 245 |             "flex": null,
 246 |             "flex_flow": null,
 247 |             "grid_area": null,
 248 |             "grid_auto_columns": null,
 249 |             "grid_auto_flow": null,
 250 |             "grid_auto_rows": null,
 251 |             "grid_column": null,
 252 |             "grid_gap": null,
 253 |             "grid_row": null,
 254 |             "grid_template_areas": null,
 255 |             "grid_template_columns": null,
 256 |             "grid_template_rows": null,
 257 |             "height": null,
 258 |             "justify_content": null,
 259 |             "justify_items": null,
 260 |             "left": null,
 261 |             "margin": null,
 262 |             "max_height": null,
 263 |             "max_width": null,
 264 |             "min_height": null,
 265 |             "min_width": null,
 266 |             "object_fit": null,
 267 |             "object_position": null,
 268 |             "order": null,
 269 |             "overflow": null,
 270 |             "overflow_x": null,
 271 |             "overflow_y": null,
 272 |             "padding": null,
 273 |             "right": null,
 274 |             "top": null,
 275 |             "visibility": null,
 276 |             "width": null
 277 |           }
 278 |         },
 279 |         "a07a27fddc3e47afb8f565281ce86074": {
 280 |           "model_module": "@jupyter-widgets/controls",
 281 |           "model_name": "ProgressStyleModel",
 282 |           "model_module_version": "1.5.0",
 283 |           "state": {
 284 |             "_model_module": "@jupyter-widgets/controls",
 285 |             "_model_module_version": "1.5.0",
 286 |             "_model_name": "ProgressStyleModel",
 287 |             "_view_count": null,
 288 |             "_view_module": "@jupyter-widgets/base",
 289 |             "_view_module_version": "1.2.0",
 290 |             "_view_name": "StyleView",
 291 |             "bar_color": null,
 292 |             "description_width": ""
 293 |           }
 294 |         },
 295 |         "be610141c1e14fff891bbcce0a22df3f": {
 296 |           "model_module": "@jupyter-widgets/base",
 297 |           "model_name": "LayoutModel",
 298 |           "model_module_version": "1.2.0",
 299 |           "state": {
 300 |             "_model_module": "@jupyter-widgets/base",
 301 |             "_model_module_version": "1.2.0",
 302 |             "_model_name": "LayoutModel",
 303 |             "_view_count": null,
 304 |             "_view_module": "@jupyter-widgets/base",
 305 |             "_view_module_version": "1.2.0",
 306 |             "_view_name": "LayoutView",
 307 |             "align_content": null,
 308 |             "align_items": null,
 309 |             "align_self": null,
 310 |             "border": null,
 311 |             "bottom": null,
 312 |             "display": null,
 313 |             "flex": null,
 314 |             "flex_flow": null,
 315 |             "grid_area": null,
 316 |             "grid_auto_columns": null,
 317 |             "grid_auto_flow": null,
 318 |             "grid_auto_rows": null,
 319 |             "grid_column": null,
 320 |             "grid_gap": null,
 321 |             "grid_row": null,
 322 |             "grid_template_areas": null,
 323 |             "grid_template_columns": null,
 324 |             "grid_template_rows": null,
 325 |             "height": null,
 326 |             "justify_content": null,
 327 |             "justify_items": null,
 328 |             "left": null,
 329 |             "margin": null,
 330 |             "max_height": null,
 331 |             "max_width": null,
 332 |             "min_height": null,
 333 |             "min_width": null,
 334 |             "object_fit": null,
 335 |             "object_position": null,
 336 |             "order": null,
 337 |             "overflow": null,
 338 |             "overflow_x": null,
 339 |             "overflow_y": null,
 340 |             "padding": null,
 341 |             "right": null,
 342 |             "top": null,
 343 |             "visibility": null,
 344 |             "width": null
 345 |           }
 346 |         },
 347 |         "72d8408a7fc14213abf5c9cc9b620f28": {
 348 |           "model_module": "@jupyter-widgets/controls",
 349 |           "model_name": "DescriptionStyleModel",
 350 |           "model_module_version": "1.5.0",
 351 |           "state": {
 352 |             "_model_module": "@jupyter-widgets/controls",
 353 |             "_model_module_version": "1.5.0",
 354 |             "_model_name": "DescriptionStyleModel",
 355 |             "_view_count": null,
 356 |             "_view_module": "@jupyter-widgets/base",
 357 |             "_view_module_version": "1.2.0",
 358 |             "_view_name": "StyleView",
 359 |             "description_width": ""
 360 |           }
 361 |         },
 362 |         "f5fbf569cc7f438aa7d018af21db3fa3": {
 363 |           "model_module": "@jupyter-widgets/controls",
 364 |           "model_name": "HBoxModel",
 365 |           "model_module_version": "1.5.0",
 366 |           "state": {
 367 |             "_dom_classes": [],
 368 |             "_model_module": "@jupyter-widgets/controls",
 369 |             "_model_module_version": "1.5.0",
 370 |             "_model_name": "HBoxModel",
 371 |             "_view_count": null,
 372 |             "_view_module": "@jupyter-widgets/controls",
 373 |             "_view_module_version": "1.5.0",
 374 |             "_view_name": "HBoxView",
 375 |             "box_style": "",
 376 |             "children": [
 377 |               "IPY_MODEL_59037cf68d6543bf8dbecbce0114ce37",
 378 |               "IPY_MODEL_d942d39cdefd410599ef0ee3c4cdbf15",
 379 |               "IPY_MODEL_4668dbead9024218967e8dc318941818"
 380 |             ],
 381 |             "layout": "IPY_MODEL_22a183f85aff49ec9ec7753cf8c0f3c7"
 382 |           }
 383 |         },
 384 |         "59037cf68d6543bf8dbecbce0114ce37": {
 385 |           "model_module": "@jupyter-widgets/controls",
 386 |           "model_name": "HTMLModel",
 387 |           "model_module_version": "1.5.0",
 388 |           "state": {
 389 |             "_dom_classes": [],
 390 |             "_model_module": "@jupyter-widgets/controls",
 391 |             "_model_module_version": "1.5.0",
 392 |             "_model_name": "HTMLModel",
 393 |             "_view_count": null,
 394 |             "_view_module": "@jupyter-widgets/controls",
 395 |             "_view_module_version": "1.5.0",
 396 |             "_view_name": "HTMLView",
 397 |             "description": "",
 398 |             "description_tooltip": null,
 399 |             "layout": "IPY_MODEL_387a166402b340b3b88941dc573199ca",
 400 |             "placeholder": "​",
 401 |             "style": "IPY_MODEL_8d8b2f1d14784d4f98e26c5e05b73de0",
 402 |             "value": "Downloading (…)olve/main/merges.txt: 100%"
 403 |           }
 404 |         },
 405 |         "d942d39cdefd410599ef0ee3c4cdbf15": {
 406 |           "model_module": "@jupyter-widgets/controls",
 407 |           "model_name": "FloatProgressModel",
 408 |           "model_module_version": "1.5.0",
 409 |           "state": {
 410 |             "_dom_classes": [],
 411 |             "_model_module": "@jupyter-widgets/controls",
 412 |             "_model_module_version": "1.5.0",
 413 |             "_model_name": "FloatProgressModel",
 414 |             "_view_count": null,
 415 |             "_view_module": "@jupyter-widgets/controls",
 416 |             "_view_module_version": "1.5.0",
 417 |             "_view_name": "ProgressView",
 418 |             "bar_style": "success",
 419 |             "description": "",
 420 |             "description_tooltip": null,
 421 |             "layout": "IPY_MODEL_3047ff5deb8546d286bb48c4e7b8d8c3",
 422 |             "max": 1270963,
 423 |             "min": 0,
 424 |             "orientation": "horizontal",
 425 |             "style": "IPY_MODEL_c0c7a80a14c9469f934021610e66d683",
 426 |             "value": 1270963
 427 |           }
 428 |         },
 429 |         "4668dbead9024218967e8dc318941818": {
 430 |           "model_module": "@jupyter-widgets/controls",
 431 |           "model_name": "HTMLModel",
 432 |           "model_module_version": "1.5.0",
 433 |           "state": {
 434 |             "_dom_classes": [],
 435 |             "_model_module": "@jupyter-widgets/controls",
 436 |             "_model_module_version": "1.5.0",
 437 |             "_model_name": "HTMLModel",
 438 |             "_view_count": null,
 439 |             "_view_module": "@jupyter-widgets/controls",
 440 |             "_view_module_version": "1.5.0",
 441 |             "_view_name": "HTMLView",
 442 |             "description": "",
 443 |             "description_tooltip": null,
 444 |             "layout": "IPY_MODEL_95ef08a39791436e9cdc7bec3850c013",
 445 |             "placeholder": "​",
 446 |             "style": "IPY_MODEL_5811bf129dca492bb7d5c2cf2ae9cdc2",
 447 |             "value": " 1.27M/1.27M [00:00&lt;00:00, 9.37MB/s]"
 448 |           }
 449 |         },
 450 |         "22a183f85aff49ec9ec7753cf8c0f3c7": {
 451 |           "model_module": "@jupyter-widgets/base",
 452 |           "model_name": "LayoutModel",
 453 |           "model_module_version": "1.2.0",
 454 |           "state": {
 455 |             "_model_module": "@jupyter-widgets/base",
 456 |             "_model_module_version": "1.2.0",
 457 |             "_model_name": "LayoutModel",
 458 |             "_view_count": null,
 459 |             "_view_module": "@jupyter-widgets/base",
 460 |             "_view_module_version": "1.2.0",
 461 |             "_view_name": "LayoutView",
 462 |             "align_content": null,
 463 |             "align_items": null,
 464 |             "align_self": null,
 465 |             "border": null,
 466 |             "bottom": null,
 467 |             "display": null,
 468 |             "flex": null,
 469 |             "flex_flow": null,
 470 |             "grid_area": null,
 471 |             "grid_auto_columns": null,
 472 |             "grid_auto_flow": null,
 473 |             "grid_auto_rows": null,
 474 |             "grid_column": null,
 475 |             "grid_gap": null,
 476 |             "grid_row": null,
 477 |             "grid_template_areas": null,
 478 |             "grid_template_columns": null,
 479 |             "grid_template_rows": null,
 480 |             "height": null,
 481 |             "justify_content": null,
 482 |             "justify_items": null,
 483 |             "left": null,
 484 |             "margin": null,
 485 |             "max_height": null,
 486 |             "max_width": null,
 487 |             "min_height": null,
 488 |             "min_width": null,
 489 |             "object_fit": null,
 490 |             "object_position": null,
 491 |             "order": null,
 492 |             "overflow": null,
 493 |             "overflow_x": null,
 494 |             "overflow_y": null,
 495 |             "padding": null,
 496 |             "right": null,
 497 |             "top": null,
 498 |             "visibility": null,
 499 |             "width": null
 500 |           }
 501 |         },
 502 |         "387a166402b340b3b88941dc573199ca": {
 503 |           "model_module": "@jupyter-widgets/base",
 504 |           "model_name": "LayoutModel",
 505 |           "model_module_version": "1.2.0",
 506 |           "state": {
 507 |             "_model_module": "@jupyter-widgets/base",
 508 |             "_model_module_version": "1.2.0",
 509 |             "_model_name": "LayoutModel",
 510 |             "_view_count": null,
 511 |             "_view_module": "@jupyter-widgets/base",
 512 |             "_view_module_version": "1.2.0",
 513 |             "_view_name": "LayoutView",
 514 |             "align_content": null,
 515 |             "align_items": null,
 516 |             "align_self": null,
 517 |             "border": null,
 518 |             "bottom": null,
 519 |             "display": null,
 520 |             "flex": null,
 521 |             "flex_flow": null,
 522 |             "grid_area": null,
 523 |             "grid_auto_columns": null,
 524 |             "grid_auto_flow": null,
 525 |             "grid_auto_rows": null,
 526 |             "grid_column": null,
 527 |             "grid_gap": null,
 528 |             "grid_row": null,
 529 |             "grid_template_areas": null,
 530 |             "grid_template_columns": null,
 531 |             "grid_template_rows": null,
 532 |             "height": null,
 533 |             "justify_content": null,
 534 |             "justify_items": null,
 535 |             "left": null,
 536 |             "margin": null,
 537 |             "max_height": null,
 538 |             "max_width": null,
 539 |             "min_height": null,
 540 |             "min_width": null,
 541 |             "object_fit": null,
 542 |             "object_position": null,
 543 |             "order": null,
 544 |             "overflow": null,
 545 |             "overflow_x": null,
 546 |             "overflow_y": null,
 547 |             "padding": null,
 548 |             "right": null,
 549 |             "top": null,
 550 |             "visibility": null,
 551 |             "width": null
 552 |           }
 553 |         },
 554 |         "8d8b2f1d14784d4f98e26c5e05b73de0": {
 555 |           "model_module": "@jupyter-widgets/controls",
 556 |           "model_name": "DescriptionStyleModel",
 557 |           "model_module_version": "1.5.0",
 558 |           "state": {
 559 |             "_model_module": "@jupyter-widgets/controls",
 560 |             "_model_module_version": "1.5.0",
 561 |             "_model_name": "DescriptionStyleModel",
 562 |             "_view_count": null,
 563 |             "_view_module": "@jupyter-widgets/base",
 564 |             "_view_module_version": "1.2.0",
 565 |             "_view_name": "StyleView",
 566 |             "description_width": ""
 567 |           }
 568 |         },
 569 |         "3047ff5deb8546d286bb48c4e7b8d8c3": {
 570 |           "model_module": "@jupyter-widgets/base",
 571 |           "model_name": "LayoutModel",
 572 |           "model_module_version": "1.2.0",
 573 |           "state": {
 574 |             "_model_module": "@jupyter-widgets/base",
 575 |             "_model_module_version": "1.2.0",
 576 |             "_model_name": "LayoutModel",
 577 |             "_view_count": null,
 578 |             "_view_module": "@jupyter-widgets/base",
 579 |             "_view_module_version": "1.2.0",
 580 |             "_view_name": "LayoutView",
 581 |             "align_content": null,
 582 |             "align_items": null,
 583 |             "align_self": null,
 584 |             "border": null,
 585 |             "bottom": null,
 586 |             "display": null,
 587 |             "flex": null,
 588 |             "flex_flow": null,
 589 |             "grid_area": null,
 590 |             "grid_auto_columns": null,
 591 |             "grid_auto_flow": null,
 592 |             "grid_auto_rows": null,
 593 |             "grid_column": null,
 594 |             "grid_gap": null,
 595 |             "grid_row": null,
 596 |             "grid_template_areas": null,
 597 |             "grid_template_columns": null,
 598 |             "grid_template_rows": null,
 599 |             "height": null,
 600 |             "justify_content": null,
 601 |             "justify_items": null,
 602 |             "left": null,
 603 |             "margin": null,
 604 |             "max_height": null,
 605 |             "max_width": null,
 606 |             "min_height": null,
 607 |             "min_width": null,
 608 |             "object_fit": null,
 609 |             "object_position": null,
 610 |             "order": null,
 611 |             "overflow": null,
 612 |             "overflow_x": null,
 613 |             "overflow_y": null,
 614 |             "padding": null,
 615 |             "right": null,
 616 |             "top": null,
 617 |             "visibility": null,
 618 |             "width": null
 619 |           }
 620 |         },
 621 |         "c0c7a80a14c9469f934021610e66d683": {
 622 |           "model_module": "@jupyter-widgets/controls",
 623 |           "model_name": "ProgressStyleModel",
 624 |           "model_module_version": "1.5.0",
 625 |           "state": {
 626 |             "_model_module": "@jupyter-widgets/controls",
 627 |             "_model_module_version": "1.5.0",
 628 |             "_model_name": "ProgressStyleModel",
 629 |             "_view_count": null,
 630 |             "_view_module": "@jupyter-widgets/base",
 631 |             "_view_module_version": "1.2.0",
 632 |             "_view_name": "StyleView",
 633 |             "bar_color": null,
 634 |             "description_width": ""
 635 |           }
 636 |         },
 637 |         "95ef08a39791436e9cdc7bec3850c013": {
 638 |           "model_module": "@jupyter-widgets/base",
 639 |           "model_name": "LayoutModel",
 640 |           "model_module_version": "1.2.0",
 641 |           "state": {
 642 |             "_model_module": "@jupyter-widgets/base",
 643 |             "_model_module_version": "1.2.0",
 644 |             "_model_name": "LayoutModel",
 645 |             "_view_count": null,
 646 |             "_view_module": "@jupyter-widgets/base",
 647 |             "_view_module_version": "1.2.0",
 648 |             "_view_name": "LayoutView",
 649 |             "align_content": null,
 650 |             "align_items": null,
 651 |             "align_self": null,
 652 |             "border": null,
 653 |             "bottom": null,
 654 |             "display": null,
 655 |             "flex": null,
 656 |             "flex_flow": null,
 657 |             "grid_area": null,
 658 |             "grid_auto_columns": null,
 659 |             "grid_auto_flow": null,
 660 |             "grid_auto_rows": null,
 661 |             "grid_column": null,
 662 |             "grid_gap": null,
 663 |             "grid_row": null,
 664 |             "grid_template_areas": null,
 665 |             "grid_template_columns": null,
 666 |             "grid_template_rows": null,
 667 |             "height": null,
 668 |             "justify_content": null,
 669 |             "justify_items": null,
 670 |             "left": null,
 671 |             "margin": null,
 672 |             "max_height": null,
 673 |             "max_width": null,
 674 |             "min_height": null,
 675 |             "min_width": null,
 676 |             "object_fit": null,
 677 |             "object_position": null,
 678 |             "order": null,
 679 |             "overflow": null,
 680 |             "overflow_x": null,
 681 |             "overflow_y": null,
 682 |             "padding": null,
 683 |             "right": null,
 684 |             "top": null,
 685 |             "visibility": null,
 686 |             "width": null
 687 |           }
 688 |         },
 689 |         "5811bf129dca492bb7d5c2cf2ae9cdc2": {
 690 |           "model_module": "@jupyter-widgets/controls",
 691 |           "model_name": "DescriptionStyleModel",
 692 |           "model_module_version": "1.5.0",
 693 |           "state": {
 694 |             "_model_module": "@jupyter-widgets/controls",
 695 |             "_model_module_version": "1.5.0",
 696 |             "_model_name": "DescriptionStyleModel",
 697 |             "_view_count": null,
 698 |             "_view_module": "@jupyter-widgets/base",
 699 |             "_view_module_version": "1.2.0",
 700 |             "_view_name": "StyleView",
 701 |             "description_width": ""
 702 |           }
 703 |         },
 704 |         "dd8abf873df64f90849d9179efb982ea": {
 705 |           "model_module": "@jupyter-widgets/controls",
 706 |           "model_name": "HBoxModel",
 707 |           "model_module_version": "1.5.0",
 708 |           "state": {
 709 |             "_dom_classes": [],
 710 |             "_model_module": "@jupyter-widgets/controls",
 711 |             "_model_module_version": "1.5.0",
 712 |             "_model_name": "HBoxModel",
 713 |             "_view_count": null,
 714 |             "_view_module": "@jupyter-widgets/controls",
 715 |             "_view_module_version": "1.5.0",
 716 |             "_view_name": "HBoxView",
 717 |             "box_style": "",
 718 |             "children": [
 719 |               "IPY_MODEL_ee2aed55495b405ba03b36ab1787b02c",
 720 |               "IPY_MODEL_127049f326ed494cb139a84671fb02b0",
 721 |               "IPY_MODEL_03cab8fe7f9a4180940a82875e409004"
 722 |             ],
 723 |             "layout": "IPY_MODEL_749d556a61f840fdbf7c59dc7fff4f64"
 724 |           }
 725 |         },
 726 |         "ee2aed55495b405ba03b36ab1787b02c": {
 727 |           "model_module": "@jupyter-widgets/controls",
 728 |           "model_name": "HTMLModel",
 729 |           "model_module_version": "1.5.0",
 730 |           "state": {
 731 |             "_dom_classes": [],
 732 |             "_model_module": "@jupyter-widgets/controls",
 733 |             "_model_module_version": "1.5.0",
 734 |             "_model_name": "HTMLModel",
 735 |             "_view_count": null,
 736 |             "_view_module": "@jupyter-widgets/controls",
 737 |             "_view_module_version": "1.5.0",
 738 |             "_view_name": "HTMLView",
 739 |             "description": "",
 740 |             "description_tooltip": null,
 741 |             "layout": "IPY_MODEL_65f232af5f734cb5a3d6cf30f9c3d788",
 742 |             "placeholder": "​",
 743 |             "style": "IPY_MODEL_62bd164d23d94f7cb2d4ce064fd0ec85",
 744 |             "value": "Downloading (…)lve/main/config.json: 100%"
 745 |           }
 746 |         },
 747 |         "127049f326ed494cb139a84671fb02b0": {
 748 |           "model_module": "@jupyter-widgets/controls",
 749 |           "model_name": "FloatProgressModel",
 750 |           "model_module_version": "1.5.0",
 751 |           "state": {
 752 |             "_dom_classes": [],
 753 |             "_model_module": "@jupyter-widgets/controls",
 754 |             "_model_module_version": "1.5.0",
 755 |             "_model_name": "FloatProgressModel",
 756 |             "_view_count": null,
 757 |             "_view_module": "@jupyter-widgets/controls",
 758 |             "_view_module_version": "1.5.0",
 759 |             "_view_name": "ProgressView",
 760 |             "bar_style": "success",
 761 |             "description": "",
 762 |             "description_tooltip": null,
 763 |             "layout": "IPY_MODEL_4ef83848492749818b4f3bc8b3fecb0c",
 764 |             "max": 674,
 765 |             "min": 0,
 766 |             "orientation": "horizontal",
 767 |             "style": "IPY_MODEL_01b0579211894dacaf6c7088a414fe95",
 768 |             "value": 674
 769 |           }
 770 |         },
 771 |         "03cab8fe7f9a4180940a82875e409004": {
 772 |           "model_module": "@jupyter-widgets/controls",
 773 |           "model_name": "HTMLModel",
 774 |           "model_module_version": "1.5.0",
 775 |           "state": {
 776 |             "_dom_classes": [],
 777 |             "_model_module": "@jupyter-widgets/controls",
 778 |             "_model_module_version": "1.5.0",
 779 |             "_model_name": "HTMLModel",
 780 |             "_view_count": null,
 781 |             "_view_module": "@jupyter-widgets/controls",
 782 |             "_view_module_version": "1.5.0",
 783 |             "_view_name": "HTMLView",
 784 |             "description": "",
 785 |             "description_tooltip": null,
 786 |             "layout": "IPY_MODEL_b0e9917a6d9d40f9be2bcd03e595a544",
 787 |             "placeholder": "​",
 788 |             "style": "IPY_MODEL_e5e0108c21824639914ce204b29d49bb",
 789 |             "value": " 674/674 [00:00&lt;00:00, 14.4kB/s]"
 790 |           }
 791 |         },
 792 |         "749d556a61f840fdbf7c59dc7fff4f64": {
 793 |           "model_module": "@jupyter-widgets/base",
 794 |           "model_name": "LayoutModel",
 795 |           "model_module_version": "1.2.0",
 796 |           "state": {
 797 |             "_model_module": "@jupyter-widgets/base",
 798 |             "_model_module_version": "1.2.0",
 799 |             "_model_name": "LayoutModel",
 800 |             "_view_count": null,
 801 |             "_view_module": "@jupyter-widgets/base",
 802 |             "_view_module_version": "1.2.0",
 803 |             "_view_name": "LayoutView",
 804 |             "align_content": null,
 805 |             "align_items": null,
 806 |             "align_self": null,
 807 |             "border": null,
 808 |             "bottom": null,
 809 |             "display": null,
 810 |             "flex": null,
 811 |             "flex_flow": null,
 812 |             "grid_area": null,
 813 |             "grid_auto_columns": null,
 814 |             "grid_auto_flow": null,
 815 |             "grid_auto_rows": null,
 816 |             "grid_column": null,
 817 |             "grid_gap": null,
 818 |             "grid_row": null,
 819 |             "grid_template_areas": null,
 820 |             "grid_template_columns": null,
 821 |             "grid_template_rows": null,
 822 |             "height": null,
 823 |             "justify_content": null,
 824 |             "justify_items": null,
 825 |             "left": null,
 826 |             "margin": null,
 827 |             "max_height": null,
 828 |             "max_width": null,
 829 |             "min_height": null,
 830 |             "min_width": null,
 831 |             "object_fit": null,
 832 |             "object_position": null,
 833 |             "order": null,
 834 |             "overflow": null,
 835 |             "overflow_x": null,
 836 |             "overflow_y": null,
 837 |             "padding": null,
 838 |             "right": null,
 839 |             "top": null,
 840 |             "visibility": null,
 841 |             "width": null
 842 |           }
 843 |         },
 844 |         "65f232af5f734cb5a3d6cf30f9c3d788": {
 845 |           "model_module": "@jupyter-widgets/base",
 846 |           "model_name": "LayoutModel",
 847 |           "model_module_version": "1.2.0",
 848 |           "state": {
 849 |             "_model_module": "@jupyter-widgets/base",
 850 |             "_model_module_version": "1.2.0",
 851 |             "_model_name": "LayoutModel",
 852 |             "_view_count": null,
 853 |             "_view_module": "@jupyter-widgets/base",
 854 |             "_view_module_version": "1.2.0",
 855 |             "_view_name": "LayoutView",
 856 |             "align_content": null,
 857 |             "align_items": null,
 858 |             "align_self": null,
 859 |             "border": null,
 860 |             "bottom": null,
 861 |             "display": null,
 862 |             "flex": null,
 863 |             "flex_flow": null,
 864 |             "grid_area": null,
 865 |             "grid_auto_columns": null,
 866 |             "grid_auto_flow": null,
 867 |             "grid_auto_rows": null,
 868 |             "grid_column": null,
 869 |             "grid_gap": null,
 870 |             "grid_row": null,
 871 |             "grid_template_areas": null,
 872 |             "grid_template_columns": null,
 873 |             "grid_template_rows": null,
 874 |             "height": null,
 875 |             "justify_content": null,
 876 |             "justify_items": null,
 877 |             "left": null,
 878 |             "margin": null,
 879 |             "max_height": null,
 880 |             "max_width": null,
 881 |             "min_height": null,
 882 |             "min_width": null,
 883 |             "object_fit": null,
 884 |             "object_position": null,
 885 |             "order": null,
 886 |             "overflow": null,
 887 |             "overflow_x": null,
 888 |             "overflow_y": null,
 889 |             "padding": null,
 890 |             "right": null,
 891 |             "top": null,
 892 |             "visibility": null,
 893 |             "width": null
 894 |           }
 895 |         },
 896 |         "62bd164d23d94f7cb2d4ce064fd0ec85": {
 897 |           "model_module": "@jupyter-widgets/controls",
 898 |           "model_name": "DescriptionStyleModel",
 899 |           "model_module_version": "1.5.0",
 900 |           "state": {
 901 |             "_model_module": "@jupyter-widgets/controls",
 902 |             "_model_module_version": "1.5.0",
 903 |             "_model_name": "DescriptionStyleModel",
 904 |             "_view_count": null,
 905 |             "_view_module": "@jupyter-widgets/base",
 906 |             "_view_module_version": "1.2.0",
 907 |             "_view_name": "StyleView",
 908 |             "description_width": ""
 909 |           }
 910 |         },
 911 |         "4ef83848492749818b4f3bc8b3fecb0c": {
 912 |           "model_module": "@jupyter-widgets/base",
 913 |           "model_name": "LayoutModel",
 914 |           "model_module_version": "1.2.0",
 915 |           "state": {
 916 |             "_model_module": "@jupyter-widgets/base",
 917 |             "_model_module_version": "1.2.0",
 918 |             "_model_name": "LayoutModel",
 919 |             "_view_count": null,
 920 |             "_view_module": "@jupyter-widgets/base",
 921 |             "_view_module_version": "1.2.0",
 922 |             "_view_name": "LayoutView",
 923 |             "align_content": null,
 924 |             "align_items": null,
 925 |             "align_self": null,
 926 |             "border": null,
 927 |             "bottom": null,
 928 |             "display": null,
 929 |             "flex": null,
 930 |             "flex_flow": null,
 931 |             "grid_area": null,
 932 |             "grid_auto_columns": null,
 933 |             "grid_auto_flow": null,
 934 |             "grid_auto_rows": null,
 935 |             "grid_column": null,
 936 |             "grid_gap": null,
 937 |             "grid_row": null,
 938 |             "grid_template_areas": null,
 939 |             "grid_template_columns": null,
 940 |             "grid_template_rows": null,
 941 |             "height": null,
 942 |             "justify_content": null,
 943 |             "justify_items": null,
 944 |             "left": null,
 945 |             "margin": null,
 946 |             "max_height": null,
 947 |             "max_width": null,
 948 |             "min_height": null,
 949 |             "min_width": null,
 950 |             "object_fit": null,
 951 |             "object_position": null,
 952 |             "order": null,
 953 |             "overflow": null,
 954 |             "overflow_x": null,
 955 |             "overflow_y": null,
 956 |             "padding": null,
 957 |             "right": null,
 958 |             "top": null,
 959 |             "visibility": null,
 960 |             "width": null
 961 |           }
 962 |         },
 963 |         "01b0579211894dacaf6c7088a414fe95": {
 964 |           "model_module": "@jupyter-widgets/controls",
 965 |           "model_name": "ProgressStyleModel",
 966 |           "model_module_version": "1.5.0",
 967 |           "state": {
 968 |             "_model_module": "@jupyter-widgets/controls",
 969 |             "_model_module_version": "1.5.0",
 970 |             "_model_name": "ProgressStyleModel",
 971 |             "_view_count": null,
 972 |             "_view_module": "@jupyter-widgets/base",
 973 |             "_view_module_version": "1.2.0",
 974 |             "_view_name": "StyleView",
 975 |             "bar_color": null,
 976 |             "description_width": ""
 977 |           }
 978 |         },
 979 |         "b0e9917a6d9d40f9be2bcd03e595a544": {
 980 |           "model_module": "@jupyter-widgets/base",
 981 |           "model_name": "LayoutModel",
 982 |           "model_module_version": "1.2.0",
 983 |           "state": {
 984 |             "_model_module": "@jupyter-widgets/base",
 985 |             "_model_module_version": "1.2.0",
 986 |             "_model_name": "LayoutModel",
 987 |             "_view_count": null,
 988 |             "_view_module": "@jupyter-widgets/base",
 989 |             "_view_module_version": "1.2.0",
 990 |             "_view_name": "LayoutView",
 991 |             "align_content": null,
 992 |             "align_items": null,
 993 |             "align_self": null,
 994 |             "border": null,
 995 |             "bottom": null,
 996 |             "display": null,
 997 |             "flex": null,
 998 |             "flex_flow": null,
 999 |             "grid_area": null,
1000 |             "grid_auto_columns": null,
1001 |             "grid_auto_flow": null,
1002 |             "grid_auto_rows": null,
1003 |             "grid_column": null,
1004 |             "grid_gap": null,
1005 |             "grid_row": null,
1006 |             "grid_template_areas": null,
1007 |             "grid_template_columns": null,
1008 |             "grid_template_rows": null,
1009 |             "height": null,
1010 |             "justify_content": null,
1011 |             "justify_items": null,
1012 |             "left": null,
1013 |             "margin": null,
1014 |             "max_height": null,
1015 |             "max_width": null,
1016 |             "min_height": null,
1017 |             "min_width": null,
1018 |             "object_fit": null,
1019 |             "object_position": null,
1020 |             "order": null,
1021 |             "overflow": null,
1022 |             "overflow_x": null,
1023 |             "overflow_y": null,
1024 |             "padding": null,
1025 |             "right": null,
1026 |             "top": null,
1027 |             "visibility": null,
1028 |             "width": null
1029 |           }
1030 |         },
1031 |         "e5e0108c21824639914ce204b29d49bb": {
1032 |           "model_module": "@jupyter-widgets/controls",
1033 |           "model_name": "DescriptionStyleModel",
1034 |           "model_module_version": "1.5.0",
1035 |           "state": {
1036 |             "_model_module": "@jupyter-widgets/controls",
1037 |             "_model_module_version": "1.5.0",
1038 |             "_model_name": "DescriptionStyleModel",
1039 |             "_view_count": null,
1040 |             "_view_module": "@jupyter-widgets/base",
1041 |             "_view_module_version": "1.2.0",
1042 |             "_view_name": "StyleView",
1043 |             "description_width": ""
1044 |           }
1045 |         },
1046 |         "68de9de758d14d57b361e4832a1bd442": {
1047 |           "model_module": "@jupyter-widgets/controls",
1048 |           "model_name": "HBoxModel",
1049 |           "model_module_version": "1.5.0",
1050 |           "state": {
1051 |             "_dom_classes": [],
1052 |             "_model_module": "@jupyter-widgets/controls",
1053 |             "_model_module_version": "1.5.0",
1054 |             "_model_name": "HBoxModel",
1055 |             "_view_count": null,
1056 |             "_view_module": "@jupyter-widgets/controls",
1057 |             "_view_module_version": "1.5.0",
1058 |             "_view_name": "HBoxView",
1059 |             "box_style": "",
1060 |             "children": [
1061 |               "IPY_MODEL_9d98811282cf4527ad1c8dc600b14344",
1062 |               "IPY_MODEL_356e23046cd04ea099e31b456dd43ea6",
1063 |               "IPY_MODEL_1c41f9b9583b4bb693c43d14c3ac4d09"
1064 |             ],
1065 |             "layout": "IPY_MODEL_099fcbe44b5a4d82ada714cc13723fbd"
1066 |           }
1067 |         },
1068 |         "9d98811282cf4527ad1c8dc600b14344": {
1069 |           "model_module": "@jupyter-widgets/controls",
1070 |           "model_name": "HTMLModel",
1071 |           "model_module_version": "1.5.0",
1072 |           "state": {
1073 |             "_dom_classes": [],
1074 |             "_model_module": "@jupyter-widgets/controls",
1075 |             "_model_module_version": "1.5.0",
1076 |             "_model_name": "HTMLModel",
1077 |             "_view_count": null,
1078 |             "_view_module": "@jupyter-widgets/controls",
1079 |             "_view_module_version": "1.5.0",
1080 |             "_view_name": "HTMLView",
1081 |             "description": "",
1082 |             "description_tooltip": null,
1083 |             "layout": "IPY_MODEL_617845c19c1042cba607f39f5e3aab74",
1084 |             "placeholder": "​",
1085 |             "style": "IPY_MODEL_daa07b0624924c12853ca841d416345f",
1086 |             "value": "Downloading pytorch_model.bin: 100%"
1087 |           }
1088 |         },
1089 |         "356e23046cd04ea099e31b456dd43ea6": {
1090 |           "model_module": "@jupyter-widgets/controls",
1091 |           "model_name": "FloatProgressModel",
1092 |           "model_module_version": "1.5.0",
1093 |           "state": {
1094 |             "_dom_classes": [],
1095 |             "_model_module": "@jupyter-widgets/controls",
1096 |             "_model_module_version": "1.5.0",
1097 |             "_model_name": "FloatProgressModel",
1098 |             "_view_count": null,
1099 |             "_view_module": "@jupyter-widgets/controls",
1100 |             "_view_module_version": "1.5.0",
1101 |             "_view_name": "ProgressView",
1102 |             "bar_style": "success",
1103 |             "description": "",
1104 |             "description_tooltip": null,
1105 |             "layout": "IPY_MODEL_be4b89a4a133484fbb3f29d1652e085d",
1106 |             "max": 1730074771,
1107 |             "min": 0,
1108 |             "orientation": "horizontal",
1109 |             "style": "IPY_MODEL_bea5187033444bffad13a7d2fa2d193f",
1110 |             "value": 1730074771
1111 |           }
1112 |         },
1113 |         "1c41f9b9583b4bb693c43d14c3ac4d09": {
1114 |           "model_module": "@jupyter-widgets/controls",
1115 |           "model_name": "HTMLModel",
1116 |           "model_module_version": "1.5.0",
1117 |           "state": {
1118 |             "_dom_classes": [],
1119 |             "_model_module": "@jupyter-widgets/controls",
1120 |             "_model_module_version": "1.5.0",
1121 |             "_model_name": "HTMLModel",
1122 |             "_view_count": null,
1123 |             "_view_module": "@jupyter-widgets/controls",
1124 |             "_view_module_version": "1.5.0",
1125 |             "_view_name": "HTMLView",
1126 |             "description": "",
1127 |             "description_tooltip": null,
1128 |             "layout": "IPY_MODEL_34e0247ed20f4c4bba133d972867b4d8",
1129 |             "placeholder": "​",
1130 |             "style": "IPY_MODEL_a36d54a55ea84020b10de4c3a9a6c579",
1131 |             "value": " 1.73G/1.73G [00:42&lt;00:00, 50.7MB/s]"
1132 |           }
1133 |         },
1134 |         "099fcbe44b5a4d82ada714cc13723fbd": {
1135 |           "model_module": "@jupyter-widgets/base",
1136 |           "model_name": "LayoutModel",
1137 |           "model_module_version": "1.2.0",
1138 |           "state": {
1139 |             "_model_module": "@jupyter-widgets/base",
1140 |             "_model_module_version": "1.2.0",
1141 |             "_model_name": "LayoutModel",
1142 |             "_view_count": null,
1143 |             "_view_module": "@jupyter-widgets/base",
1144 |             "_view_module_version": "1.2.0",
1145 |             "_view_name": "LayoutView",
1146 |             "align_content": null,
1147 |             "align_items": null,
1148 |             "align_self": null,
1149 |             "border": null,
1150 |             "bottom": null,
1151 |             "display": null,
1152 |             "flex": null,
1153 |             "flex_flow": null,
1154 |             "grid_area": null,
1155 |             "grid_auto_columns": null,
1156 |             "grid_auto_flow": null,
1157 |             "grid_auto_rows": null,
1158 |             "grid_column": null,
1159 |             "grid_gap": null,
1160 |             "grid_row": null,
1161 |             "grid_template_areas": null,
1162 |             "grid_template_columns": null,
1163 |             "grid_template_rows": null,
1164 |             "height": null,
1165 |             "justify_content": null,
1166 |             "justify_items": null,
1167 |             "left": null,
1168 |             "margin": null,
1169 |             "max_height": null,
1170 |             "max_width": null,
1171 |             "min_height": null,
1172 |             "min_width": null,
1173 |             "object_fit": null,
1174 |             "object_position": null,
1175 |             "order": null,
1176 |             "overflow": null,
1177 |             "overflow_x": null,
1178 |             "overflow_y": null,
1179 |             "padding": null,
1180 |             "right": null,
1181 |             "top": null,
1182 |             "visibility": null,
1183 |             "width": null
1184 |           }
1185 |         },
1186 |         "617845c19c1042cba607f39f5e3aab74": {
1187 |           "model_module": "@jupyter-widgets/base",
1188 |           "model_name": "LayoutModel",
1189 |           "model_module_version": "1.2.0",
1190 |           "state": {
1191 |             "_model_module": "@jupyter-widgets/base",
1192 |             "_model_module_version": "1.2.0",
1193 |             "_model_name": "LayoutModel",
1194 |             "_view_count": null,
1195 |             "_view_module": "@jupyter-widgets/base",
1196 |             "_view_module_version": "1.2.0",
1197 |             "_view_name": "LayoutView",
1198 |             "align_content": null,
1199 |             "align_items": null,
1200 |             "align_self": null,
1201 |             "border": null,
1202 |             "bottom": null,
1203 |             "display": null,
1204 |             "flex": null,
1205 |             "flex_flow": null,
1206 |             "grid_area": null,
1207 |             "grid_auto_columns": null,
1208 |             "grid_auto_flow": null,
1209 |             "grid_auto_rows": null,
1210 |             "grid_column": null,
1211 |             "grid_gap": null,
1212 |             "grid_row": null,
1213 |             "grid_template_areas": null,
1214 |             "grid_template_columns": null,
1215 |             "grid_template_rows": null,
1216 |             "height": null,
1217 |             "justify_content": null,
1218 |             "justify_items": null,
1219 |             "left": null,
1220 |             "margin": null,
1221 |             "max_height": null,
1222 |             "max_width": null,
1223 |             "min_height": null,
1224 |             "min_width": null,
1225 |             "object_fit": null,
1226 |             "object_position": null,
1227 |             "order": null,
1228 |             "overflow": null,
1229 |             "overflow_x": null,
1230 |             "overflow_y": null,
1231 |             "padding": null,
1232 |             "right": null,
1233 |             "top": null,
1234 |             "visibility": null,
1235 |             "width": null
1236 |           }
1237 |         },
1238 |         "daa07b0624924c12853ca841d416345f": {
1239 |           "model_module": "@jupyter-widgets/controls",
1240 |           "model_name": "DescriptionStyleModel",
1241 |           "model_module_version": "1.5.0",
1242 |           "state": {
1243 |             "_model_module": "@jupyter-widgets/controls",
1244 |             "_model_module_version": "1.5.0",
1245 |             "_model_name": "DescriptionStyleModel",
1246 |             "_view_count": null,
1247 |             "_view_module": "@jupyter-widgets/base",
1248 |             "_view_module_version": "1.2.0",
1249 |             "_view_name": "StyleView",
1250 |             "description_width": ""
1251 |           }
1252 |         },
1253 |         "be4b89a4a133484fbb3f29d1652e085d": {
1254 |           "model_module": "@jupyter-widgets/base",
1255 |           "model_name": "LayoutModel",
1256 |           "model_module_version": "1.2.0",
1257 |           "state": {
1258 |             "_model_module": "@jupyter-widgets/base",
1259 |             "_model_module_version": "1.2.0",
1260 |             "_model_name": "LayoutModel",
1261 |             "_view_count": null,
1262 |             "_view_module": "@jupyter-widgets/base",
1263 |             "_view_module_version": "1.2.0",
1264 |             "_view_name": "LayoutView",
1265 |             "align_content": null,
1266 |             "align_items": null,
1267 |             "align_self": null,
1268 |             "border": null,
1269 |             "bottom": null,
1270 |             "display": null,
1271 |             "flex": null,
1272 |             "flex_flow": null,
1273 |             "grid_area": null,
1274 |             "grid_auto_columns": null,
1275 |             "grid_auto_flow": null,
1276 |             "grid_auto_rows": null,
1277 |             "grid_column": null,
1278 |             "grid_gap": null,
1279 |             "grid_row": null,
1280 |             "grid_template_areas": null,
1281 |             "grid_template_columns": null,
1282 |             "grid_template_rows": null,
1283 |             "height": null,
1284 |             "justify_content": null,
1285 |             "justify_items": null,
1286 |             "left": null,
1287 |             "margin": null,
1288 |             "max_height": null,
1289 |             "max_width": null,
1290 |             "min_height": null,
1291 |             "min_width": null,
1292 |             "object_fit": null,
1293 |             "object_position": null,
1294 |             "order": null,
1295 |             "overflow": null,
1296 |             "overflow_x": null,
1297 |             "overflow_y": null,
1298 |             "padding": null,
1299 |             "right": null,
1300 |             "top": null,
1301 |             "visibility": null,
1302 |             "width": null
1303 |           }
1304 |         },
1305 |         "bea5187033444bffad13a7d2fa2d193f": {
1306 |           "model_module": "@jupyter-widgets/controls",
1307 |           "model_name": "ProgressStyleModel",
1308 |           "model_module_version": "1.5.0",
1309 |           "state": {
1310 |             "_model_module": "@jupyter-widgets/controls",
1311 |             "_model_module_version": "1.5.0",
1312 |             "_model_name": "ProgressStyleModel",
1313 |             "_view_count": null,
1314 |             "_view_module": "@jupyter-widgets/base",
1315 |             "_view_module_version": "1.2.0",
1316 |             "_view_name": "StyleView",
1317 |             "bar_color": null,
1318 |             "description_width": ""
1319 |           }
1320 |         },
1321 |         "34e0247ed20f4c4bba133d972867b4d8": {
1322 |           "model_module": "@jupyter-widgets/base",
1323 |           "model_name": "LayoutModel",
1324 |           "model_module_version": "1.2.0",
1325 |           "state": {
1326 |             "_model_module": "@jupyter-widgets/base",
1327 |             "_model_module_version": "1.2.0",
1328 |             "_model_name": "LayoutModel",
1329 |             "_view_count": null,
1330 |             "_view_module": "@jupyter-widgets/base",
1331 |             "_view_module_version": "1.2.0",
1332 |             "_view_name": "LayoutView",
1333 |             "align_content": null,
1334 |             "align_items": null,
1335 |             "align_self": null,
1336 |             "border": null,
1337 |             "bottom": null,
1338 |             "display": null,
1339 |             "flex": null,
1340 |             "flex_flow": null,
1341 |             "grid_area": null,
1342 |             "grid_auto_columns": null,
1343 |             "grid_auto_flow": null,
1344 |             "grid_auto_rows": null,
1345 |             "grid_column": null,
1346 |             "grid_gap": null,
1347 |             "grid_row": null,
1348 |             "grid_template_areas": null,
1349 |             "grid_template_columns": null,
1350 |             "grid_template_rows": null,
1351 |             "height": null,
1352 |             "justify_content": null,
1353 |             "justify_items": null,
1354 |             "left": null,
1355 |             "margin": null,
1356 |             "max_height": null,
1357 |             "max_width": null,
1358 |             "min_height": null,
1359 |             "min_width": null,
1360 |             "object_fit": null,
1361 |             "object_position": null,
1362 |             "order": null,
1363 |             "overflow": null,
1364 |             "overflow_x": null,
1365 |             "overflow_y": null,
1366 |             "padding": null,
1367 |             "right": null,
1368 |             "top": null,
1369 |             "visibility": null,
1370 |             "width": null
1371 |           }
1372 |         },
1373 |         "a36d54a55ea84020b10de4c3a9a6c579": {
1374 |           "model_module": "@jupyter-widgets/controls",
1375 |           "model_name": "DescriptionStyleModel",
1376 |           "model_module_version": "1.5.0",
1377 |           "state": {
1378 |             "_model_module": "@jupyter-widgets/controls",
1379 |             "_model_module_version": "1.5.0",
1380 |             "_model_name": "DescriptionStyleModel",
1381 |             "_view_count": null,
1382 |             "_view_module": "@jupyter-widgets/base",
1383 |             "_view_module_version": "1.2.0",
1384 |             "_view_name": "StyleView",
1385 |             "description_width": ""
1386 |           }
1387 |         }
1388 |       }
1389 |     }
1390 |   },
1391 |   "cells": [
1392 |     {
1393 |       "cell_type": "code",
1394 |       "execution_count": null,
1395 |       "metadata": {
1396 |         "id": "0FFDx9A6Ol7P",
1397 |         "colab": {
1398 |           "base_uri": "https://localhost:8080/"
1399 |         },
1400 |         "outputId": "1c5ef378-8121-4cfb-c5d7-e50ac9d70d18"
1401 |       },
1402 |       "outputs": [
1403 |         {
1404 |           "output_type": "stream",
1405 |           "name": "stdout",
1406 |           "text": [
1407 |             "Installing collected packages: transformers, accelerate\n",
1408 |             "Successfully installed accelerate-0.19.0 transformers-4.29.1\n"
1409 |           ]
1410 |         }
1411 |       ],
1412 |       "source": [
1413 |         "!pip uninstall -y transformers accelerate\n",
1414 |         "!pip install transformers accelerate"
1415 |       ]
1416 |     },
1417 |     {
1418 |       "cell_type": "code",
1419 |       "source": [
1420 |         "import re\n",
1421 |         "import torch\n",
1422 |         "\n",
1423 |         "from transformers import GPT2LMHeadModel, GPT2Tokenizer\n",
1424 |         "from transformers import TextDataset, DataCollatorForLanguageModeling\n",
1425 |         "from transformers import Trainer, TrainingArguments"
1426 |       ],
1427 |       "metadata": {
1428 |         "id": "TzfZE5NDOp5M"
1429 |       },
1430 |       "execution_count": null,
1431 |       "outputs": []
1432 |     },
1433 |     {
1434 |       "cell_type": "code",
1435 |       "source": [
1436 |         "def split_poems(input_text):\n",
1437 |         "  \"\"\"Split `input_text` on the literal '*****' delimiter and return the resulting list of strings.\"\"\"\n",
1438 |         "  return input_text.split('*****')"
1439 |       ],
1440 |       "metadata": {
1441 |         "id": "3Av_iIRJZpRH"
1442 |       },
1443 |       "execution_count": null,
1444 |       "outputs": []
1445 |     },
1446 |     {
1447 |       "cell_type": "code",
1448 |       "source": [
1449 |         "with open('esenin.txt', 'r', encoding='utf-8') as f:  # Cyrillic corpus: do not rely on the platform default encoding\n",
1450 |         "  text = f.read()"
1451 |       ],
1452 |       "metadata": {
1453 |         "id": "097NJBRSUhx3"
1454 |       },
1455 |       "execution_count": null,
1456 |       "outputs": []
1457 |     },
1458 |     {
1459 |       "cell_type": "code",
1460 |       "source": [
1461 |         "poems = split_poems(text)\n",
1462 |         "poems[0]"
1463 |       ],
1464 |       "metadata": {
1465 |         "colab": {
1466 |           "base_uri": "https://localhost:8080/",
1467 |           "height": 53
1468 |         },
1469 |         "id": "uldyTuMJU-fd",
1470 |         "outputId": "1717ef15-a9cc-4bd2-fc74-167e2bd8d3ed"
1471 |       },
1472 |       "execution_count": null,
1473 |       "outputs": [
1474 |         {
1475 |           "output_type": "execute_result",
1476 |           "data": {
1477 |             "text/plain": [
1478 |               "'Вот уж вечер. Роса\\nБлестит на крапиве.\\nЯ стою у дороги,\\nПрислонившись к иве.\\n\\nОт луны свет большой\\nПрямо на нашу крышу.\\nГде-то песнь соловья\\nВдалеке я слышу.\\n\\nХорошо и тепло,\\nКак зимой у печки.\\nИ березы стоят,\\nКак большие свечки.\\n\\nИ вдали за рекой,\\nВидно, за опушкой,\\nСонный сторож стучит\\nМертвой колотушкой.'"
1479 |             ],
1480 |             "application/vnd.google.colaboratory.intrinsic+json": {
1481 |               "type": "string"
1482 |             }
1483 |           },
1484 |           "metadata": {},
1485 |           "execution_count": 7
1486 |         }
1487 |       ]
1488 |     },
1489 |     {
1490 |       "cell_type": "code",
1491 |       "source": [
1492 |         "train_path = 'train_dataset.txt'\n",
1493 |         "with open(train_path, \"w\", encoding=\"utf-8\") as f:  # explicit encoding so the Cyrillic text is written the same way on every platform\n",
1494 |         "    f.write('\\n'.join(poems) + '\\n')"
1495 |       ],
1496 |       "metadata": {
1497 |         "id": "lwg2qEFBXbob"
1498 |       },
1499 |       "execution_count": null,
1500 |       "outputs": []
1501 |     },
1502 |     {
1503 |       "cell_type": "code",
1504 |       "source": [
1505 |         "model_name = \"sberbank-ai/rugpt3medium_based_on_gpt2\"\n",
1506 |         "tokenizer = GPT2Tokenizer.from_pretrained(model_name)\n",
1507 |         "model = GPT2LMHeadModel.from_pretrained(model_name).to('cuda:0')"
1508 |       ],
1509 |       "metadata": {
1510 |         "colab": {
1511 |           "base_uri": "https://localhost:8080/",
1512 |           "height": 145,
1513 |           "referenced_widgets": [
1514 |             "c066b5c1bc90494daf341b7dd55b3af1",
1515 |             "259549e7870c4635a9d89cd61a102a4c",
1516 |             "b565ee1cadcd460fadbfc08672f561a1",
1517 |             "ca4e30cc5fdb45198a767e7d5cb7d343",
1518 |             "2e8dc9683c1540efbbe29529c4d85662",
1519 |             "6a79248883774a399a323d4e630c4b77",
1520 |             "db6a231b14e443bfa1ac0e6a590b4b11",
1521 |             "3cb811638bb644babb6d24bed0aede87",
1522 |             "a07a27fddc3e47afb8f565281ce86074",
1523 |             "be610141c1e14fff891bbcce0a22df3f",
1524 |             "72d8408a7fc14213abf5c9cc9b620f28",
1525 |             "f5fbf569cc7f438aa7d018af21db3fa3",
1526 |             "59037cf68d6543bf8dbecbce0114ce37",
1527 |             "d942d39cdefd410599ef0ee3c4cdbf15",
1528 |             "4668dbead9024218967e8dc318941818",
1529 |             "22a183f85aff49ec9ec7753cf8c0f3c7",
1530 |             "387a166402b340b3b88941dc573199ca",
1531 |             "8d8b2f1d14784d4f98e26c5e05b73de0",
1532 |             "3047ff5deb8546d286bb48c4e7b8d8c3",
1533 |             "c0c7a80a14c9469f934021610e66d683",
1534 |             "95ef08a39791436e9cdc7bec3850c013",
1535 |             "5811bf129dca492bb7d5c2cf2ae9cdc2",
1536 |             "dd8abf873df64f90849d9179efb982ea",
1537 |             "ee2aed55495b405ba03b36ab1787b02c",
1538 |             "127049f326ed494cb139a84671fb02b0",
1539 |             "03cab8fe7f9a4180940a82875e409004",
1540 |             "749d556a61f840fdbf7c59dc7fff4f64",
1541 |             "65f232af5f734cb5a3d6cf30f9c3d788",
1542 |             "62bd164d23d94f7cb2d4ce064fd0ec85",
1543 |             "4ef83848492749818b4f3bc8b3fecb0c",
1544 |             "01b0579211894dacaf6c7088a414fe95",
1545 |             "b0e9917a6d9d40f9be2bcd03e595a544",
1546 |             "e5e0108c21824639914ce204b29d49bb",
1547 |             "68de9de758d14d57b361e4832a1bd442",
1548 |             "9d98811282cf4527ad1c8dc600b14344",
1549 |             "356e23046cd04ea099e31b456dd43ea6",
1550 |             "1c41f9b9583b4bb693c43d14c3ac4d09",
1551 |             "099fcbe44b5a4d82ada714cc13723fbd",
1552 |             "617845c19c1042cba607f39f5e3aab74",
1553 |             "daa07b0624924c12853ca841d416345f",
1554 |             "be4b89a4a133484fbb3f29d1652e085d",
1555 |             "bea5187033444bffad13a7d2fa2d193f",
1556 |             "34e0247ed20f4c4bba133d972867b4d8",
1557 |             "a36d54a55ea84020b10de4c3a9a6c579"
1558 |           ]
1559 |         },
1560 |         "id": "u9Hu8WLQOp9N",
1561 |         "outputId": "f8ca75a6-93cb-4b02-c589-5d0184ebb591"
1562 |       },
1563 |       "execution_count": null,
1564 |       "outputs": [
1565 |         {
1566 |           "output_type": "display_data",
1567 |           "data": {
1568 |             "text/plain": [
1569 |               "Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.61M [00:00<?, ?B/s]"
1570 |             ],
1571 |             "application/vnd.jupyter.widget-view+json": {
1572 |               "version_major": 2,
1573 |               "version_minor": 0,
1574 |               "model_id": "c066b5c1bc90494daf341b7dd55b3af1"
1575 |             }
1576 |           },
1577 |           "metadata": {}
1578 |         },
1579 |         {
1580 |           "output_type": "display_data",
1581 |           "data": {
1582 |             "text/plain": [
1583 |               "Downloading (…)olve/main/merges.txt:   0%|          | 0.00/1.27M [00:00<?, ?B/s]"
1584 |             ],
1585 |             "application/vnd.jupyter.widget-view+json": {
1586 |               "version_major": 2,
1587 |               "version_minor": 0,
1588 |               "model_id": "f5fbf569cc7f438aa7d018af21db3fa3"
1589 |             }
1590 |           },
1591 |           "metadata": {}
1592 |         },
1593 |         {
1594 |           "output_type": "display_data",
1595 |           "data": {
1596 |             "text/plain": [
1597 |               "Downloading (…)lve/main/config.json:   0%|          | 0.00/674 [00:00<?, ?B/s]"
1598 |             ],
1599 |             "application/vnd.jupyter.widget-view+json": {
1600 |               "version_major": 2,
1601 |               "version_minor": 0,
1602 |               "model_id": "dd8abf873df64f90849d9179efb982ea"
1603 |             }
1604 |           },
1605 |           "metadata": {}
1606 |         },
1607 |         {
1608 |           "output_type": "display_data",
1609 |           "data": {
1610 |             "text/plain": [
1611 |               "Downloading pytorch_model.bin:   0%|          | 0.00/1.73G [00:00<?, ?B/s]"
1612 |             ],
1613 |             "application/vnd.jupyter.widget-view+json": {
1614 |               "version_major": 2,
1615 |               "version_minor": 0,
1616 |               "model_id": "68de9de758d14d57b361e4832a1bd442"
1617 |             }
1618 |           },
1619 |           "metadata": {}
1620 |         }
1621 |       ]
1622 |     },
1623 |     {
1624 |       "cell_type": "code",
1625 |       "source": [
1626 |         "def generate(input_text):\n",
1627 |         "  \"\"\"Sample a continuation of `input_text` with the global `model`/`tokenizer` and print it (returns None).\"\"\"\n",
1628 |         "  encoded = tokenizer(input_text, return_tensors=\"pt\").to(model.device)  # calling the tokenizer also yields the attention mask\n",
1629 |         "  model.eval()\n",
1630 |         "  with torch.no_grad():\n",
1631 |         "      out = model.generate(\n",
1632 |         "          **encoded,  # input_ids + attention_mask\n",
1633 |         "          do_sample=True,\n",
1634 |         "          num_beams=3,\n",
1635 |         "          temperature=1.5,\n",
1636 |         "          top_p=1.0,  # valid range is (0, 1]; the former 2.2 was out of range, 1.0 = no nucleus filtering\n",
1637 |         "          max_length=100,\n",
1638 |         "          repetition_penalty=1.2,\n",
1639 |         "          pad_token_id=model.config.eos_token_id,  # set explicitly rather than relying on the generate-time fallback\n",
1640 |         "      )\n",
1641 |         "  print(tokenizer.decode(out[0]))  # only the first returned sequence is shown"
1642 |       ],
1643 |       "metadata": {
1644 |         "id": "u1Z6lQ6UYWFC"
1645 |       },
1646 |       "execution_count": null,
1647 |       "outputs": []
1648 |     },
1649 |     {
1650 |       "cell_type": "code",
1651 |       "source": [
1652 |         "train_dataset = TextDataset(tokenizer=tokenizer, file_path=train_path, block_size=32)\n",
1653 |         "data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)"
1654 |       ],
1655 |       "metadata": {
1656 |         "colab": {
1657 |           "base_uri": "https://localhost:8080/"
1658 |         },
1659 |         "id": "KY-mVk59OqE5",
1660 |         "outputId": "fad1aeca-b2d7-4e6a-827f-e975990b0851"
1661 |       },
1662 |       "execution_count": null,
1663 |       "outputs": [
1664 |         {
1665 |           "output_type": "stream",
1666 |           "name": "stderr",
1667 |           "text": [
1668 |             "/usr/local/lib/python3.10/dist-packages/transformers/data/datasets/language_modeling.py:53: FutureWarning: This dataset will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/main/examples/pytorch/language-modeling/run_mlm.py\n",
1669 |             "  warnings.warn(\n"
1670 |           ]
1671 |         }
1672 |       ]
1673 |     },
1674 |     {
1675 |       "cell_type": "code",
1676 |       "source": [
1677 |         "generate(\"Кто я? Что я? Только лишь мечтатель,\")"
1678 |       ],
1679 |       "metadata": {
1680 |         "colab": {
1681 |           "base_uri": "https://localhost:8080/"
1682 |         },
1683 |         "id": "oIo1KtVjzsaZ",
1684 |         "outputId": "c4ab1de0-c945-472e-f712-51a9fdc2bd0e"
1685 |       },
1686 |       "execution_count": null,
1687 |       "outputs": [
1688 |         {
1689 |           "output_type": "stream",
1690 |           "name": "stderr",
1691 |           "text": [
1692 |             "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
1693 |             "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
1694 |           ]
1695 |         },
1696 |         {
1697 |           "output_type": "stream",
1698 |           "name": "stdout",
1699 |           "text": [
1700 |             "Кто я? Что я? Только лишь мечтатель, но в душе романтик… — бормотал он вслух.\n",
1701 |             "\n",
1702 |             "В дверь постучали.\n",
1703 |             "\n",
1704 |             "— Войдите, — позвал он.\n",
1705 |             "\n",
1706 |             "— Что вам угодно?\n",
1707 |             "\n",
1708 |             "— Мне хотелось бы поговорить с вами.\n",
1709 |             "\n",
1710 |             "Дверь отворилась.\n",
1711 |             "\n",
1712 |             "— Проходите, — пригласил граф.\n",
1713 |             "\n",
1714 |             "— Благодарю вас, — ответил гость.\n",
1715 |             "\n",
1716 |             "— А вы знаете,\n"
1717 |           ]
1718 |         }
1719 |       ]
1720 |     },
1721 |     {
1722 |       "cell_type": "code",
1723 |       "source": [
1724 |         "training_args = TrainingArguments(\n",
1725 |         "    output_dir=\"./finetuned\",         # The output directory\n",
1726 |         "    overwrite_output_dir=True,        # Overwrite the content of the output dir\n",
1727 |         "    num_train_epochs=40,              # number of training epochs\n",
1728 |         "    per_device_train_batch_size=20,   # batch size for training\n",
1729 |         "    per_device_eval_batch_size=32,    # batch size for evaluation\n",
1730 |         "    warmup_steps=9,                   # number of warmup steps for learning rate scheduler\n",
1731 |         "    gradient_accumulation_steps=5\n",
1732 |         ")\n",
1733 |         "\n",
1734 |         "trainer = Trainer(\n",
1735 |         "    model=model,\n",
1736 |         "    args=training_args,\n",
1737 |         "    data_collator=data_collator,\n",
1738 |         "    train_dataset=train_dataset,\n",
1739 |         "    optimizers=(torch.optim.AdamW(model.parameters(), lr=1e-5), None)\n",
1740 |         ")"
1741 |       ],
1742 |       "metadata": {
1743 |         "id": "QmoiTZeyOqIp"
1744 |       },
1745 |       "execution_count": null,
1746 |       "outputs": []
1747 |     },
1748 |     {
1749 |       "cell_type": "code",
1750 |       "source": [
1751 |         "trainer.train()"
1752 |       ],
1753 |       "metadata": {
1754 |         "colab": {
1755 |           "base_uri": "https://localhost:8080/",
1756 |           "height": 110
1757 |         },
1758 |         "id": "CC_t_9TfR8FJ",
1759 |         "outputId": "ea7f1aa8-101e-4dd8-8cf4-fa849c14364a"
1760 |       },
1761 |       "execution_count": null,
1762 |       "outputs": [
1763 |         {
1764 |           "output_type": "display_data",
1765 |           "data": {
1766 |             "text/plain": [
1767 |               "<IPython.core.display.HTML object>"
1768 |             ],
1769 |             "text/html": [
1770 |               "\n",
1771 |               "    <div>\n",
1772 |               "      \n",
1773 |               "      <progress value='440' max='440' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1774 |               "      [440/440 16:47, Epoch 37/40]\n",
1775 |               "    </div>\n",
1776 |               "    <table border=\"1\" class=\"dataframe\">\n",
1777 |               "  <thead>\n",
1778 |               " <tr style=\"text-align: left;\">\n",
1779 |               "      <th>Step</th>\n",
1780 |               "      <th>Training Loss</th>\n",
1781 |               "    </tr>\n",
1782 |               "  </thead>\n",
1783 |               "  <tbody>\n",
1784 |               "  </tbody>\n",
1785 |               "</table><p>"
1786 |             ]
1787 |           },
1788 |           "metadata": {}
1789 |         },
1790 |         {
1791 |           "output_type": "execute_result",
1792 |           "data": {
1793 |             "text/plain": [
1794 |               "TrainOutput(global_step=440, training_loss=2.85989296653054, metrics={'train_runtime': 1009.5923, 'train_samples_per_second': 45.286, 'train_steps_per_second': 0.436, 'total_flos': 2517417363898368.0, 'train_loss': 2.85989296653054, 'epoch': 37.93})"
1795 |             ]
1796 |           },
1797 |           "metadata": {},
1798 |           "execution_count": 14
1799 |         }
1800 |       ]
1801 |     },
1802 |     {
1803 |       "cell_type": "code",
1804 |       "source": [
1805 |         "model.save_pretrained('model_esenin')"
1806 |       ],
1807 |       "metadata": {
1808 |         "id": "ga2JF0VBR8Jn"
1809 |       },
1810 |       "execution_count": null,
1811 |       "outputs": []
1812 |     },
1813 |     {
1814 |       "cell_type": "code",
1815 |       "source": [
1816 |         "generate(\"Кто я? Что я? Только лишь мечтатель,\")"
1817 |       ],
1818 |       "metadata": {
1819 |         "colab": {
1820 |           "base_uri": "https://localhost:8080/"
1821 |         },
1822 |         "id": "mtk7jaBsYczX",
1823 |         "outputId": "16a9c812-ba35-427f-f3f3-3f66cb1cbb7a"
1824 |       },
1825 |       "execution_count": null,
1826 |       "outputs": [
1827 |         {
1828 |           "output_type": "stream",
1829 |           "name": "stderr",
1830 |           "text": [
1831 |             "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
1832 |             "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
1833 |           ]
1834 |         },
1835 |         {
1836 |           "output_type": "stream",
1837 |           "name": "stdout",
1838 |           "text": [
1839 |             "Кто я? Что я? Только лишь мечтатель,\n",
1840 |             "Уставивший в даль прекрасные очи.\n",
1841 |             "\n",
1842 |             "Мне чудится, все в душе горит огнем,\n",
1843 |             "И мне мерещится, что я сам в огне.\n",
1844 |             "Такой уж я на земле странник и странник.\n",
1845 |             "Глупое сердце, не бейся.\n",
1846 |             "\n",
1847 |             "Не бейся, не бейся, я для тебя готов -\n",
1848 |             "Я готов для тебя выть и выть волком.\n",
1849 |             "\n"
1850 |           ]
1851 |         }
1852 |       ]
1853 |     },
1854 |     {
1855 |       "cell_type": "code",
1856 |       "source": [
1857 |         "generate(\"Кто я?\")"
1858 |       ],
1859 |       "metadata": {
1860 |         "colab": {
1861 |           "base_uri": "https://localhost:8080/"
1862 |         },
1863 |         "id": "6vtZYzsUYAT_",
1864 |         "outputId": "8910dae4-7c30-4933-cfe2-a8bd667c4a86"
1865 |       },
1866 |       "execution_count": null,
1867 |       "outputs": [
1868 |         {
1869 |           "output_type": "stream",
1870 |           "name": "stderr",
1871 |           "text": [
1872 |             "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
1873 |             "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
1874 |           ]
1875 |         },
1876 |         {
1877 |           "output_type": "stream",
1878 |           "name": "stdout",
1879 |           "text": [
1880 |             "Кто я? Кто я такой?\n",
1881 |             "Оглянись вокруг -\n",
1882 |             "В этом мире все люди звери!\n",
1883 |             "\n",
1884 |             "Но среди них есть и поэты,\n",
1885 |             "И есть художники, и певцы.\n",
1886 |             "И таких немало на белом свете,\n",
1887 |             "Только не счесть их во всей земле.\n",
1888 |             "\n",
1889 |             "Все они стремятся в высь,\n",
1890 |             "В эти дали, где звенит прибой,\n",
1891 |             "И с высоты своей огромной\n",
1892 |             "Им все равно, что внизу творится.\n",
1893 |             "\n",
1894 |             "Я живу среди людей,\n",
1895 |             "\n"
1896 |           ]
1897 |         }
1898 |       ]
1899 |     },
1900 |     {
1901 |       "cell_type": "code",
1902 |       "source": [
1903 |         "generate(\"Друг мой, друг мой,\")"
1904 |       ],
1905 |       "metadata": {
1906 |         "colab": {
1907 |           "base_uri": "https://localhost:8080/"
1908 |         },
1909 |         "id": "MTkBigBlYAXi",
1910 |         "outputId": "23c938b9-9d19-4321-a971-25f30dd5c090"
1911 |       },
1912 |       "execution_count": null,
1913 |       "outputs": [
1914 |         {
1915 |           "output_type": "stream",
1916 |           "name": "stderr",
1917 |           "text": [
1918 |             "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
1919 |             "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
1920 |           ]
1921 |         },
1922 |         {
1923 |           "output_type": "stream",
1924 |           "name": "stdout",
1925 |           "text": [
1926 |             " Друг мой, друг мой, что ты мелешь?\n",
1927 |             "\n",
1928 |             "Ты, верно, пьян и думаешь о другом.\n",
1929 |             "Или, может быть, я тебе приснился.\n",
1930 |             "Пойми, ведь это всего лишь сон,\n",
1931 |             "Пей, сколько хочешь, хоть до утра.\n",
1932 |             "\n",
1933 |             "Пойми, ведь это всего лишь сон,\n",
1934 |             "Это все мне только снится.\n",
1935 |             "Пойми, ведь это всего лишь сон,\n",
1936 |             "Это все мне только снится.\n",
1937 |             "\n",
1938 |             "И теперь, когда\n"
1939 |           ]
1940 |         }
1941 |       ]
1942 |     },
1943 |     {
1944 |       "cell_type": "code",
1945 |       "source": [
1946 |         "generate(\"Зачем все это?\")"
1947 |       ],
1948 |       "metadata": {
1949 |         "colab": {
1950 |           "base_uri": "https://localhost:8080/"
1951 |         },
1952 |         "id": "BkPiaTuBYAZw",
1953 |         "outputId": "0f0417a1-b87b-4727-be33-e428322ea046"
1954 |       },
1955 |       "execution_count": null,
1956 |       "outputs": [
1957 |         {
1958 |           "output_type": "stream",
1959 |           "name": "stderr",
1960 |           "text": [
1961 |             "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
1962 |             "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
1963 |           ]
1964 |         },
1965 |         {
1966 |           "output_type": "stream",
1967 |           "name": "stdout",
1968 |           "text": [
1969 |             "Зачем все это?\n",
1970 |             "Отраву я пью горькую\n",
1971 |             "И смотрю на луну.\n",
1972 |             "\n",
1973 |             "Небо синее, месяц синий,\n",
1974 |             "Поле голубее и светлей.\n",
1975 |             "Дымится рожь на полях,\n",
1976 |             "Зеленеет чернь в полях.\n",
1977 |             "\n",
1978 |             "Голова кружится мне и звенит,\n",
1979 |             "Пью я горькую и гляжу.\n",
1980 |             "Полюбил я девушку простую,\n",
1981 |             "С косою на плечах.\n",
1982 |             "\n",
1983 |             "Я пойду с нею\n"
1984 |           ]
1985 |         }
1986 |       ]
1987 |     },
1988 |     {
1989 |       "cell_type": "code",
1990 |       "source": [
1991 |         "generate(\"Зачем нам анализ текстов?\")"
1992 |       ],
1993 |       "metadata": {
1994 |         "id": "pPeM2SdOfbc7",
1995 |         "colab": {
1996 |           "base_uri": "https://localhost:8080/"
1997 |         },
1998 |         "outputId": "043654be-b663-47a9-b7b5-07f01129e299"
1999 |       },
2000 |       "execution_count": null,
2001 |       "outputs": [
2002 |         {
2003 |           "output_type": "stream",
2004 |           "name": "stderr",
2005 |           "text": [
2006 |             "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
2007 |             "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
2008 |           ]
2009 |         },
2010 |         {
2011 |           "output_type": "stream",
2012 |           "name": "stdout",
2013 |           "text": [
2014 |             "Зачем нам анализ текстов?\n",
2015 |             "Нам бы только сердцем верить,\n",
2016 |             "Чтоб в трудный час не дрожать,\n",
2017 |             "Не киснуть в тоске бесцельной\n",
2018 |             "И не прятать от жизни взгляд.\n",
2019 |             "\n",
2020 |             "Пусть она катится к чертям -\n",
2021 |             "Наша жизнь! Пусть катится!\n",
2022 |             "Утопить бы в вине тоску -\n",
2023 |             "За одну эту малость.\n",
2024 |             "\n",
2025 |             "Пусть катится! Ведь так заведено -\n",
2026 |             "Каждый сам кузнец своего счастья.\n",
2027 |             "Как же без этого? Ведь так\n"
2028 |           ]
2029 |         }
2030 |       ]
2031 |     },
2032 |     {
2033 |       "cell_type": "code",
2034 |       "source": [],
2035 |       "metadata": {
2036 |         "id": "OXg8elnaa-i0"
2037 |       },
2038 |       "execution_count": null,
2039 |       "outputs": []
2040 |     },
2041 |     {
2042 |       "cell_type": "code",
2043 |       "source": [],
2044 |       "metadata": {
2045 |         "id": "6LBbA14ca-mD"
2046 |       },
2047 |       "execution_count": null,
2048 |       "outputs": []
2049 |     },
2050 |     {
2051 |       "cell_type": "code",
2052 |       "source": [],
2053 |       "metadata": {
2054 |         "id": "9ppuvR5Ta-oz"
2055 |       },
2056 |       "execution_count": null,
2057 |       "outputs": []
2058 |     }
2059 |   ]
2060 | }


--------------------------------------------------------------------------------