├── .DS_Store ├── .gitignore ├── README.md ├── exercises ├── 2019 │ ├── ex1 │ │ ├── PythonOddities.ipynb │ │ ├── ex1_solution.ipynb │ │ ├── ex2_solution.ipynb │ │ ├── ex3_solution.ipynb │ │ └── npprimer.ipynb │ ├── ex10 │ │ ├── barbara.png │ │ ├── boat.png │ │ ├── drum1.wav │ │ ├── drum2.wav │ │ ├── ex2-sol.ipynb │ │ ├── ex2.ipynb │ │ ├── ex4-sol.ipynb │ │ └── ex4.ipynb │ ├── ex10_old │ │ ├── ex1.py │ │ ├── ex1_solution.py │ │ ├── ex2.py │ │ └── ex2_solution.py │ ├── ex11 │ │ ├── ex3-sol.ipynb │ │ └── ex3.ipynb │ ├── ex11_old │ │ ├── features_cosmology_project.py │ │ ├── segment_aerial_images.ipynb │ │ └── tf_aerial_images.py │ ├── ex2 │ │ ├── CroppedYale.zip │ │ ├── CroppedYale │ │ │ ├── yaleB01_P00A-005E-10.pgm │ │ │ ├── yaleB02_P00A-005E-10.pgm │ │ │ ├── yaleB03_P00A-005E-10.pgm │ │ │ ├── yaleB04_P00A-005E-10.pgm │ │ │ ├── yaleB05_P00A-005E-10.pgm │ │ │ ├── yaleB06_P00A-005E-10.pgm │ │ │ ├── yaleB07_P00A-005E-10.pgm │ │ │ ├── yaleB08_P00A-005E-10.pgm │ │ │ ├── yaleB09_P00A-005E-10.pgm │ │ │ ├── yaleB10_P00A-005E-10.pgm │ │ │ ├── yaleB11_P00A-005E-10.pgm │ │ │ ├── yaleB12_P00A-005E-10.pgm │ │ │ ├── yaleB13_P00A-005E-10.pgm │ │ │ ├── yaleB15_P00A-005E-10.pgm │ │ │ ├── yaleB16_P00A-005E-10.pgm │ │ │ ├── yaleB17_P00A-005E-10.pgm │ │ │ ├── yaleB18_P00A-005E-10.pgm │ │ │ ├── yaleB19_P00A-005E-10.pgm │ │ │ ├── yaleB20_P00A-005E-10.pgm │ │ │ ├── yaleB21_P00A-005E-10.pgm │ │ │ ├── yaleB22_P00A-005E-10.pgm │ │ │ ├── yaleB23_P00A-005E-10.pgm │ │ │ ├── yaleB24_P00A-005E-10.pgm │ │ │ ├── yaleB25_P00A-005E-10.pgm │ │ │ ├── yaleB26_P00A-005E-10.pgm │ │ │ ├── yaleB27_P00A-005E-10.pgm │ │ │ ├── yaleB28_P00A-005E-10.pgm │ │ │ ├── yaleB29_P00A-005E-10.pgm │ │ │ ├── yaleB30_P00A-005E-10.pgm │ │ │ ├── yaleB31_P00A-005E-10.pgm │ │ │ ├── yaleB32_P00A-005E-10.pgm │ │ │ ├── yaleB33_P00A-005E-10.pgm │ │ │ ├── yaleB34_P00A-005E-10.pgm │ │ │ ├── yaleB35_P00A-005E-10.pgm │ │ │ ├── yaleB36_P00A-005E-10.pgm │ │ │ ├── yaleB37_P00A-005E-10.pgm │ │ │ ├── yaleB38_P00A-005E-10.pgm │ │ │ └── yaleB39_P00A-005E-10.pgm │ │ ├── Eigenfaces_solution.ipynb │ │ ├── Eigenfaces_template.ipynb │ │ ├── eigenfaces.png │ │ ├── gaussian_isolines.ipynb │ │ └── tutorial02.pdf │ ├── ex3 │ │ ├── ex3.ipynb │ │ └── tutorial03.pdf │ ├── ex5 │ │ ├── README.md │ │ ├── associated-press.tar.gz │ │ ├── pLSA-for-the-AP-solution.ipynb │ │ ├── pLSA-for-the-AP.ipynb │ │ └── plsm-em-notes.pdf │ ├── ex6 │ │ ├── README.md │ │ ├── build_vocab.sh │ │ ├── cooc.py │ │ ├── cut_vocab.sh │ │ ├── glove_solution.py │ │ ├── glove_template.py │ │ ├── pickle_vocab.py │ │ └── tutorial06.pdf │ ├── ex7 │ │ └── tutorial07.pdf │ ├── ex8 │ │ ├── barbara.png │ │ ├── boat.png │ │ ├── sparse_coding.ipynb │ │ ├── sparse_coding_solution.ipynb │ │ └── tutorial08.pdf │ └── ex9 │ │ ├── Compressed_sensing.ipynb │ │ ├── Compressed_sensing_solution.ipynb │ │ └── tutorial09.pdf ├── 2020 │ ├── .DS_Store │ ├── ex10 │ │ ├── barbara.png │ │ ├── boat.png │ │ ├── drum1.wav │ │ ├── drum2.wav │ │ ├── ex2-sol.ipynb │ │ ├── ex2.ipynb │ │ ├── ex4-sol.ipynb │ │ └── ex4.ipynb │ ├── ex11 │ │ ├── ex3-sol.ipynb │ │ └── ex3.ipynb │ └── ex9 │ │ ├── features_cosmology_project.py │ │ ├── segment_aerial_images.ipynb │ │ └── tf_aerial_images.py ├── 2021 │ ├── Exercise_1.ipynb │ ├── Exercise_1_Solution.ipynb │ ├── Exercise_2.ipynb │ ├── Exercise_2_gaussian_isolines.ipynb │ ├── Exercise_2_solution.ipynb │ ├── Exercise_3_Solution.ipynb │ ├── Exercise_8_NMF.ipynb │ ├── Exercise_8_NMF_Solution.ipynb │ ├── Project_1.ipynb │ ├── Project_2.ipynb │ ├── Project_3.ipynb │ └── ex8 │ │ ├── README.md │ │ ├── build_vocab.sh │ │ ├── cooc.py │ │ ├── cut_vocab.sh │ │ ├── glove_solution.py │ │ ├── glove_template.py │ │ └── pickle_vocab.py └── .DS_Store └── lecture_notes_code └── Chapter_1.ipynb /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/*.swp 2 | **/.ipynb_checkpoints 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # lecture_cil_public 2 | Public repository for CIL 3 | -------------------------------------------------------------------------------- /exercises/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/.DS_Store -------------------------------------------------------------------------------- /exercises/2019/ex1/PythonOddities.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## List Comprehensions and Generator Expressions" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "mylist = [1, 2, 3, 4, 5]" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/plain": [ 29 | "[1, 4, 9, 16, 25]" 30 | ] 31 | }, 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "output_type": "execute_result" 35 | } 36 | ], 37 | "source": [ 38 | "[i**2 for i in mylist]" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "[1, 4]" 50 | ] 51 | }, 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": [ 58 | "[i**2 for i in mylist if i < 3]" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 10, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/plain": [ 69 | "[1, 4, 9, 16, 25]" 70 | ] 71 | }, 72 | "execution_count": 10, 73 | "metadata": {}, 74 | "output_type": "execute_result" 75 | } 76 | ], 77 | "source": [ 78 | "list(map(lambda i:i**2, mylist))" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 25, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "name": "stdout", 88 | "output_type": "stream", 89 | "text": [ 90 | "\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "mapObject = map(lambda i:i**2, mylist)\n", 96 | "print(mapObject)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 26, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/plain": [ 107 | "[1.0, 2.0, 3.0, 4.0, 5.0]" 108 | ] 109 | }, 110 | "execution_count": 26, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "[i ** 0.5 for i in mapObject]" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 27, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "name": "stdout", 126 | "output_type": "stream", 127 | "text": [ 128 | "\n" 129 | ] 130 | } 131 | ], 132 | "source": [ 133 | "print(mapObject)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 28, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "data": { 143 | "text/plain": [ 144 | "[]" 145 | ] 146 | }, 147 | "execution_count": 28, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "[i ** 0.5 for i in mapObject]" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 29, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "name": "stdout", 163 | "output_type": "stream", 164 | "text": [ 165 | " at 0x105bcaba0>\n" 166 | ] 167 | } 168 | ], 169 | "source": [ 170 | "myGenObj = (i **2 for i in mylist)\n", 171 | "print(myGenObj)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 30, 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "data": { 181 | "text/plain": [ 182 | "[1.0, 2.0, 3.0, 4.0, 5.0]" 183 | ] 184 | }, 185 | "execution_count": 30, 186 | "metadata": {}, 187 | "output_type": "execute_result" 188 | } 189 | ], 190 | "source": [ 191 | "[i ** 0.5 for i in myGenObj]" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 31, 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "data": { 201 | "text/plain": [ 202 | "[]" 203 | ] 204 | }, 205 | "execution_count": 31, 206 | "metadata": {}, 207 | "output_type": "execute_result" 208 | } 209 | ], 210 | "source": [ 211 | "[i ** 0.5 for i in myGenObj]" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "## Generators" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 32, 224 | "metadata": { 225 | "collapsed": true 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "def myGenerator(x):\n", 230 | " state = 0\n", 231 | " state += x\n", 232 | " yield state\n", 233 | " state += x\n", 234 | " yield state" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 33, 240 | "metadata": { 241 | "collapsed": true 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "g = myGenerator(5)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 34, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "text/plain": [ 256 | "5" 257 | ] 258 | }, 259 | "execution_count": 34, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | } 263 | ], 264 | "source": [ 265 | "next(g)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 35, 271 | "metadata": {}, 272 | "outputs": [ 273 | { 274 | "data": { 275 | "text/plain": [ 276 | "10" 277 | ] 278 | }, 279 | "execution_count": 35, 280 | "metadata": {}, 281 | "output_type": "execute_result" 282 | } 283 | ], 284 | "source": [ 285 | "next(g)" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 36, 291 | "metadata": {}, 292 | "outputs": [ 293 | { 294 | "ename": "StopIteration", 295 | "evalue": "", 296 | "output_type": "error", 297 | "traceback": [ 298 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 299 | "\u001b[0;31mStopIteration\u001b[0m Traceback (most recent call last)", 300 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 301 | "\u001b[0;31mStopIteration\u001b[0m: " 302 | ] 303 | } 304 | ], 305 | "source": [ 306 | "next(g)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 37, 312 | "metadata": {}, 313 | "outputs": [ 314 | { 315 | "data": { 316 | "text/plain": [ 317 | "[5, 10]" 318 | ] 319 | }, 320 | "execution_count": 37, 321 | "metadata": {}, 322 | "output_type": "execute_result" 323 | } 324 | ], 325 | "source": [ 326 | "list(myGenerator(5))" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 38, 332 | "metadata": {}, 333 | "outputs": [ 334 | { 335 | "data": { 336 | "text/plain": [ 337 | "[25, 100]" 338 | ] 339 | }, 340 | "execution_count": 38, 341 | "metadata": {}, 342 | "output_type": "execute_result" 343 | } 344 | ], 345 | "source": [ 346 | "[i**2 for i in myGenerator(5)]" 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": {}, 352 | "source": [ 353 | "## Random Stuff" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 39, 359 | "metadata": {}, 360 | "outputs": [ 361 | { 362 | "name": "stdout", 363 | "output_type": "stream", 364 | "text": [ 365 | "4 5\n", 366 | "5 4\n" 367 | ] 368 | } 369 | ], 370 | "source": [ 371 | "x, y = 4, 5\n", 372 | "print(x, y)\n", 373 | "x, y = y, x\n", 374 | "print(x, y)" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 40, 380 | "metadata": {}, 381 | "outputs": [ 382 | { 383 | "name": "stdout", 384 | "output_type": "stream", 385 | "text": [ 386 | "Bill is 20 years old\n", 387 | "John is 25 years old\n", 388 | "Max is 30 years old\n", 389 | "Marc is 27 years old\n" 390 | ] 391 | } 392 | ], 393 | "source": [ 394 | "ages, names = [20, 25, 30, 27], ['Bill', 'John', 'Max', 'Marc']\n", 395 | "\n", 396 | "for age, name in zip(ages, names):\n", 397 | " print(\"{} is {} years old\".format(name, age))" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": 41, 403 | "metadata": {}, 404 | "outputs": [ 405 | { 406 | "name": "stdout", 407 | "output_type": "stream", 408 | "text": [ 409 | "6\n", 410 | "6\n" 411 | ] 412 | } 413 | ], 414 | "source": [ 415 | "myList = [5, 6, 7]\n", 416 | "x, y, z = myList\n", 417 | "print(y)\n", 418 | "\n", 419 | "def f(a, b, c):\n", 420 | " print(b)\n", 421 | "\n", 422 | "f(*myList)" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": 42, 428 | "metadata": {}, 429 | "outputs": [ 430 | { 431 | "name": "stdout", 432 | "output_type": "stream", 433 | "text": [ 434 | "(3, 4, 5)\n", 435 | "{}\n", 436 | "---\n", 437 | "()\n", 438 | "{'a': 2, 'c': 6}\n", 439 | "---\n" 440 | ] 441 | } 442 | ], 443 | "source": [ 444 | "def f(*args, **kwargs):\n", 445 | " print(args)\n", 446 | " print(kwargs)\n", 447 | " print('---')\n", 448 | " \n", 449 | "f(3, 4, 5)\n", 450 | "f(a=2, c=6)" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": 43, 456 | "metadata": {}, 457 | "outputs": [ 458 | { 459 | "name": "stdout", 460 | "output_type": "stream", 461 | "text": [ 462 | "9 10 11\n", 463 | "---\n", 464 | "[1, 2, 3] 10 11\n", 465 | "---\n", 466 | "{'a': 4, 'b': 5, 'c': 6} 10 11\n", 467 | "---\n", 468 | "1 2 3\n", 469 | "---\n", 470 | "4 5 6\n", 471 | "---\n" 472 | ] 473 | } 474 | ], 475 | "source": [ 476 | "def g(a=9, b=10, c=11):\n", 477 | " print(a, b, c)\n", 478 | " print('---')\n", 479 | "g()\n", 480 | "\n", 481 | "myList = [1, 2, 3]\n", 482 | "myDict = {'a': 4, 'b': 5, 'c': 6}\n", 483 | "\n", 484 | "g(myList)\n", 485 | "g(myDict)\n", 486 | "g(*myList)\n", 487 | "g(**myDict)" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 44, 493 | "metadata": {}, 494 | "outputs": [ 495 | { 496 | "name": "stdout", 497 | "output_type": "stream", 498 | "text": [ 499 | "Basel is so much more beautiful than Zurich\n" 500 | ] 501 | } 502 | ], 503 | "source": [ 504 | "# this only works in python 3.6+\n", 505 | "goodCity, badCity = \"Basel\", \"Zurich\"\n", 506 | "print(f\"{goodCity} is so much more beautiful than {badCity}\")" 507 | ] 508 | }, 509 | { 510 | "cell_type": "code", 511 | "execution_count": 46, 512 | "metadata": {}, 513 | "outputs": [ 514 | { 515 | "name": "stdout", 516 | "output_type": "stream", 517 | "text": [ 518 | "alias dalab='cd ~/ETH/ETH_PhD/DA.INF'\n" 519 | ] 520 | } 521 | ], 522 | "source": [ 523 | "import os\n", 524 | "with open(os.path.expanduser('~/.bashrc')) as f:\n", 525 | " for line in f:\n", 526 | " print(line)\n", 527 | " break # I'm not giving you my secret magic vimrc ;)" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": 25, 533 | "metadata": {}, 534 | "outputs": [ 535 | { 536 | "name": "stdout", 537 | "output_type": "stream", 538 | "text": [ 539 | "hi\n", 540 | "crap\n", 541 | "oh well\n" 542 | ] 543 | } 544 | ], 545 | "source": [ 546 | "try:\n", 547 | " print('hi')\n", 548 | " 5 / 0\n", 549 | " print('there')\n", 550 | "except:\n", 551 | " print('crap')\n", 552 | "finally:\n", 553 | " print('oh well')" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": 26, 559 | "metadata": {}, 560 | "outputs": [ 561 | { 562 | "name": "stdout", 563 | "output_type": "stream", 564 | "text": [ 565 | "['there', 'is', 'a', 'house', 'in', 'new', 'orleans']\n", 566 | "\n", 567 | "0 there\n", 568 | "1 is\n", 569 | "2 a\n", 570 | "3 house\n", 571 | "4 in\n", 572 | "5 new\n", 573 | "6 orleans\n", 574 | "\n", 575 | "a\n", 576 | "house\n", 577 | "in\n", 578 | "is\n", 579 | "new\n", 580 | "orleans\n", 581 | "there\n", 582 | "\n", 583 | "orleans\n", 584 | "new\n", 585 | "in\n", 586 | "house\n", 587 | "a\n", 588 | "is\n", 589 | "there\n" 590 | ] 591 | } 592 | ], 593 | "source": [ 594 | "a = 'there is a house in new orleans'\n", 595 | "print(a.split())\n", 596 | "print()\n", 597 | "\n", 598 | "for idx, w in enumerate(a.split()):\n", 599 | " print(idx, w)\n", 600 | "print()\n", 601 | "\n", 602 | "for w in sorted(a.split()):\n", 603 | " print(w)\n", 604 | "print()\n", 605 | " \n", 606 | "for w in reversed(a.split()):\n", 607 | " print(w)" 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "execution_count": null, 613 | "metadata": { 614 | "collapsed": true 615 | }, 616 | "outputs": [], 617 | "source": [] 618 | } 619 | ], 620 | "metadata": { 621 | "kernelspec": { 622 | "display_name": "Python 3", 623 | "language": "python", 624 | "name": "python3" 625 | }, 626 | "language_info": { 627 | "codemirror_mode": { 628 | "name": "ipython", 629 | "version": 3 630 | }, 631 | "file_extension": ".py", 632 | "mimetype": "text/x-python", 633 | "name": "python", 634 | "nbconvert_exporter": "python", 635 | "pygments_lexer": "ipython3", 636 | "version": "3.6.1" 637 | } 638 | }, 639 | "nbformat": 4, 640 | "nbformat_minor": 2 641 | } 642 | -------------------------------------------------------------------------------- /exercises/2019/ex1/ex1_solution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Data Generation\n", 8 | "===" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": { 15 | "collapsed": true 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "import numpy as np" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "num_samples, num_features = 10, 5" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "- **np.random.rand(d1,d2,..,dn)** creates array of given shape and populates it with random samples from uniform distribution over [0, 1)\n", 38 | "\n", 39 | "- **np.mean(array,axis=None)** Compute the arithmetic mean along the specified axis. Default: flattend array\n", 40 | "- **np.std(array,axis=None)** Compute the standard deviation along the specified axis. Default: flattend array\n", 41 | "\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "data = np.random.rand(num_samples, num_features)\n", 51 | "data" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "np.mean(data, axis=0)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "np.std(data, axis=0)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "Solution\n", 77 | "===" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": true 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "def standardize(X):\n", 89 | " assert len(X.shape)==2, 'X must be a 2-dim matrix!'\n", 90 | " num_samples=X.shape[0]\n", 91 | " num_features=X.shape[1]\n", 92 | " X_centered=X-np.mean(X,axis=0)\n", 93 | " X_std=X_centered / np.std(X, axis=0)\n", 94 | " \n", 95 | " return X_std" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": true, 103 | "scrolled": false 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "data_centered=standardize(data)\n", 108 | "print('means: ', np.mean(data_centered, axis=0))\n", 109 | "print('sd: ', np.std(data_centered, axis=0))" 110 | ] 111 | } 112 | ], 113 | "metadata": { 114 | "kernelspec": { 115 | "display_name": "Python 3", 116 | "language": "python", 117 | "name": "python3" 118 | }, 119 | "language_info": { 120 | "codemirror_mode": { 121 | "name": "ipython", 122 | "version": 3 123 | }, 124 | "file_extension": ".py", 125 | "mimetype": "text/x-python", 126 | "name": "python", 127 | "nbconvert_exporter": "python", 128 | "pygments_lexer": "ipython3", 129 | "version": "3.6.1" 130 | } 131 | }, 132 | "nbformat": 4, 133 | "nbformat_minor": 1 134 | } 135 | -------------------------------------------------------------------------------- /exercises/2019/ex1/ex2_solution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Data Generation\n", 8 | "===" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": { 15 | "collapsed": true 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "import numpy as np\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "%matplotlib inline " 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "name": "stdout", 31 | "output_type": "stream", 32 | "text": [ 33 | "[[ 0.92475366 0.19024499]\n", 34 | " [ 0.84575128 0.24818742]\n", 35 | " [ 0.75676008 0.47054279]]\n", 36 | "\n", 37 | "\n", 38 | "[[ 0.62697812 0.02757475]\n", 39 | " [ 0.32893114 0.95105152]\n", 40 | " [ 0.23032823 0.88654969]\n", 41 | " [ 0.85927341 0.87771197]]\n" 42 | ] 43 | } 44 | ], 45 | "source": [ 46 | "p = 3\n", 47 | "q = 4\n", 48 | "P, Q = (np.random.rand(i, 2) for i in (p, q)) #loop trough tuple\n", 49 | "\n", 50 | "print(P) # 4 x 2 matrix\n", 51 | "print('\\n')\n", 52 | "print(Q) # 5 x 2 matrix" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "metadata": { 59 | "scrolled": false 60 | }, 61 | "outputs": [ 62 | { 63 | "data": { 64 | "text/plain": [ 65 | "" 66 | ] 67 | }, 68 | "execution_count": 3, 69 | "metadata": {}, 70 | "output_type": "execute_result" 71 | }, 72 | { 73 | "data": { 74 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEv9JREFUeJzt3X+s3XV9x/Hnm9J6u40fS1sT6W1p2a5ohwT0hhiNkQ03\nCgkUUQkkZEqYzYzVJTIcBMNIzbJpE8mIbK5zRjFRrATJJRabTTBmRgzFQrE1tRV13DJHrRZjuMWW\nvPfHOcXTy23P99x7zvme87nPR3Jzz/mcD+e8+HJ59Xs+33s+jcxEklSWU+oOIEnqPstdkgpkuUtS\ngSx3SSqQ5S5JBbLcJalAbcs9Ij4XEc9FxA9O8HhExF0RsS8idkbEG7sfU5LUiSpn7p8H1p7k8cuA\nsebXeuBf5x5LkjQXbcs9M78N/PIkU9YB92TDo8CZEfGabgWUJHXu1C48x3LgmZb7k82x/z3ZP7R0\n6dJctWpVF15ekuaPxx9//BeZuazdvG6Ue2URsZ7G0g0rV65k+/bt/Xx5SRp6EfGzKvO68dsy+4EV\nLfdHm2OvkJmbM3M8M8eXLWv7B48kaZa6Ue4TwF82f2vmzcDzmXnSJRlJUm+1XZaJiC8DFwNLI2IS\n+HtgIUBmfgbYClwO7ANeAG7oVVhJUjVtyz0zr2vzeAIf7FoiSdKc+QlVSSqQ5S5JBbLce2nnFrjz\nPLjjzMb3nVvqTiRpnujr77nPKzu3wIMfhiNTjfvPP9O4D3D+NfXlkjQveObeK9/c+LtiP+bIVGNc\nknrMcu+V5yc7G5ekLrLce+WM0c7GJamLLPdeueR2WLj4+LGFixvjktRjlnuvnH8NXHEXnLECiMb3\nK+7yYqqkvvC3ZXrp/Gssc0m18MxdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFWj4yt2dFiWpreH6\nPXd3WpSkSobrzN2dFiXNxTx65z9cZ+7utChptubZO//hOnN3p0VJszXP3vkPV7m706Kk2Zpn7/yH\nq9zdaVHSbM2zd/7DteYO7rQoaXYuuf34NXco+p3/cJ25S9JszbN3/sN35i5JszWP3vl75i5JBbLc\nJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqUKVyj4i1EbEnIvZFxC0zPL4yIh6JiB0RsTMi\nLu9+VElSVW3LPSIWAHcDlwFrgOsiYs20aR8DtmTmhcC1wL90O6gkqboqZ+4XAfsy8+nM/C1wL7Bu\n2pwETm/ePgN4tnsRJUmdqlLuy4FnWu5PNsda3QFcHxGTwFbgQzM9UUSsj4jtEbH9wIEDs4grSaqi\nWxdUrwM+n5mjwOXAFyPiFc+dmZszczwzx5ctW9all5YkTVel3PcDK1rujzbHWt0IbAHIzO8CI8DS\nbgSUJHWuSrk/BoxFxOqIWETjgunEtDn/A1wCEBGvp1HurrtIUk3alntmHgU2ANuAH9L4rZhdEbEx\nIq5sTrsJeH9EPAl8GXhfZmavQkuSTq7SX9aRmVtpXChtHbu95fZu4K3djSZJmi0/oSpJBbLcJalA\nlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5\nS1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrsk\nFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWqVO4RsTYi9kTEvoi45QRzromI3RGxKyK+1N2Y\nkqROnNpuQkQsAO4G/hyYBB6LiInM3N0yZwy4FXhrZv4qIl7dq8CSpPaqnLlfBOzLzKcz87fAvcC6\naXPeD9ydmb8CyMznuhtTktSJKuW+HHim5f5kc6zVa4HXRsR3IuLRiFjbrYCSpM61XZbp4HnGgIuB\nUeDbEfGGzDzUOiki1gPrAVauXNmll5YkTVflzH0/sKLl/mhzrNUkMJGZRzLzJ8CPaJT9cTJzc2aO\nZ+b4smXLZptZktRGlXJ/DBiLiNURsQi4FpiYNucBGmftRMRSGss0T3cxpySpA23LPTOPAhuAbcAP\ngS2ZuSsiNkbElc1p24CDEbEbeAS4OTMP9iq0JOnkIjNreeHx8fHcvn17La8tScMqIh7PzPF28/yE\nqiQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlL\nUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklSgU+sOIGk4PbBjP5u27eHZQ1OcdeZibr70XK66\ncHndsdRkuUvq2AM79nPr/U8xdeQlAPYfmuLW+58CsOAHhMsykjq2aduel4v9mKkjL7Fp256aEmk6\ny11Sx549NNXRuPrPcpfUsbPOXNzRuPrPcpfUsZsvPZfFCxccN7Z44QJuvvTcmhJpOi+oSurYsYum\n/rbM4LLcJc3KVRcut8wHmMsyklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCVyj0i1kbEnojYFxG3\nnGTeuyIiI2K8exElSZ1qW+4RsQC4G7gMWANcFxFrZph3GvA3wPe6HVKS1JkqZ+4XAfsy8+nM/C1w\nL7BuhnkfBz4BHO5iPknSLFQp9+XAMy33J5tjL4uINwIrMvPrXcwmSZqlOV9QjYhTgE8BN1WYuz4i\ntkfE9gMHDsz1pSVJJ1Cl3PcDK1rujzbHjjkNOA/4VkT8FHgzMDHTRdXM3JyZ45k5vmzZstmnliSd\nVJVyfwwYi4jVEbEIuBaYOPZgZj6fmUszc1VmrgIeBa7MzO09SSxJaqttuWfmUWADsA34IbAlM3dF\nxMaIuLLXASVJnau05W9mbgW2Thu7/QRzL557LEnSXPgJVUkqkOUuSQWy3CWpQJa7JBXIcpekAlnu\nklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5J\nBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoFOrTuAJHXbAzv2s2nbHp49NMVZZy7m5kvP5aoL\nl9cdq68sd0lFeWDHfm69/ymmjrwEwP5DU9x6/1MA86rgXZaRVJRN2/a8XOzHTB15iU3b9tSUqB6W\nu6SiPHtoqqPxUlnukopy1pmLOxovleUuqSg3X3ouixcuOG5s8cIF3HzpuTUlqocXVCUV5dhFU39b\nRpIKc9WFy+ddmU/nsowkFchyl6QCVSr3iFgbEXsiYl9E3DLD4x+JiN0RsTMivhkRZ3c/qiSpqrbl\nHhELgLuBy4A1wHURsWbatB3AeGaeD9wHfLLbQSVJ1VW5oHoRsC8znwaIiHuBdcDuYxMy85GW+Y8C\n13czpCQNozr3uKmyLLMceKbl/mRz7ERuBB6a6YGIWB8R2yNi+4EDB6qnlKQhc2yPm/2Hpkh+t8fN\nAzv29+X1u3pBNSKuB8aBTTM9npmbM3M8M8eXLVvWzZeWpIFS9x43VZZl9gMrWu6PNseOExHvAG4D\n3p6ZL3YnniQNp7r3uKly5v4YMBYRqyNiEXAtMNE6ISIuBP4NuDIzn+t+TEkaLnXvcdO23DPzKLAB\n2Ab8ENiSmbsiYmNEXNmctgn4A+CrEfFEREyc4OkkaV6oe4+bStsPZOZWYOu0sdtbbr+jy7kkaajV\nvceNe8tIUo/UuceN2w9IUoEsd0kqkOUuSQUaqDX3I0eOMDk5yeHDh+uO0tbIyAijo6MsXLiw7iiS\n9AoDVe6Tk5OcdtpprFq1ioioO84JZSYHDx5kcnKS1atX1x1Hkl5hoJZlDh8+zJIlSwa62AEigiVL\nlgzFOwxJ89NAlTsw8MV+zLDklDQ/DVy5123BggVccMEFnHfeebznPe/hhRdeqDuSJHXMcp9m8eLF\nPPHEE/zgBz9g0aJFfOYzn6k7kiR1bKAuqHaq1xvhv+1tb2Pnzp1dez5J6pehPXPv9Ub4R48e5aGH\nHuINb3hDV55PkvppaMu9VxvhT01NccEFFzA+Ps7KlSu58cYb5/R8klSHoV2W6dVG+MfW3CVpmA3t\nmXvdG+FL0iAb2nKveyN8SRpkQ7ss06uN8H/zm990I54k1Wpoyx3q3QhfkgbZ0C7LSJJOzHKXpAJZ\n7pJUIMtdkgpkuUtSgSz3GUxOTrJu3TrGxsY455xz2LBhAy+++GLdsSSpMst9mszk6quv5qqrrmLv\n3r3s3buXqakpPvrRj9YdTZIqG+5y37kF7jwP7jiz8X3nljk/5cMPP8zIyAg33HAD0PjLO+68807u\nueceP+AkaWgMb7nv3AIPfhiefwbIxvcHPzzngt+1axdvetObjhs7/fTTWbVqFfv27ZvTc0tSvwxv\nuX9zIxyZtgPkkanGuCTNc8Nb7s9PdjZe0Zo1a3j88cePG/v1r3/Nz3/+c849103J1IEeLBtKVQ1v\nuZ8x2tl4RZdccgkvvPAC99xzDwAvvfQSN910Exs2bGDxYrcTVkU9WjaUqhrecr/kdlg4rWwXLm6M\nz0FE8LWvfY377ruPsbExlixZwimnnMJtt902p+fVPOOyoWo2vOV+/jVwxV1wxgogGt+vuKsxPkcr\nVqxgYmKCvXv3snXrVr7xjW/w/e9/f+6ZNX/0aNlQqmqot/zl/Gu6UuYn85a3vIWf/exnPX0NFeiM\n0eaSzAzjUh9UOnOPiLURsSci9kXELTM8/qqI+Erz8e9FxKpuB5WGSo+WDaWq2pZ7RCwA7gYuA9YA\n10XEmmnTbgR+lZl/DNwJfKLbQaWh0sNlQ6mKKssyFwH7MvNpgIi4F1gH7G6Zsw64o3n7PuDTERGZ\nmZ0GykwiotN/rO9m8a+m+aYPy4bSiVRZllkOtC4eTjbHZpyTmUeB54ElnYYZGRnh4MGDA1+cmcnB\ngwcZGRmpO4okzaivF1QjYj2wHmDlypWveHx0dJTJyUkOHDjQz1izMjIywuioF8ckDaYq5b4fWNFy\nf7Q5NtOcyYg4FTgDODj9iTJzM7AZYHx8/BWn5wsXLmT16tXVkkuSTqjKssxjwFhErI6IRcC1wMS0\nORPAe5u33w08PJv1dklSd7Q9c8/MoxGxAdgGLAA+l5m7ImIjsD0zJ4D/AL4YEfuAX9L4A0CSVJNK\na+6ZuRXYOm3s9pbbh4H3dDeaJGm2oq7Vk4g4ABz76OdS4Be1BOmcWXtjmLLCcOU1a2/UlfXszFzW\nblJt5X5ciIjtmTled44qzNobw5QVhiuvWXtj0LMO78ZhkqQTstwlqUCDUu6b6w7QAbP2xjBlheHK\na9beGOisA7HmLknqrkE5c5ckdVFfy73CvvAfiYjdEbEzIr4ZEWf3M9+0LO2y/nVEPBURT0TEf8+w\nDXLftMvaMu9dEZERUdsV/grH9X0RcaB5XJ+IiL+qI2czS9vjGhHXNH9md0XEl/qdsSVHu+N6Z8sx\n/VFEHKojZ0uednlXRsQjEbGj2QeX15GzmaVd1rObfbUzIr4VEYOx6VRm9uWLxqdbfwycAywCngTW\nTJvzp8DvNW9/APhKv/LNIuvpLbevBL4xqFmb804Dvg08CowPalbgfcCn68g3i6xjwA7gD5v3Xz2o\nWafN/xCNT5oP8rHdDHygeXsN8NMBzvpV4L3N238GfLGuY9v61c8z95f3hc/M3wLH9oV/WWY+kpkv\nNO8+SmOTsjpUyfrrlru/D9R18aJt1qaP0/hLVA73M9w0VbMOgipZ3w/cnZm/AsjM5/qc8ZhOj+t1\nwJf7kmxmVfImcHrz9hnAs33M16pK1jXAw83bj8zweC36We5V9oVvdSPwUE8TnVilrBHxwYj4MfBJ\n4MN9yjZd26wR8UZgRWZ+vZ/BZlD1Z+Bdzbe490XEihke74cqWV8LvDYivhMRj0bE2r6lO17l/7ea\nS52r+V0Z1aFK3juA6yNiksbWJx/qT7RXqJL1SeDq5u13AqdFRMd/n0W3DeQF1Yi4HhgHNtWd5WQy\n8+7M/CPg74CP1Z1nJhFxCvAp4Ka6s1T0ILAqM88H/hP4Qs15TuZUGkszF9M4G/73iDiz1kTtXQvc\nl5kv1R2kjeuAz2fmKHA5jY0JB7KvgL8F3h4RO4C309gCvfbj28+DVWVfeCLiHcBtwJWZ+WKfsk1X\nKWuLe4GreproxNplPQ04D/hWRPwUeDMwUdNF1bbHNTMPtvx3/yzwpj5lm67Kz8AkMJGZRzLzJ8CP\naJR9v3Xy83ot9S7JQLW8NwJbADLzu8AIjb1c+q3Kz+yzmXl1Zl5Io7vIzFovWNMM0a8LE6cCT9N4\nS3jswsSfTJtzIY2LF2N1XoiomHWs5fYVNLY/Hsis0+Z/i/ouqFY5rq9puf1O4NEBzroW+ELz9lIa\nb9+XDGLW5rzXAT+l+fmWur4qHtuHgPc1b7+expp733NXzLoUOKV5+x+AjXUe35dz9flAXU7j7ObH\nwG3NsY00ztIB/gv4P+CJ5tdEjT+A7bL+M7CrmfORkxVq3Vmnza2t3Cse139sHtcnm8f1dQOcNWgs\nee0GngKuHdSszft3AP9UV8YOj+0a4DvNn4MngL8Y4KzvBvY253wWeFXdxzcz/YSqJJVoUC9QSJLm\nwHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalA/w/uwjUbjZQBrAAAAABJRU5ErkJggg==\n", 75 | "text/plain": [ 76 | "" 77 | ] 78 | }, 79 | "metadata": {}, 80 | "output_type": "display_data" 81 | } 82 | ], 83 | "source": [ 84 | "plt.scatter(P[:,0],P[:,1])\n", 85 | "plt.scatter(Q[:,0],Q[:,1])\n", 86 | "plt.legend(['P','Q'])" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "The **Euclidean** distance is computed as follows: $$\\|x-y\\|_2 =\n", 94 | "\\sqrt{(x_{1} - y_{1})^2 + \\cdots + (x_{n} - y_{n})^2}$$\n", 95 | "For example $P_1$ and $Q_1$:" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 4, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "P_0-Q_0= [ 0.29777554 0.16267024]\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "delta=P[0]-Q[0]\n", 113 | "\n", 114 | "print('P_0-Q_0= ',delta)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 5, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "0.33931089017680288" 126 | ] 127 | }, 128 | "execution_count": 5, 129 | "metadata": {}, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": [ 134 | "np.sqrt(np.sum(delta**2))" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 6, 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "data": { 144 | "text/plain": [ 145 | "0.33931089017680288" 146 | ] 147 | }, 148 | "execution_count": 6, 149 | "metadata": {}, 150 | "output_type": "execute_result" 151 | } 152 | ], 153 | "source": [ 154 | "np.sqrt(np.dot(delta,delta))" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 7, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/plain": [ 165 | "0.33931089017680288" 166 | ] 167 | }, 168 | "execution_count": 7, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": [ 174 | "np.linalg.norm(delta)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "Solution\n", 182 | "===" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "### a)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "- **np.reapeat(A,reps)** Repeat elements of A the number of times given by reps.\n", 197 | "\n", 198 | "- **np.tile(A,reps)** Construct an array by repeating A the number of times given by reps.\n", 199 | "\n", 200 | "\n" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 8, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "data": { 210 | "text/plain": [ 211 | "array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2])" 212 | ] 213 | }, 214 | "execution_count": 8, 215 | "metadata": {}, 216 | "output_type": "execute_result" 217 | } 218 | ], 219 | "source": [ 220 | "np.repeat(range(len(P)), len(Q))" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 9, 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "data": { 230 | "text/plain": [ 231 | "array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3])" 232 | ] 233 | }, 234 | "execution_count": 9, 235 | "metadata": {}, 236 | "output_type": "execute_result" 237 | } 238 | ], 239 | "source": [ 240 | "np.tile(range(len(Q)), len(P))" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 10, 246 | "metadata": { 247 | "collapsed": true 248 | }, 249 | "outputs": [], 250 | "source": [ 251 | "p_inds=np.repeat(range(len(P)), len(Q))\n", 252 | "q_inds=np.tile(range(len(Q)), len(P))" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 11, 258 | "metadata": { 259 | "scrolled": true 260 | }, 261 | "outputs": [ 262 | { 263 | "data": { 264 | "text/plain": [ 265 | "array([[ 0.62697812, 0.02757475],\n", 266 | " [ 0.32893114, 0.95105152],\n", 267 | " [ 0.23032823, 0.88654969],\n", 268 | " [ 0.85927341, 0.87771197],\n", 269 | " [ 0.62697812, 0.02757475],\n", 270 | " [ 0.32893114, 0.95105152],\n", 271 | " [ 0.23032823, 0.88654969],\n", 272 | " [ 0.85927341, 0.87771197],\n", 273 | " [ 0.62697812, 0.02757475],\n", 274 | " [ 0.32893114, 0.95105152],\n", 275 | " [ 0.23032823, 0.88654969],\n", 276 | " [ 0.85927341, 0.87771197]])" 277 | ] 278 | }, 279 | "execution_count": 11, 280 | "metadata": {}, 281 | "output_type": "execute_result" 282 | } 283 | ], 284 | "source": [ 285 | "Q[q_inds, :]" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 12, 291 | "metadata": { 292 | "scrolled": false 293 | }, 294 | "outputs": [ 295 | { 296 | "data": { 297 | "text/plain": [ 298 | "array([[ 0.92475366, 0.19024499],\n", 299 | " [ 0.92475366, 0.19024499],\n", 300 | " [ 0.92475366, 0.19024499],\n", 301 | " [ 0.92475366, 0.19024499],\n", 302 | " [ 0.84575128, 0.24818742],\n", 303 | " [ 0.84575128, 0.24818742],\n", 304 | " [ 0.84575128, 0.24818742],\n", 305 | " [ 0.84575128, 0.24818742],\n", 306 | " [ 0.75676008, 0.47054279],\n", 307 | " [ 0.75676008, 0.47054279],\n", 308 | " [ 0.75676008, 0.47054279],\n", 309 | " [ 0.75676008, 0.47054279]])" 310 | ] 311 | }, 312 | "execution_count": 12, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "P[p_inds, :]" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 13, 324 | "metadata": { 325 | "scrolled": true 326 | }, 327 | "outputs": [ 328 | { 329 | "name": "stdout", 330 | "output_type": "stream", 331 | "text": [ 332 | "[ 0.33931089 0.96634934 0.98339561 0.69057839 0.31069542 0.87242248\n", 333 | " 0.88670847 0.62966976 0.4615886 0.64337099 0.67096366 0.41987584]\n" 334 | ] 335 | } 336 | ], 337 | "source": [ 338 | "distances = np.sqrt(np.sum((P[p_inds, :] - Q[q_inds, :])**2, axis=1)) # alternatively np.linalg.norm\n", 339 | "print(distances)" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 14, 345 | "metadata": {}, 346 | "outputs": [ 347 | { 348 | "name": "stdout", 349 | "output_type": "stream", 350 | "text": [ 351 | "[[ 0.33931089 0.96634934 0.98339561 0.69057839]\n", 352 | " [ 0.31069542 0.87242248 0.88670847 0.62966976]\n", 353 | " [ 0.4615886 0.64337099 0.67096366 0.41987584]]\n" 354 | ] 355 | } 356 | ], 357 | "source": [ 358 | "distances = np.reshape(distances, (len(P), len(Q)))\n", 359 | "\n", 360 | "print(distances)" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": {}, 366 | "source": [ 367 | "### b)" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 26, 373 | "metadata": {}, 374 | "outputs": [ 375 | { 376 | "name": "stdout", 377 | "output_type": "stream", 378 | "text": [ 379 | "[[ 0.33931089 0.96634934 0.98339561 0.69057839]\n", 380 | " [ 0.31069542 0.87242248 0.88670847 0.62966976]\n", 381 | " [ 0.4615886 0.64337099 0.67096366 0.41987584]]\n" 382 | ] 383 | } 384 | ], 385 | "source": [ 386 | "# Alternative solution:\n", 387 | "distances=(-2 * P.dot(Q.T)) + np.sum(P**2, axis=1, keepdims=True) + np.sum(Q.T**2, axis=0, keepdims=True)\n", 388 | "print(np.sqrt(distances))" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [] 397 | } 398 | ], 399 | "metadata": { 400 | "kernelspec": { 401 | "display_name": "Python 3", 402 | "language": "python", 403 | "name": "python3" 404 | }, 405 | "language_info": { 406 | "codemirror_mode": { 407 | "name": "ipython", 408 | "version": 3 409 | }, 410 | "file_extension": ".py", 411 | "mimetype": "text/x-python", 412 | "name": "python", 413 | "nbconvert_exporter": "python", 414 | "pygments_lexer": "ipython3", 415 | "version": "3.6.1" 416 | } 417 | }, 418 | "nbformat": 4, 419 | "nbformat_minor": 1 420 | } 421 | -------------------------------------------------------------------------------- /exercises/2019/ex10/barbara.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/2019/ex10/barbara.png -------------------------------------------------------------------------------- /exercises/2019/ex10/boat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/2019/ex10/boat.png -------------------------------------------------------------------------------- /exercises/2019/ex10/drum1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/2019/ex10/drum1.wav -------------------------------------------------------------------------------- /exercises/2019/ex10/drum2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/2019/ex10/drum2.wav -------------------------------------------------------------------------------- /exercises/2019/ex10/ex2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import matplotlib.pyplot as plt\n", 10 | "import numpy as np\n", 11 | "import pylab as pl" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Dictionary properties" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "First, we want to generate the matrix $U$ corresponding to the dictionary of Haar wavelets.\n", 26 | "The following implementation is based on formulas (8) and (9) in\n", 27 | "https://iopscience.iop.org/article/10.1088/0305-4470/36/24/316/pdf" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 6, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "def haarMatrix(n):\n", 37 | " if n > 2:\n", 38 | " h = haarMatrix(n / 2)\n", 39 | " else:\n", 40 | " return np.array([[1, 1], [1, -1]])\n", 41 | "\n", 42 | " # calculate upper haar part\n", 43 | " h_n = np.kron(h, [1, 1])\n", 44 | " \n", 45 | " # calculate lower haar part \n", 46 | " h_i = np.sqrt(n/2)*np.kron(np.eye(len(h)), [1, -1])\n", 47 | "\n", 48 | " # combine parts\n", 49 | " h = np.vstack((h_n, h_i))\n", 50 | " return h" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "Run the code below to generate a visual representation of $U$. Enjoy the symmetry and understand why we need $K$ to be a power of 2." 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 7, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "image/png": "\n", 68 | "text/plain": [ 69 | "
" 70 | ] 71 | }, 72 | "metadata": { 73 | "needs_background": "light" 74 | }, 75 | "output_type": "display_data" 76 | } 77 | ], 78 | "source": [ 79 | "K = 32 #size of the dictionary. MUST be a power of 2.\n", 80 | "U = np.transpose(haarMatrix(K)/np.sqrt(K)) #computing dictionary matrix. \n", 81 | "plt.imshow(U, cmap='PiYG', interpolation='nearest')\n", 82 | "plt.colorbar()\n", 83 | "plt.show()" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "**Question:** Why are the entries in the last columns of dark color? Why is this crucial? (Hint: Energy, cf. Ex 1)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "Next, notice that $U$ is orthonormal" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 8, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAS4AAAD8CAYAAADJwUnTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAE8tJREFUeJzt3X+sX3V9x/HnqwVkAaawe2FNfwiymtAQLXBTWFgUBU3tH1QTNa2Zw6XZNQvddOKyThdkbH+oCxKXNLjLaECDVBSYN6ZbJQzCXAb2VmrpjzGvXQeX3tB2orIYxep7f5xzybf3++vc7z3fc76fe1+P5KTfc77n+zkfDu0rn8/nfM45igjMzFKypO4KmJnNlYPLzJLj4DKz5Di4zCw5Di4zS46Dy8yS4+Ays76RtEPScUkH2nwvSX8vaVLSfklXFinXwWVm/XQvsL7D9+8BVufLKHBXkUIdXGbWNxHxJPCjDrtsBL4cmaeAN0ha1q3cM8qqYBGS2k7Tv+qqq6qsitmCdPToUU6ePKn5lNHp32kLB4GfN6yPRcTYHH6/HHihYX0q3zbd6UfzCi5J64EvAkuBf4yIz/Za1sTExHyqYmbAyMhI1Yf8eUTM56CtQrZrcPbcVZS0FNhO1kddA2yWtKbX8sxscEgqtJRgCljZsL4CONbtR/MZ41oHTEbEkYh4FdhJ1l81s8QtWbKk0FKCceAP8quL1wA/iYiO3USYX1exVd/06tk7SRolu1pgZokoqTWFpAeA64AhSVPAZ4AzASLiS8AuYAMwCfwM+MMi5c4nuAr1TfOBujGY86CfmdWgxG4gEbG5y/cB3DzXcucTXD31Tc1s8JUVXP0yn07qHmC1pEsknQVsIuuvtnXVVVcRES2XPg8AmtkcVDg435OeW1wRcUrSVmA32XSIHRFxsLSamVltBr3BMK95XBGxi2xwzcwWCEllXTHsm0pnzptZGhZ0i8vMFiYHl5klx8FlZslxcBXU6f2O7U6i3wlpVj4PzptZktziMrPkOLjMLDkOLjNLSt238xTh4DKzJg6uErS7etjp5PqKo1nvfFXRzJLjFpeZJcVjXGaWJAeXmSXHwWVmyfHgvJklxWNcfdbLjdndfmdm7iqaWYIcXGaWHAeXmSXHwWVmSfGDBM0sSW5xmVlyHFw18VQJs94t6OCSdBR4BfgVcCoiRsqolJnVZ7FMQH1HRJwsoRwzGxCLIbjMbIEZ9KuK861dAN+WtFfSaKsdJI1KmpA0ceLEiXkezsyqMNNd7LbUZb4trmsj4pikC4FHJf1nRDzZuENEjAFjACMjIx75NhtwdYdSEfNqcUXEsfzP48AjwLoyKmVm9Rr0FlfPwSXpHEnnzXwG3g0cKKti/RQRbZdB/J9kVrUyg0vSeknPSZqUtK3F96skPS7pGUn7JW3oVuZ8uooXAY/klT8D+GpE/Ms8yjOzAVHW4LykpcB24F3AFLBH0nhEHGrY7a+AByPiLklrgF3AxZ3K7Tm4IuII8NZef29mg6nkHsY6YDLPCyTtBDYCjcEVwG/mn18PHOtWqKdDmFmTOQTXkKSJhvWx/ILcjOXACw3rU8DVs8q4jWx2wp8A5wA3dDuog8vMmswhuE52uWOmVUGzZxdsBu6NiDsk/S7wFUmXR8Sv2xXq4DKzJiV2FaeAlQ3rK2juCm4B1gNExH9IOhsYAo63K3Swp8eaWS1KvKq4B1gt6RJJZwGbgPFZ+zwPXJ8f9zLgbKDjbHW3uGbp5akSfqKELSRlPkgwIk5J2grsBpYCOyLioKTbgYmIGAduAe6W9Gdk3ciPRJd/VA4uM2tS5rzFiNhFNsWhcdutDZ8PAdfOpUwHl5k1GfQJ1w4uM2vi4DKzpKRwi5uDy8yaOLgWkHYXOvwMe1toBv1Bgg4uM2viFpeZJcVjXGaWJAeXmSXHwWVmyfHgvJklxWNci0QvN2Z3+51ZnRxcZpYcB5eZJcfBZWbJcXCZWVLKfJBgvzi4zKyJW1xmlhwH1yLnqRKWokEPrq4dWUk7JB2XdKBh2wWSHpX0g/zP8/tbTTOrStE3/NQZbkVG4O4lf+dZg23AYxGxGngsXzezBSL54IqIJ4Efzdq8Ebgv/3wf8N6S62VmNVqyZEmhpS69jnFdFBHTABExLenCdjtKGgVGAVatWtXj4cysKnW3poroe2RGxFhEjETEyPDwcL8PZ2YlSL6r2MZLkpYB5H8eL69KZla3hRpc48BN+eebgG+WU53FJSLaLoP4l8UWj0EPrq5jXJIeAK4DhiRNAZ8BPgs8KGkL8DzwgX5W0syqsyBu+YmIzW2+ur7kupjZgBj0lr1nzptZEweXmSXHwWVmyXFwmVlS6r5iWISDa0D5qRJWp+SvKprZ4jPoLa7BjlUzq0WZE1AlrZf0nKRJSS2fJCPpg5IOSToo6avdynSLy8xOU+YYl6SlwHbgXcAUsEfSeEQcathnNfCXwLUR8XKnhzbMcIvLzJqU2OJaB0xGxJGIeBXYSfZYrEZ/BGyPiJcBIqLrvc8OLjNrMofncQ1JmmhYRmcVtRx4oWF9Kt/W6M3AmyX9u6SnJM1+cGkTdxXNrMkcuoonI2KkU1Etts2+9H0GsJrsnugVwL9JujwiftyuUAdXgnqZKuFpElZUyfO4poCVDesrgGMt9nkqIn4J/Lek58iCbE+7Qt1VNLMmJY5x7QFWS7pE0lnAJrLHYjX6J+Ad+XGHyLqORzoV6haXmTUpq8UVEackbQV2A0uBHRFxUNLtwEREjOffvVvSIeBXwJ9HxP92KtfBZWZNypyAGhG7gF2ztt3a8DmAT+RLIQ4uMzvNgniQoJktPoN+y4+Da4Fpd/XQN2bbXDi4zCw5Di4zS46Dy8yS4gcJmlmSfFXRzJLjFpeZJcfBZQPBz7C3olIY4+rakZW0Q9JxSQcatt0m6UVJ+/JlQ3+raWZVKvPRzf1QZATuXqDVg73ujIi1+bKrxfdmlqg5PEiwFl27ihHxpKSL+18VMxsUyXcVO9gqaX/elTy/3U6SRmce63rixIl5HM7MqlC0mzjoXcVW7gIuBdYC08Ad7XaMiLGIGImIkeHh4R4PZ2ZVGvTg6umqYkS8NPNZ0t3At0qrkZnVbtC7ij0Fl6RlETGdr74PONBpfxtsniphsyUfXJIeIHv7xpCkKeAzwHWS1pK9reMo8NE+1tHMKrQgHiQYEZtbbL6nD3UxswGRfIvLzBYfB5eZJcfBZWbJcXCZWVLqnqNVhIPLOvJUicUp+auKZrb4uMVlZslxcJlZUjzGZWZJcnCZWXI8OG9myXGLyxasXqZKeJrE4PMYl5klycFlZslxcJlZchxcZpaUFB4kONi1M7NalPmyDEnrJT0naVLStg77vV9SSBrpVqZbXNYX7a4e+sbsNJTVVZS0FNgOvAuYAvZIGo+IQ7P2Ow/4U+DpIuW6xWVmTUpsca0DJiPiSES8CuwENrbY72+AzwM/L1Kog8vMmswhuIZmXvicL6OziloOvNCwPpVvazzWFcDKiCj8mkN3Fc3sNHOcgHoyIjqNSbUq6LUxAUlLgDuBjxSuIA4uM2uhxKuKU8DKhvUVwLGG9fOAy4En8rD8bWBc0o0RMdGuUAeXmTUpcR7XHmC1pEuAF4FNwIdmvoyInwBDDcd9Avhkp9ACj3GZWQtlDc5HxClgK7AbOAw8GBEHJd0u6cZe61fkTdYrgS+TNeF+DYxFxBclXQB8DbiY7G3WH4yIl3utiC0Ofob94Cv7JuuI2AXsmrXt1jb7XlekzCItrlPALRFxGXANcLOkNcA24LGIWA08lq+b2QJQ5gTUfuja4oqIaWA6//yKpMNklzM3Atflu90HPAH8RV9qaWaVGvRbfuY0OC/pYuAKstmtF+WhRkRMS7qw9NqZWeXqbk0VUTi4JJ0LPAR8PCJ+Oof7lEaBUYBVq1b1Ukczq9igB1eh9qCkM8lC6/6IeDjf/JKkZfn3y4DjrX4bEWMRMRIRI8PDw2XU2cz6bNDHuLoGl7La3QMcjogvNHw1DtyUf74J+Gb51TOzOgx6cBXpKl4LfBh4VtK+fNungM8CD0raAjwPfKA/VbTFwlMlBsegdxWLXFX8Dq3vNwK4vtzqmFndUniQoG/5MbMmybe4zGzxcXCZWXIcXGaWlLqvGBbh4DKzJh6cNyuBp0pUyy0uM0uOg8vMkuIxLjNLkoPLzJLj4DKz5PiqopklxWNcZhXoZaqEp0l05uAys+Q4uMwsOQ4uM0uOg8vMkuIHCZpZktziMqtRu6uHvjG7MweXmSXHwWVmSfEEVDNLkgfnzSw5bnGZWXIcXGaWlBTGuLp2ZCWtlPS4pMOSDkr6WL79NkkvStqXLxv6X12zckRE22XmH26rZbHodA4G4XwUGYE7BdwSEZcB1wA3S1qTf3dnRKzNl119q6WZVarM4JK0XtJzkiYlbWvx/SckHZK0X9Jjkt7YrcyuwRUR0xHxvfzzK8BhYHmhGptZkpYsWVJo6UbSUmA78B5gDbC5oeEz4xlgJCLeAnwD+HzX+s3lP0bSxcAVwNP5pq15Su6QdP5cyjKzwVS0tVWwxbUOmIyIIxHxKrAT2Ni4Q0Q8HhE/y1efAlZ0K7RwcEk6F3gI+HhE/BS4C7gUWAtMA3e0+d2opAlJEydOnCh6ODOr0RyCa2jm33e+jM4qajnwQsP6FJ17bFuAf+5Wv0JXFSWdSRZa90fEwwAR8VLD93cD32r124gYA8YARkZGfBOYWQLmMPB+MiJGOhXVYlvLHJD0+8AI8PZuB+0aXMr+C+4BDkfEFxq2L4uI6Xz1fcCBbmWZWRpKvGI4BaxsWF8BHGtxvBuATwNvj4hfdCu0SIvrWuDDwLOS9uXbPkU2yLaWLD2PAh8tUJbZwOvlGfbdfpeaEoNrD7Ba0iXAi8Am4EOzjnUF8A/A+og4XqTQrsEVEd+hdXPP0x/MFqAyHyQYEackbQV2A0uBHRFxUNLtwEREjAN/B5wLfD0PzOcj4sZO5XrmvJk1KXNyaT7Hc9esbbc2fL5hrmU6uMysyaDfJeDgMrMmDi4zS0rd9yEW4eAysyZ+kKDZArJYpkq4xWVmyXFwmVlSPMZlZklycJlZchxcZpYcX1U0s6R4jMtsEVlIUyUcXGaWHAeXmSXHwWVmyXFwmVlSynyQYL84uMysiVtcZpYcB5eZ9TRVos5pEg4uM0uKJ6CaWZI8OG9myXGLy8yS4+Ays6SkMMbVtSMr6WxJ35X0fUkHJf11vv0SSU9L+oGkr0k6q//VNVt4IqLlMhMgrZZ+63TsKuvRTpERuF8A74yItwJrgfWSrgE+B9wZEauBl4Et/aummVUp+eCKzP/lq2fmSwDvBL6Rb78PeG9famhmlZq55afIUpdCR5a0VNI+4DjwKPBD4McRcSrfZQpY3p8qmlnVkm9xAUTEryJiLbACWAdc1mq3Vr+VNCppQtLEiRMneq+pmVVmQQTXjIj4MfAEcA3wBkkzVyVXAMfa/GYsIkYiYmR4eHg+dTWziiQfXJKGJb0h//wbwA3AYeBx4P35bjcB3+xXJc2sWoMeXEXmcS0D7pO0lCzoHoyIb0k6BOyU9LfAM8A9fayn2aLT6zPs56vuUCqia3BFxH7gihbbj5CNd5nZAuN7Fc0sOcm3uMxs8Rn04Brs9qCZVa7owHzRcJO0XtJzkiYlbWvx/evy2wYn89sIL+5WpoPLzJqUFVz5Rb3twHuANcBmSWtm7bYFeDkifge4k+x2wo4cXGbWpMRbftYBkxFxJCJeBXYCG2fts5HstkHIbiO8Xl1SsdIxrr17956U9D/56hBwssrjt+F6nM71OF1q9XjjfA+0d+/e3ZKGCu5+tqSJhvWxiBhrWF8OvNCwPgVcPauM1/aJiFOSfgL8Fh3+eysNroh4beq8pImIGKny+K24Hq6H63G6iFhfYnGtWk6zJ6gV2ec07iqaWT9NASsb1lvdHvjaPvlthK8HftSpUAeXmfXTHmC1sgePngVsAsZn7TNOdtsgZLcR/mt0eTdbnfO4xrrvUgnX43Sux+lcj3nIx6y2AruBpcCOiDgo6XZgIiLGyW4X/IqkSbKW1qZu5arOl06amfXCXUUzS46Dy8ySU0twdbsFoMJ6HJX0rKR9s+ai9Pu4OyQdl3SgYdsFkh5V9takRyWdX1M9bpP0Yn5O9knaUEE9Vkp6XNJhZW+S+li+vdJz0qEelZ4T+c1a3bV7NVK/FrIBuh8CbwLOAr4PrKm6HnldjgJDNRz3bcCVwIGGbZ8HtuWftwGfq6ketwGfrPh8LAOuzD+fB/wX2e0hlZ6TDvWo9JyQzWs6N/98JvA02VOHHwQ25du/BPxxlf+fBmmpo8VV5BaABS0inqR5nkrjbQ+VvDWpTT0qFxHTEfG9/PMrZE/YXU7F56RDPSoVGb9Zq4M6gqvVLQB1vSEogG9L2itptKY6zLgoIqYh+wcEXFhjXbZK2p93JfveZW2UPxngCrJWRm3nZFY9oOJzIr9Zq6M6gmvO0/v76NqIuJLszvWbJb2tpnoMkruAS8le/jsN3FHVgSWdCzwEfDwiflrVcQvUo/JzEvN4s9ZiUEdwFbkFoBIRcSz/8zjwCPU+ivolScsA8j+P11GJiHgp/0fza+BuKjonks4kC4v7I+LhfHPl56RVPeo6J/mx5/xmrcWgjuAqcgtA30k6R9J5M5+BdwMHOv+qrxpve6jtrUkzQZF7HxWck/wRJvcAhyPiCw1fVXpO2tWj6nMiv1mruzquCAAbyK7Y/BD4dE11eBPZFc3vAwerrAfwAFmX45dkLdAtZI/xeAz4Qf7nBTXV4yvAs8B+suBYVkE9fo+s27Mf2JcvG6o+Jx3qUek5Ad5C9uas/WQheWvD39nvApPA14HXVfV3dtAW3/JjZsnxzHkzS46Dy8yS4+Ays+Q4uMwsOQ4uM0uOg8vMkuPgMrPk/D8d1gosTXMHJwAAAABJRU5ErkJggg==\n", 108 | "text/plain": [ 109 | "
" 110 | ] 111 | }, 112 | "metadata": { 113 | "needs_background": "light" 114 | }, 115 | "output_type": "display_data" 116 | } 117 | ], 118 | "source": [ 119 | "plt.imshow(np.dot(np.transpose(U),U),cmap='binary', interpolation='nearest')\n", 120 | "plt.colorbar()\n", 121 | "plt.show()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "# Coding step" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "**Task A :** build a sparse signal corrupted by small Gaussian noise in the cell below" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 12, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "# solution" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "# Decoding step" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "**Task B :** Compute the Haar transform of the noisy signal and plot it. What do you notice?" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 10, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "#solution\n" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "**Task C :** Compare this to what you learned in Exercise 1. Does the math match what we observe?" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 11, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "#solution\n" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "**Task D :** Apply thresholding on the Haar transform and plot the signal reconstruction." 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "#solution" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "**Task E** :Increase the noise level, what happens?" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [] 215 | } 216 | ], 217 | "metadata": { 218 | "kernelspec": { 219 | "display_name": "Python 3", 220 | "language": "python", 221 | "name": "python3" 222 | }, 223 | "language_info": { 224 | "codemirror_mode": { 225 | "name": "ipython", 226 | "version": 3 227 | }, 228 | "file_extension": ".py", 229 | "mimetype": "text/x-python", 230 | "name": "python", 231 | "nbconvert_exporter": "python", 232 | "pygments_lexer": "ipython3", 233 | "version": "3.7.1" 234 | } 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 2 238 | } 239 | -------------------------------------------------------------------------------- /exercises/2019/ex10_old/ex1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import datetime 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | # Create some noisy data 9 | x_data = np.random.rand(1000, 2).astype(np.float32) 10 | correct_W = [[1, 2, 3], [4, 5, 6]] 11 | correct_b = [11, 12, 13] 12 | correct_W, correct_b = map(lambda l: np.array(l, dtype=np.float32), (correct_W, correct_b)) 13 | noise_level = 0.01 14 | y_data = np.dot(x_data, correct_W) + correct_b + np.random.normal(size=(1000, 3)) 15 | 16 | # Define the symbolic variables 17 | W = tf.Variable(tf.random_uniform(correct_W.shape, -1.0, 1.0)) 18 | b = tf.Variable(tf.zeros(correct_b.shape)) 19 | 20 | # Define the model 21 | y_hat = ... 22 | 23 | # Define the loss 24 | loss = ... 25 | tf.summary.scalar('log loss', tf.log(1.0 + loss)) 26 | 27 | # Define the optimizer 28 | step_size = 0.1 29 | optimizer = tf.train.GradientDescentOptimizer(step_size) 30 | train_op = optimizer.minimize(loss) 31 | 32 | # Initialize the tensorflow session 33 | init = tf.initialize_all_variables() 34 | 35 | with tf.Session() as sess: 36 | sess.run(init) 37 | summary_op = tf.summary.merge_all() 38 | summary_writer = tf.summary.FileWriter("train/ex1_{}".format(datetime.datetime.now().strftime("%s")), sess.graph) 39 | 40 | # Call the train_op many times, each time it will update the variables W and b according to their gradients 41 | for step in range(201): 42 | _, loss_value, summary_str = sess.run([train_op, loss, summary_op]) 43 | summary_writer.add_summary(summary_str, step) 44 | print("iteration:", step, "loss:", loss_value) 45 | 46 | print("learned W:\n{}".format(sess.run(W))) 47 | print("learned b:\n{}".format(sess.run(b))) -------------------------------------------------------------------------------- /exercises/2019/ex10_old/ex1_solution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | import datetime 6 | 7 | # Create some noisy data 8 | x_data = np.random.rand(1000, 2).astype(np.float32) 9 | correct_W = [[1, 2, 3], [4, 5, 6]] 10 | correct_b = [11, 12, 13] 11 | correct_W, correct_b = map(lambda l: np.array(l, dtype=np.float32), (correct_W, correct_b)) 12 | noise_level = 0.01 13 | y_data = np.dot(x_data, correct_W) + correct_b + np.random.normal(size=(1000, 3)) 14 | 15 | # Define the symbolic variables 16 | W = tf.Variable(tf.random_uniform(correct_W.shape, -1.0, 1.0)) 17 | b = tf.Variable(tf.zeros(correct_b.shape)) 18 | 19 | # Define the model 20 | y_hat = tf.matmul(x_data, W) + b 21 | 22 | # Define the loss 23 | loss = tf.reduce_mean(tf.square(y_hat - y_data)) 24 | tf.summary.scalar('log loss', tf.log(1.0 + loss)) 25 | 26 | # Define the optimizer 27 | step_size = 0.1 28 | optimizer = tf.train.GradientDescentOptimizer(step_size) 29 | train_op = optimizer.minimize(loss) 30 | 31 | # Initialize the tensorflow session 32 | init = tf.initialize_all_variables() 33 | with tf.Session() as sess: 34 | sess.run(init) 35 | 36 | summary_op = tf.summary.merge_all() 37 | summary_writer = tf.summary.FileWriter("train/ex1_{}".format(datetime.datetime.now().strftime("%s")), sess.graph) 38 | 39 | # Call the train_op many times, each time it will update the variables W and b according to their gradients 40 | for step in range(201): 41 | _, loss_value, summary_str = sess.run([train_op, loss, summary_op]) 42 | summary_writer.add_summary(summary_str, step) 43 | print("iteration:", step, "loss:", loss_value) 44 | 45 | print("learned W:\n{}".format(sess.run(W))) 46 | print("learned b:\n{}".format(sess.run(b))) -------------------------------------------------------------------------------- /exercises/2019/ex10_old/ex2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import datetime 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | # Create some noisy data 9 | x_data = np.random.rand(1000, 2).astype(np.float32) 10 | correct_W = [[1, 2, 3], [4, 5, 6]] 11 | correct_b = [11, 12, 13] 12 | correct_W, correct_b = map(lambda l: np.array(l, dtype=np.float32), (correct_W, correct_b)) 13 | noise_level = 0.01 14 | y_data = np.dot(x_data, correct_W) + correct_b + np.random.normal(size=(1000, 3)) 15 | 16 | # Define the symbolic variables 17 | W = tf.Variable(tf.random_uniform(correct_W.shape, -1.0, 1.0)) 18 | b = tf.Variable(tf.zeros(correct_b.shape)) 19 | 20 | # Define the data placeholders 21 | batch_size = 10 22 | x_ph = tf.placeholder(tf.float32, shape=(batch_size, 2)) 23 | y_ph = tf.placeholder(tf.float32, shape=(batch_size, 3)) 24 | 25 | # Define the model (using placeholders) 26 | y_hat = ... 27 | 28 | # Define the (stochastic!) loss 29 | loss = ... 30 | tf.summary.scalar('log loss', tf.log(1.0 + loss)) # attention: this is the stochastic loss, i.e. it will be noisy 31 | 32 | # Define the optimizer 33 | step_size = 0.1 34 | optimizer = tf.train.GradientDescentOptimizer(step_size) 35 | train_op = optimizer.minimize(loss) 36 | 37 | # Initialize the tensorflow session 38 | init = tf.initialize_all_variables() 39 | 40 | with tf.Session() as sess: 41 | sess.run(init) 42 | summary_op = tf.summary.merge_all() 43 | summary_writer = tf.summary.FileWriter("train/ex2_{}".format(datetime.datetime.now().strftime("%s")), sess.graph) 44 | 45 | # Call the train_op many times, each time it will update the variables W and b according to their gradients 46 | for step in range(201): 47 | 48 | # Determine the minibatch 49 | start_index = (batch_size * step) % x_data.shape[0] 50 | stop_index = start_index + batch_size 51 | 52 | # Get the minibatch data 53 | x_minibatch = x_data[start_index:stop_index] 54 | y_minibatch = y_data[start_index:stop_index] 55 | 56 | feed_dict = { 57 | x_ph: x_minibatch, 58 | y_ph: y_minibatch 59 | } 60 | 61 | _, loss_value, summary_str = sess.run([train_op, loss, summary_op], feed_dict=feed_dict) 62 | summary_writer.add_summary(summary_str, step) 63 | print("iteration:", step, "loss:", loss_value) 64 | 65 | print("learned W:\n{}".format(sess.run(W))) 66 | print("learned b:\n{}".format(sess.run(b))) -------------------------------------------------------------------------------- /exercises/2019/ex10_old/ex2_solution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | import datetime 6 | 7 | # create some noisy data 8 | x_data = np.random.rand(1000, 2).astype(np.float32) 9 | correct_W = [[1, 2, 3], [4, 5, 6]] 10 | correct_b = [11, 12, 13] 11 | correct_W, correct_b = map(lambda l: np.array(l, dtype=np.float32), (correct_W, correct_b)) 12 | noise_level = 0.01 13 | y_data = np.dot(x_data, correct_W) + correct_b + np.random.normal(size=(1000, 3)) 14 | 15 | 16 | # define the symbolic variables 17 | W = tf.Variable(tf.random_uniform(correct_W.shape, -1.0, 1.0)) 18 | b = tf.Variable(tf.zeros(correct_b.shape)) 19 | 20 | # define the data placeholders 21 | batch_size = 10 22 | x_ph = tf.placeholder(tf.float32, shape=(batch_size, 2)) 23 | y_ph = tf.placeholder(tf.float32, shape=(batch_size, 3)) 24 | 25 | # define the model (using placeholders) 26 | y_hat = tf.matmul(x_ph, W) + b 27 | 28 | # define the (stochastic!) loss 29 | loss = tf.reduce_mean(tf.square(y_hat - y_ph)) 30 | tf.summary.scalar('log loss', tf.log(1.0 + loss)) # attention: this is the stochastic loss, i.e. it will be noisy 31 | 32 | # define the optimizer 33 | step_size = 0.1 34 | optimizer = tf.train.GradientDescentOptimizer(step_size) 35 | train_op = optimizer.minimize(loss) 36 | 37 | # initialize the tensorflow session 38 | init = tf.initialize_all_variables() 39 | with tf.Session() as sess: 40 | sess.run(init) 41 | 42 | summary_op = tf.summary.merge_all() 43 | summary_writer = tf.summary.FileWriter("train/ex2_{}".format(datetime.datetime.now().strftime("%s")), sess.graph) 44 | 45 | # call the train_op many times, each time it will update the variables W and b according to their gradients 46 | for step in range(201): 47 | # determine the minibatch 48 | start_index = (batch_size * step) % x_data.shape[0] 49 | stop_index = start_index + batch_size 50 | 51 | # get the minibatch data 52 | x_minibatch = x_data[start_index:stop_index] 53 | y_minibatch = y_data[start_index:stop_index] 54 | 55 | feed_dict = { 56 | x_ph: x_minibatch, 57 | y_ph: y_minibatch 58 | } 59 | 60 | _, loss_value, summary_str = sess.run([train_op, loss, summary_op], feed_dict=feed_dict) 61 | summary_writer.add_summary(summary_str, step) 62 | print("iteration:", step, "loss:", loss_value) 63 | 64 | print("learned W:\n{}".format(sess.run(W))) 65 | print("learned b:\n{}".format(sess.run(b))) -------------------------------------------------------------------------------- /exercises/2019/ex11/ex3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Understanding compressed sensing with a simple example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 8, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import matplotlib.pyplot as plt\n", 17 | "import numpy as np\n", 18 | "import sklearn.linear_model as linear_model" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "First, we generate the matrix $U$ corresponding to the dictionary of Haar wavelets.\n", 26 | "\n", 27 | "same as last week :)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 9, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "def haarMatrix(n):\n", 37 | " if n > 2:\n", 38 | " h = haarMatrix(n / 2)\n", 39 | " else:\n", 40 | " return np.array([[1, 1], [1, -1]])\n", 41 | "\n", 42 | " # calculate upper haar part\n", 43 | " h_n = np.kron(h, [1, 1])\n", 44 | " \n", 45 | " # calculate lower haar part \n", 46 | " h_i = np.sqrt(n/2)*np.kron(np.eye(len(h)), [1, -1])\n", 47 | "\n", 48 | " # combine parts\n", 49 | " h = np.vstack((h_n, h_i))\n", 50 | " return h" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 10, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAATwAAAD8CAYAAADqmhgGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAHsxJREFUeJzt3X2sXdV55/Hv715jKDjE4AvUsp3YScwMtCqQXhEQ02kKeGQylUGIdEB5IZUZCyUhidKXAWWE1HRGIo2mRGRQUgcyQGhLSEqKxTAljoEZjcWLnZoQwDU2nkzt4QrHjoEEhOH6PvPHXgeOr8+9Z5179svaez+faOmevc8++6wTyJO11rPW2jIznHOuDUaqroBzzpXFA55zrjU84DnnWsMDnnOuNTzgOedawwOec641Cgl4klZL2iFpl6Tri/gO55wblPKehydpFHgeWAXsBbYAV5nZc7l+kXPODaiIFt65wC4z221mbwL3AJcW8D3OOTeQeQXccwmwp+t4L/Ch2T6waGyRvfc97wXgMIcY5dgCqpU58MY/s+i49xR2/257Xv4nli38l6V8l3P9bNu2bb+ZnTLXz5+w7Dg7/MZU1LWH9r/1kJmtnut3FaWIgKce547qN0taB6wDWLZsKZs3bwbgVdvNiXpfAdXKfGf7Z/jEGbcWdv9uz6+5ndM3rC3lu8r2zRM+zrWv3V11NdwAjj/++P87zOcPvzHF8svGoq7dcdtE3IUlK6JLuxdY1nW8FHhx+kVmtt7Mxs1sfNHYogKq0duaJ8sLQE0NdoAHuxYSoJG4kqoiqrYFWClphaT5wJXAhgK+Z07effUHS/2+6+47v9Tvc64wgtFRRZVU5R7wzGwS+CzwELAduNfMns37e+riujuuqboKhTrnc++vugquJAJGRuJKqooYw8PMHgQeLOLeddPkbi3AtlteqLoKriwCjaTbeotRSMBzzjVTyq23GDWv/tx8Z/tnSv2+po/jffOEj1ddBVcST1q4vpo+jucZ23aQYESKKqlqZZd2zZNr4Yzyvq/p43iuPUZrHjFa2cIre2pKW3jGttmk+ndpax6vXUo8Y9t8IzXP0iYci5ul6YkL1w5SXEmVB7ySND1x0eEZ2+aS8p14HLNvpqQ/kPScpGcl/c2wvyG5gDd2YMkRx3sObZ7x2i0H/1vR1clNWxIX1752t4/lNZXE6Ly40v9WGgVuBS4BzgSuknTmtGtWAjcAF5jZbwBfGPYnJBfwmqwt3Vofy2uuHJMWMftm/nvgVjM7CGBm+4atvwe8ErWlWwuesW0ikes8vF77Zi6Zds3pwOmSNkt6XNLQ++vVOuB98PlL5vzZsldbQHu6teCtvEYabFrKmKStXWXd0Xc7yvR9M+cBK4EPA1cBt0laOMxP8GkpzrloA6yl3W9m47O8H7Nv5l7gcTN7C/g/knaQBcAt0bWYptYtvGGUuRFot7aM44FnbJsm2wBUUSVCzL6Zfw/8HoCkMbIu7u5hfkPyAW/FxFkzvjf6oV+f832rWm3x9csfq+R7q+AZ24YRjI7GlX5m2jdT0pclrQmXPQQckPQc8AjwJ2Z2YJif4F1aVygfy2uObAPQ/GYV99o308xu7HptwBdDyYUHPOdcHKW9TjZGzavv6sDH8ppjZERRJVWtDnhVTE2BdiUuwPfLawoRNwcv5f3wWh3wqtKmCcgd3sqrPwnmzRuJKqlKt2YlqGpqSpsmIHd4xrYZRjQSVVKVXM3eHDv2iOPJ5QtmvX6YDQSq3Ai0bd1a8Ixt7Slu/M7H8NxR2titdfXWmZbiAc8NrI3dWvCxvLqre9LC5+G5UnnGtr6yDUDr3Uaqd+0ZbscUqG5qCrRzHA+8lVdbEvNGR6JKqtKtWUmqytRCu9bVdvOMbT01YQzPu7SuEp6xrae6d2lrH/CG2TEF/Bm1zkWTUMIJiRj1Dteu1nwsr16a0KX1gFextiYuwDO2dVT3gFf7Lm3dtTVx4epHEvNidvdMmLfwXOU8Y1sfdZ947AEvAW3u1oJnbOsim3hc7y5t34An6duS9kl6puvcyZI2StoZ/p4UzkvSLZJ2SXpakqdAI/i6WlcXIyMjUSVVMTW7A5j+ANzrgU1mthLYFI4BLiF7jNpKYB3wjTwquefQ5lnfH2bHFKh2tQW0d11tN8/Ypq8VG4Ca2f8CfjHt9KXAneH1ncBlXefvsszjwEJJi/OqrGsuz9jWQIs3AD3NzCYAwt9Tw/klwJ6u6/aGc0eRtK7zVPID+4d68trQqlxe1vH8mturroJzsxK+Aeh0vdqy1utCM1tvZuNmNr5obNFQXzrsBgIprLbwbm3GM7YJy3kDUEmrJe0IY/7Xz3LdFZJM0viwP2GuAe+lTlc1/N0Xzu8FlnVdtxR4ce7Vc23jGdt0ZS280ajS917SKHAr2bj/mcBVks7scd27gM8BT+TxG+Ya8DYAV4fXVwP3d53/ZMjWnge80un6OufqTnlmac8FdpnZbjN7E7iHLAcw3Z8DfwG8kccviJmW8rfAY8C/kLRX0lrgJmCVpJ3AqnAM2VPEdwO7gG8Bn86jkq5dPGObJgGjGo0qEfqO90s6B1hmZg/k9Rv6Li0zs6tmeOuiHtcakPscjxUTZzG5fOb3h90xBbKpKZ8449ah7zOM6+4735ea4RnbZEnMG50fe/WYpK1dx+vNbH333Xp85u3xfkkjwM3Apwat5mx8LW2w5sm1cEa1dfBg51Km0KWNtN/MZksy9Bvvfxfwm8CjYUuqXwc2SFpjZt2BdCDp5o9d63nGNjFSnl3aLcBKSSskzQeuJMsBAGBmr5jZmJktN7PlwOPAUMEOPOC9LYWpKeDrart5xjYt2X54o1GlHzObBD4LPARsB+41s2clfVnSmqJ+g3dpE3PdHdfA5VXXwrlelOukYjN7kCzR2X3uxhmu/XAe3+ktvMT4BOQjecY2HdluKfm08KqSVMB7PUzZe9V2H3F+cvmCKqpTGe/WvsOfcJYOIY4ZOSaqpCqpgOcyvl3UkXwsLxXyFp7Ln3drj+atvAQov6VlVfGkhasFb+VVT4jRhFtvMbyF16XqjUCdS5lvD+VciTxjWzGNMG90flRJlQe8LilsBNrhG4IezTO21comHjf/mRa1MOxzLSCd1RbgiYuZ+FhelXJdWlaJxgQ81x7eyquGGpCl9YCXMJ+A3Ju38qri8/BcgXwCskuJZ2kTMuyDfDpSmpri43gz84xt+SRxzOj8qJKqxgS8vKSUqQXv1s7EM7ZVkLfwXLG8WzszH8srX92TFkkuLRs7sIQ3xwb7TB7PtYC0pqaAd2tdOoSSDmYxvIXnas3H8kokIY1GlVQl2cJzLpY/4axc3sJzrmLeyiuHEPM0P6qkygNeDylNTQFfV9uPZ2zL0RnD86RFw6TwjNpunrjozzO2ZVDS43MxPOA556KNUO+A16gubR47pkB6U1PAJyDH8LG8Ykn5TjyWtFrSDkm7JF3f4/0vSnpO0tOSNkl677C/oVEBr8l8AnJ/nrEtmpg3Mj+q9L1T1je+FbgEOBO4StKZ0y7bBoyb2W8B3wf+Ythf4AGvJnwcz1VN5DoP71xgl5ntNrM3gXuAS7svMLNHzOz1cPg4sHTY39CogJfXBgKp8m5tHM/YFmeE0agCjEna2lXWTbvVEmBP1/HecG4ma4H/MWz9PWlRI9fdcQ1cXnUt0ucZ26IMtLRsv5mNz3qzo1nPC6WPA+PA78Z++Uw84NWId2tdlTo7HudkL7Cs63gp8OLR36mLgS8Bv2tmh4b90kZ1aZ3r8IxtEXKdeLwFWClphaT5wJXAhiO+TToH+CtgjZnty+MX9A14kpZJekTSdknPSvp8OH+ypI2Sdoa/J4XzknRLSDU/LSl6jseJv8y2SHlz7Nij3ttzaHPfz+e1Ywqkt9rCDcYztvkTYlTzo0o/ZjYJfBZ4CNgO3Gtmz0r6sqQ14bKvAguA70l6StKGGW4XLaaFNwn8kZmdAZwHfCakj68HNpnZSmBTOIYszbwylHXAN4atZBVS2wi0wxMXrjr5zsMzswfN7HQze7+Z/edw7kYz2xBeX2xmp5nZ2aGsmf2O/fWtmZlNmNk/hte/JIvGS8hSyHeGy+4ELguvLwXusszjwEJJi4etqMv4fLzBeMY2P014atlASQtJy4FzgCeA08xsArKgKOnUcNlM6eaJYStbphRXW4AnLgblGds81X8D0OiAJ2kB8HfAF8zsValXVjm7tMe5o9LNYV7OOoBly4aeT+icK4HasJZW0jFkwe6vzey+cPqlTlc1/O1kUaLSzWa23szGzWx80diiuda/lXwcbzCesc2HGMltaVlVYrK0Am4HtpvZX3a9tQG4Ory+Gri/6/wnQ7b2POCVTtfX5cPH8QbjGdv8iJGokqqYml0AfAK4MKSGn5L0EeAmYJWkncCqcAzwILAb2AV8C/h0HhVdMXFW1HV57ZgC6U5N8XE8Vx1FljT1HcMzs//NzL/goh7XG5BmpBhQahuBuuGc87n3exJjKEq69RbDl5a51vBgN5ys7ZZu6y1GvcN1D3numJLq1BTwxIWrykhkSVO6NXOz8sTF3HjGdhj+EB9XEU9czI1nbIflXdpSTC5fEHVdnhsIQLqZWvBurStfG6aluER5t3bufI3tXCj6P6nygNdHqrumgHdrh+EZ27mqd9LCx/Ccc1F8WkoLpDw1BXwcbxiesR2UEKNRJVUe8Gru65c/VnUVauva1+72sbwB1T1p4V1a12o+ljcY79I6V3PeyoslPGnhXM15Ky9OlrRIN5jFqHftHeCJC1cSCWk0qqTKA14D+ATk4XnGNo5PPG6BlJeXgU9AzoNnbGMo1yytpNWSdoRnWF/f4/1jJX03vP9EeIjYUDzgRUh5tUWHd2uH52N5MfLZ8VhZv/dWsudYnwlcFZ533W0tcNDMPgDcDHxl2Np7wGsI79a6MuTYwjsX2GVmu83sTeAesmdad+t+9vX3gYs0y+MSYyQR8EZshEMcZPLEY94+96rtPuq6PYc2R90vz+daQPqrLcC7tXnxsbyZdZaWRY7hjUna2lXWTbvdTM+v7nmNmU0CrwBDPeLQp6U418X3y5uFKStx9pvZ+Czvxzy/OuoZ14NIooXn8uHjePnwVt5MDFlciRDz/Oq3r5E0D3g38IthfkEjA16ez7XoSD1TC76uNi+esZ2FRZb+tgArJa2QNB+4kuyZ1t26n319BfBweCrinHmX1rkePGM7g6HCTddtzCYlfRZ4CBgFvm1mz0r6MrDVzDYAtwPfkbSLrGV35bDf6wEvkj+j1jlguAbWtFvZg8CD087d2PX6DeCjuX0hDe3SOpcHH8ubxkCRJVW1CngrJs6Kui7vB/lAPaamgCcu8uQZ2x6mLK4kqlYBz/XnE5Dz5a28afJLWlTCA17D+ATkfHnGtlk84A2gDlNTwLu1efOMbWDkOQ+vEh7wGsi7ta4w3qVtjzrsmgLerS2Cj+UFNU9a+Dw85yJ4xjaT8pSTGMm28MYOTN84ASaXL4j+fN47pkB9pqYAPL/m9qqr4JomtjubcFBMNuC54Xi3thitz9g2PeBJOk7Sk5J+IulZSX8Wzq8I2y7vDNswzw/nc9+W2blUtDtja9nSspiSqJgW3iHgQjM7CzgbWC3pPLLtlm82s5XAQbLtmKGAbZnnoogdU5xrM9GCpWWW+VU4PCYUAy4k23YZsm2YLwuvc9+W2bmUtDpj24IWHpJGJT0F7AM2Ai8AL4dtl+HI7ZmjtmWWtK6z/fPPD+wf7le4nnwCcjFam7FtS9LCzA6b2dlku5KeS++Nkjo/M2pbZjNbb2bjZjZ+yqKx2PpGK2IDAajPagvwDUFd/hrfpe1mZi8DjwLnAQvDtstw5PbMuW/L7FyKWpmxbXqXVtIpkhaG178GXAxsBx4h23YZsm2Y7w+vc9+WOSV1WW3R4d3a4rQyY1vzLm3MSovFwJ3hwbkjwL1m9oCk54B7JP0nYBvZdsxQwLbMbu6uu+MauLzqWrhGMJJeNhajb8Azs6eBc3qc3002njf9fO7bMqekTqstwCcgF+2bJ3y8RUkMww43POA552bWnmAXhudq3sLzpWUt4OtqXW6anrSoyptjxw59jyI2EIB6TU0B79aWoS0ZW5uyqDIMSSdL2hiWrW6UdFKPa86W9FhY7vq0pH8Xc+9kA55zddKKjG0naVH8fnjXA5vCstVN4Xi614FPmtlvAKuBr3Vmk8zGA94c1G1qinP5yJIWMWVI3ctTu5etvlMTs+fNbGd4/SLZKrBT+t3YA55zOWnDGlsziypDOs3MJsL3TQCnznaxpHOB+WRLXmfV6IBX1I4pdZuaAj4BuQyNz9gO1qUd66yVD2Vd960k/UjSMz3KpYNUSdJi4DvAH5rZVL/rfVpKS/i6WpeHARIS+81sfMb7mF0803uSXpK02MwmQkDbN8N1JwL/HfiPZvZ4TKUa3cJzrgqNztiWMy2le3lq97LVt4UNh38A3GVm34u9caMDXlE7pkD9pqaAd2vL0tSMrVnclJQcJiffBKyStBNYFY6RNC7ptnDNHwD/GviUpKdCObvfjb1L2yK+rtYNq4ylZWZ2ALiox/mtwDXh9d3AwIOmjW7hFamOU1N8AnJ5GpmxLW8eXmE84M1RHTO14N3aslz72t3NHMvzgOfq5Lo7rqm6Cq3RxLG8kubhFcYDXst4t7ZcjWrlGTAVWRLlSQvnCtSsVp5hhxOOZhFq18Lbc2hz1VVwrp2snN1SilS7gOdc3TQqY+tJC1c3viFouZqSsTU8adFqdVxtAZ64qEIjxvIssnWXcAvPkxbOuWietGixOq626PAJyOWr/VieGVNvHY4qqfKA11I+Abl8td8vz4CpqbiSqNoFvBUTZw10fVEP8oH6Li8DH8erSp1beQZlbfFemNoFPJcf79aWr9YZWzNsaiqqpMoDXot5t7Yadc7Y2uGpqJKqxge8op5r0VHXqSng3Vo3IB/Dc87NRS3H8hqQpfV5eENa8+RaOKPqWri6qWPG1iDp8bkYybTwjn/9xKjrJpcvGOi+RT7XAuqdqQVPXLgBmMHhqbiSqGQCnquGJy6qVbeMbd13S/Eubct54qJatcrYWv2XlnnAc87FMcMSTkjEiO7SShqVtE3SA+F4haQnJO2U9N3wYFwkHRuOd4X3lxdT9XTUeWoK+Dhe1eqUsS1j4rGkkyVtDLFlo6STZrn2REn/T9J/jbn3IGN4nwe2dx1/BbjZzFYCB4FO32gtcNDMPgDcHK5zCfNxvGrVJWNrZmVNPL4e2BRiy6ZwPJM/B/5n7I2jAp6kpcC/BW4LxwIuBL4fLrkTuCy8vjQcE96/KFzfWHXeNQV8HM/FK2lpWXcM6Y4tR5D028BpwA9jbxzbwvsa8Ke88zyiRcDLZjYZjvcCS8LrJcAegPD+K+H66ZVdJ2mrpK0/P7A/tr5zUuQGAlD/qSng3doUJJ+xNeCwxZXhnGZmEwDh76nTL5A0AvwX4E8GuXHfgCfp94F9Zvbj7tM9LrWI9945YbbezMbNbPyURWNRlXXF8W5t9dLP2A60ecBYp0ETyrruO0n6kaRnepRLIyvzaeBBM9szyC+IydJeAKyR9BHgOOBEshbfQknzQituKfBiuH4vsAzYK2ke8G7gF4NUypXPu7WuLzOm3prsf11mv5mNz3wru3im9yS9JGmxmU1IWgzs63HZ+cDvSPo0sACYL+lXZjbbeF//Fp6Z3WBmS81sOXAl8LCZfQx4BLgiXHY1cH94vSEcE95/2FJ+qodzCUk6Y1tel7Y7hnTHlneqYvYxM3tPiEt/DNzVL9jBcCst/gPwRUm7yMboOo/Cuh1YFM5/kdkzLKUoescU5/KScsa2s5a2hKTFTcAqSTuBVeEYSeOSbhvmxgNNPDazR4FHw+vdwLk9rnkD+OgwlXLVuO6+8/n65Y9VXQ2XKjNssviVFmZ2ALiox/mtwFGDzWZ2B3BHzL19La17mycu0pFkxtZ8A9BK7Dm0eaDri94xBeq/2gI8cZGSJDO2BjY5FVVS5WtpnXNRzIypQ9FZ2iTVsoWXorqvtujwCcjpSC5jG8bw6tzC84DnjuDjeOlIMWNb98c0epc2J01YXgY+judmEcbw6sxbeM4lLpmMbXm7pRTGW3jOJS6VjK1NGVNveNKidCsmzhr4M0XvmALNmJoCnrhwM6t7C6+WAc8VyxMX6UkiY9uAeXge8HLUlKkpnrhIz7Wv3V39WJ5PS6mPMjYQaEqmFrxbm6Kqx/IM79K6hvJubZoqbeU1oIXnWVrXk3dr01RpK2+K2i8t84DnnItUzvZQRUq6S/uq7e55fnL5goHvVcaOKdCcqSng43ipqipja1b/pWVJB7w6akqmFvDNQBNVWca2AdNSvEvrXA1VM5ZnSWdgY3gLL2dNmpri0lZ6K89beM65qpTdyrMp3wDUNZwnLlw3n3jsGs0nIKet3IytMWVTUSVVHvDcrHwCctrKzNiawZRZVBmGpJMlbZS0M/w9aYbr3iPph5K2S3pO0vJ+9/aA5/rybm3ayhzLO2xTUWVI1wObzGwlsCkc93IX8FUzO4PsGdn7+t3YA57ry7u1DsAwJqcOR5UhXQrcGV7fCVw2/QJJZwLzzGwjgJn9ysxe73djD3gFaNJqC/BubR2UNZY3wBjemKStXWXdAF9zmplNAIS/p/a45nTgZUn3Sdom6auSRvvd2KelONcAZTzhzMwGSUjsN7Pxmd6U9COg13rPL0Xefx7wO8A5wD8D3wU+Bdw+24e8hVeAJi0v6/BxvPSV0crLK2lhZheb2W/2KPcDL0laDBD+9hqb2wtsM7PdZjYJ/D3Qd9Z/0gFv7MCSGd/bc2jzwPcr47kW0MzVFr6uNn1FZ2yNgbq0w9gAXB1eXw3c3+OaLcBJkk4JxxcCz/W7cdIBzzk3mGIztlZWlvYmYJWkncCqcIykcUm3AZjZYeCPgU2SfgoI+Fa/G/sYnnMuihl5ZGAjvscOABf1OL8VuKbreCPwW4Pc21t4zjVMcWN5vtKiVsp4kE9H06amgCcu6qKojG02hlf8SosieZe2IGueXAtnVF2LfHniwqXceosR1cKT9DNJP5X0lKSt4VzP9W7K3CJpl6SnJTUvZelcDeSdsbXykhaFGaRL+3tmdnbXZMKZ1rtdAqwMZR3wjbwq223FxFkDf6as51pAM6emgHdr6yT3jK2VNi2lMMOM4c203u1S4C7LPA4s7EwidPXn62rbq8S1tIWJDXgG/FDSj7vWxM203m0JsKfrs3vDuSNIWtdZZ/fzA/vnVntXOl9XWy95ZmxLnHhcmNikxQVm9qKkU4GNkv5plmvV49xRaRszWw+sB/jtcz6YblrHuRrLO2ObcgY2RlQLz8xeDH/3AT8g23tqpvVue4FlXR9fCryYV4Vd9Z5fM+v6bNdQZi1IWkg6QdK7Oq+BfwM8w8zr3TYAnwzZ2vOAVzpdX9cM3q2tn7wytm3o0p4G/EBS5/q/MbN/kLQFuFfSWrLtWT4arn8Q+AiwC3gd+MPca+2cG8i2W17g+NuOH+oenaRFnckS6JNL+iWwo+p6RBoD6pBlqUs9oT51rUs9oXdd32tmp/S6OIakfwj3jbHfzFbP9buKkkrA2zrbZoEpqUtd61JPqE9d61JPqFddy9SqtbTOuXbzgOeca41UAt76qiswgLrUtS71hPrUtS71hHrVtTRJjOE551wZUmnhOedc4SoPeJJWS9oRtpOa6QnjZdXl25L2SXqm61yS22BJWibpEUnbJT0r6fMp1lfScZKelPSTUM8/C+dXSHoi1PO7kuaH88eG413h/eVl1LOrvqPhOacPJF5P37JtDioNeMoenHsr2ZZSZwJXKXuieFXuAKbPHap0G6xZTAJ/ZGZnAOcBnwn/3aVW30PAhWZ2FnA2sDqswPkKcHOo50Ggs3xjLXDQzD4A3ByuK9Pnge1dx6nWExLbsq0WzKyyApwPPNR1fANwQ8V1Wg4803W8A1gcXi8GdoTXfwVc1eu6iup9P9kTnpKtL3A88I/Ah8gmxc6b/u8B8BBwfng9L1ynkuq3lCxQXAg8QLYRRnL1DN/5M2Bs2rlk/9mnUqru0kZtJVWxobbBKkPoTp0DPEGC9Q3dxKfINpjYCLwAvGzZA5Sn1+Xteob3XwEWlVFP4GvAnwKdxaCLEq0nFLBlWxtU/UyLqK2kEpVE3SUtAP4O+IKZvRrWPPe8tMe5Uupr2TNEz5a0kGy3nV5P++jUpZJ6Svp9YJ+Z/VjShyPqUvU//9y3bGuDqlt4ddhKKtltsCQdQxbs/trM7gunk62vmb0MPEo25rhQUuf/cLvr8nY9w/vvBn5RQvUuANZI+hlwD1m39msJ1hPwLdvmquqAtwVYGTJh84ErybaXSkmS22Apa8rdDmw3s79Mtb6STgktOyT9GnAxWVLgEeCKGerZqf8VwMMWBp6KZGY3mNlSM1tO9u/hw2b2sdTqCb5l21CqHkQk20rqebJxnS9VXJe/BSaAt8j+X3Et2bjMJmBn+HtyuFZkGeYXgJ8C4yXX9V+RdUueBp4K5SOp1ZfsyfDbQj2fAW4M598HPEm2jdj3gGPD+ePC8a7w/vsq+Pfgw8ADqdYz1OknoTzb+d9Nav/sUyy+0sI51xpVd2mdc640HvCcc63hAc851xoe8JxzreEBzznXGh7wnHOt4QHPOdcaHvCcc63x/wEEnpnPxqjgngAAAABJRU5ErkJggg==\n", 61 | "text/plain": [ 62 | "
" 63 | ] 64 | }, 65 | "metadata": { 66 | "needs_background": "light" 67 | }, 68 | "output_type": "display_data" 69 | } 70 | ], 71 | "source": [ 72 | "D = 512 #size of the dictionary. MUST be a power of 2.\n", 73 | "U = np.transpose(haarMatrix(D)/np.sqrt(D)) #computing dictionary matrix. \n", 74 | "plt.imshow(U, cmap='PiYG', interpolation='nearest')\n", 75 | "plt.colorbar()\n", 76 | "plt.show()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "**TASK A** : generate a random signal $x = Uz$ for some $z$ s.t. $\\|z\\|_0\\ll D$ and project it to $M\\ll D$ random directions, which you encode in a matrix $W$. Call the resulting vector $Wx =: y\\in\\mathbb{R}^M$ as in the lecture. Plot $x,z,y$." 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 11, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "#generating the sparse signal\n", 93 | "\n", 94 | "#doing the random projection\n", 95 | "\n", 96 | "#plotting\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "**TASK B** : generate the matrix $\\Theta = W U$ you saw in the lecture. " 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 12, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "#plotting Theta" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "**TASK C** : We have that $y = \\Theta z$ and we want to solve for $z$. Is the solution unique?\n", 120 | "\n", 121 | "Solve the problem using linear least squares, that is\n", 122 | "$$\\hat z_{LS} = \\text{argmin}_{z} \\|y - \\Theta z\\|^2,$$\n", 123 | "\n", 124 | "and compare $\\hat z_{LS}$ to $z$. What do you notice? How is the signal reconstruction?\n", 125 | "\n", 126 | "*HINT : check documentation at* https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 13, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "#fitting the model, plotting the result" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "**TASK D** : Get a better solution by solving a different (regularized) optimization problem. Compare the result with the groud truth and reconstruct the signal $x$. \n", 143 | "\n", 144 | "Comment your solution, why is this better? How does the solution change by changing the regularizer power?\n", 145 | " \n", 146 | "*HINT : induce sparsity.. remember basic statistics!*" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 14, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "#fitting the model, plotting the result\n" 156 | ] 157 | } 158 | ], 159 | "metadata": { 160 | "kernelspec": { 161 | "display_name": "Python 3", 162 | "language": "python", 163 | "name": "python3" 164 | }, 165 | "language_info": { 166 | "codemirror_mode": { 167 | "name": "ipython", 168 | "version": 3 169 | }, 170 | "file_extension": ".py", 171 | "mimetype": "text/x-python", 172 | "name": "python", 173 | "nbconvert_exporter": "python", 174 | "pygments_lexer": "ipython3", 175 | "version": "3.7.1" 176 | } 177 | }, 178 | "nbformat": 4, 179 | "nbformat_minor": 2 180 | } 181 | -------------------------------------------------------------------------------- /exercises/2019/ex11_old/features_cosmology_project.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import os.path 4 | import random 5 | import csv 6 | import sys 7 | 8 | import PIL.Image 9 | import numpy as np 10 | import sklearn.linear_model as sklm 11 | import sklearn.model_selection as skms 12 | import sklearn.preprocessing as skpp 13 | import sklearn.metrics as skmet 14 | 15 | def csv_to_dict(csv_path): 16 | with open(csv_path,'r') as fp: 17 | csv_fp=csv.reader(fp) 18 | next(csv_fp) 19 | d = dict(filter(None, csv_fp)) 20 | return d 21 | 22 | def extract_feats(img_arr): 23 | hist,_=np.histogram(img_arr,bins=10) 24 | 25 | # Consider more sophisticated features here: Frequency domain energy, ROI histograms, shape descriptors, etc... 26 | 27 | return hist 28 | 29 | if __name__=="__main__": 30 | 31 | try: 32 | data_path=os.environ["COSMOLOGY_DATA"].strip() 33 | except KeyError: 34 | print("ERROR: Provide data path via environment...") 35 | sys.exit(1) 36 | 37 | # Parameters 38 | feat_size=10 39 | train_ratio=0.7 40 | 41 | # Paths 42 | labeled_path=os.path.join(data_path,"labeled") 43 | label_file=os.path.join(data_path,"labeled.csv") 44 | 45 | # Initialization 46 | label_dict=csv_to_dict(label_file) 47 | img_prefixes=list(label_dict.keys()) 48 | random.shuffle(img_prefixes) 49 | n_train=int(train_ratio*len(img_prefixes)) 50 | n_test=len(img_prefixes)-n_train 51 | train_mat=np.zeros((n_train,feat_size)) 52 | train_y=np.zeros(n_train) 53 | test_mat=np.zeros((n_test,feat_size)) 54 | test_y=np.zeros(n_test) 55 | train_idx=0 56 | test_idx=0 57 | 58 | # Assemble train/test feature matrices / label vectors 59 | for idx,img_prefix in enumerate(img_prefixes): 60 | 61 | print("Image: {}/{}".format(idx+1,len(img_prefixes))) 62 | raw_image=PIL.Image.open(os.path.join(labeled_path,"{}.png".format(img_prefix))) 63 | img_arr=np.array(raw_image.getdata()).reshape(raw_image.size[0],raw_image.size[1]).astype(np.uint8) 64 | img_feats=extract_feats(img_arr) 65 | label=float(label_dict[img_prefix]) 66 | 67 | if idx vocab.txt 6 | -------------------------------------------------------------------------------- /exercises/2019/ex6/cooc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from scipy.sparse import * # this script needs scipy >= v0.15 3 | import numpy as np 4 | import pickle 5 | 6 | 7 | def main(): 8 | with open('vocab.pkl', 'rb') as f: 9 | vocab = pickle.load(f) 10 | vocab_size = len(vocab) 11 | 12 | data, row, col = [], [], [] 13 | counter = 1 14 | for fn in ['train_pos.txt', 'train_neg.txt']: 15 | with open(fn) as f: 16 | for line in f: 17 | tokens = [vocab.get(t, -1) for t in line.strip().split()] 18 | tokens = [t for t in tokens if t >= 0] 19 | for t in tokens: 20 | for t2 in tokens: 21 | data.append(1) 22 | row.append(t) 23 | col.append(t2) 24 | 25 | if counter % 10000 == 0: 26 | print(counter) 27 | counter += 1 28 | cooc = coo_matrix((data, (row, col))) 29 | print("summing duplicates (this can take a while)") 30 | cooc.sum_duplicates() 31 | with open('cooc.pkl', 'wb') as f: 32 | pickle.dump(cooc, f, pickle.HIGHEST_PROTOCOL) 33 | 34 | 35 | if __name__ == '__main__': 36 | main() 37 | -------------------------------------------------------------------------------- /exercises/2019/ex6/cut_vocab.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Note that this script uses GNU-style sed. On Mac OS, you are required to first 4 | # brew install gnu-sed --with-default-names 5 | cat vocab.txt | sed "s/^\s\+//g" | sort -rn | grep -v "^[1234]\s" | cut -d' ' -f2 > vocab_cut.txt 6 | -------------------------------------------------------------------------------- /exercises/2019/ex6/glove_solution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from scipy.sparse import * 3 | import numpy as np 4 | import pickle 5 | import random 6 | 7 | 8 | def main(): 9 | 10 | print("loading cooccurrence matrix") 11 | with open('cooc.pkl', 'rb') as f: 12 | cooc = pickle.load(f) 13 | print("{} nonzero entries".format(cooc.nnz)) 14 | 15 | nmax = 100 16 | print("using nmax =", nmax, ", cooc.max() =", cooc.max()) 17 | 18 | print("initializing embeddings"); 19 | print("cooc shape 0: ", cooc.shape[0], "cooc shape 1: ", cooc.shape[1]) 20 | embedding_dim = 20 21 | xs = np.random.normal(size=(cooc.shape[0], embedding_dim)) 22 | ys = np.random.normal(size=(cooc.shape[1], embedding_dim)) 23 | 24 | eta = 0.001 25 | alpha = 3 / 4 26 | 27 | epochs = 20 28 | 29 | for epoch in range(epochs): 30 | print("epoch {}".format(epoch)) 31 | for ix, jy, n in zip(cooc.row, cooc.col, cooc.data): 32 | logn = np.log(n) 33 | fn = min(1.0, (n / nmax) ** alpha) 34 | x, y = xs[ix, :], ys[jy, :] 35 | scale = 2 * eta * fn * (logn - np.dot(x, y)) 36 | xs[ix, :] += scale * y 37 | ys[jy, :] += scale * x 38 | 39 | np.savez('embeddings', xs, ys) 40 | 41 | 42 | if __name__ == '__main__': 43 | main() 44 | -------------------------------------------------------------------------------- /exercises/2019/ex6/glove_template.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from scipy.sparse import * 3 | import numpy as np 4 | import pickle 5 | import random 6 | 7 | 8 | def main(): 9 | print("loading cooccurrence matrix") 10 | with open('cooc.pkl', 'rb') as f: 11 | cooc = pickle.load(f) 12 | print("{} nonzero entries".format(cooc.nnz)) 13 | 14 | nmax = 100 15 | print("using nmax =", nmax, ", cooc.max() =", cooc.max()) 16 | 17 | print("initializing embeddings") 18 | embedding_dim = 20 19 | xs = np.random.normal(size=(cooc.shape[0], embedding_dim)) 20 | ys = np.random.normal(size=(cooc.shape[1], embedding_dim)) 21 | 22 | eta = 0.001 23 | alpha = 3 / 4 24 | 25 | epochs = 10 26 | 27 | for epoch in range(epochs): 28 | print("epoch {}".format(epoch)) 29 | for ix, jy, n in zip(cooc.row, cooc.col, cooc.data): 30 | 31 | # fill in your SGD code here, 32 | # for the update resulting from co-occurence (i,j) 33 | 34 | 35 | np.save('embeddings', xs) 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /exercises/2019/ex6/pickle_vocab.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pickle 3 | 4 | 5 | def main(): 6 | vocab = dict() 7 | with open('vocab_cut.txt') as f: 8 | for idx, line in enumerate(f): 9 | vocab[line.strip()] = idx 10 | 11 | with open('vocab.pkl', 'wb') as f: 12 | pickle.dump(vocab, f, pickle.HIGHEST_PROTOCOL) 13 | 14 | 15 | if __name__ == '__main__': 16 | main() 17 | -------------------------------------------------------------------------------- /exercises/2019/ex6/tutorial06.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/2019/ex6/tutorial06.pdf -------------------------------------------------------------------------------- /exercises/2019/ex7/tutorial07.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/2019/ex7/tutorial07.pdf -------------------------------------------------------------------------------- /exercises/2019/ex8/barbara.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/2019/ex8/barbara.png -------------------------------------------------------------------------------- /exercises/2019/ex8/boat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/2019/ex8/boat.png -------------------------------------------------------------------------------- /exercises/2019/ex8/tutorial08.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/2019/ex8/tutorial08.pdf -------------------------------------------------------------------------------- /exercises/2019/ex9/Compressed_sensing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# CIL EXCERCISE 11 : COMPRESSED SENSING \n", 12 | "# ETH Zurich \n", 13 | "# @ Author: Hadi Daneshmand, Xinrui Lyu\n", 14 | "# required libraries \n", 15 | "import numpy as np\n", 16 | "from matplotlib import pyplot as plt\n", 17 | "from skimage import io \n", 18 | "from sklearn.linear_model import Lasso\n", 19 | "from scipy.fftpack import dct,idct\n", 20 | "%pylab\n", 21 | "%matplotlib inline" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "# creating image \n", 33 | "img = np.zeros((128,128))\n", 34 | "w = 64\n", 35 | "for i in range(w): \n", 36 | " img[i,:]= 1\n", 37 | "plt.gray()\n", 38 | "plt.imshow(img)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "We want to reconstract the above image from a few random measurements that are generated as: " 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "m = 1000 # measurment number \n", 57 | "rmat = np.random.randn(m,img.shape[0]*img.shape[1]) # random matrix\n", 58 | "measurements = np.dot(rmat,img.reshape(img.shape[0]*img.shape[1])) # measurements" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "As you observe, the above dataset is not sparse. However its 2d cosine transform, which is ploted in the next section, is sparse." 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "collapsed": false, 73 | "scrolled": true 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "dcti = dct(img)\n", 78 | "plt.imshow(dcti)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "Use the above fact to recontruct the orginal signal from random measurements (Hint: You can also use sklearn.linear_model.Lasso())" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "collapsed": true 93 | }, 94 | "outputs": [], 95 | "source": [] 96 | } 97 | ], 98 | "metadata": { 99 | "kernelspec": { 100 | "display_name": "Python 2", 101 | "language": "python", 102 | "name": "python2" 103 | }, 104 | "language_info": { 105 | "codemirror_mode": { 106 | "name": "ipython", 107 | "version": 2 108 | }, 109 | "file_extension": ".py", 110 | "mimetype": "text/x-python", 111 | "name": "python", 112 | "nbconvert_exporter": "python", 113 | "pygments_lexer": "ipython2", 114 | "version": "2.7.11" 115 | } 116 | }, 117 | "nbformat": 4, 118 | "nbformat_minor": 0 119 | } 120 | -------------------------------------------------------------------------------- /exercises/2019/ex9/tutorial09.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/2019/ex9/tutorial09.pdf -------------------------------------------------------------------------------- /exercises/2020/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/2020/.DS_Store -------------------------------------------------------------------------------- /exercises/2020/ex10/barbara.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/2020/ex10/barbara.png -------------------------------------------------------------------------------- /exercises/2020/ex10/boat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/2020/ex10/boat.png -------------------------------------------------------------------------------- /exercises/2020/ex10/drum1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/2020/ex10/drum1.wav -------------------------------------------------------------------------------- /exercises/2020/ex10/drum2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dalab/lecture_cil_public/1eea37b39c3f2a21ee6f6cd7811f1e11516ed0bf/exercises/2020/ex10/drum2.wav -------------------------------------------------------------------------------- /exercises/2020/ex10/ex2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import matplotlib.pyplot as plt\n", 10 | "import numpy as np\n", 11 | "import pylab as pl" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Dictionary properties" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "First, we want to generate the matrix $U$ corresponding to the dictionary of Haar wavelets.\n", 26 | "The following implementation is based on formulas (8) and (9) in\n", 27 | "https://iopscience.iop.org/article/10.1088/0305-4470/36/24/316/pdf" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 6, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "def haarMatrix(n):\n", 37 | " if n > 2:\n", 38 | " h = haarMatrix(n / 2)\n", 39 | " else:\n", 40 | " return np.array([[1, 1], [1, -1]])\n", 41 | "\n", 42 | " # calculate upper haar part\n", 43 | " h_n = np.kron(h, [1, 1])\n", 44 | " \n", 45 | " # calculate lower haar part \n", 46 | " h_i = np.sqrt(n/2)*np.kron(np.eye(len(h)), [1, -1])\n", 47 | "\n", 48 | " # combine parts\n", 49 | " h = np.vstack((h_n, h_i))\n", 50 | " return h" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "Run the code below to generate a visual representation of $U$. Enjoy the symmetry and understand why we need $K$ to be a power of 2." 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 7, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "image/png": "\n", 68 | "text/plain": [ 69 | "
" 70 | ] 71 | }, 72 | "metadata": { 73 | "needs_background": "light" 74 | }, 75 | "output_type": "display_data" 76 | } 77 | ], 78 | "source": [ 79 | "K = 32 #size of the dictionary. MUST be a power of 2.\n", 80 | "U = np.transpose(haarMatrix(K)/np.sqrt(K)) #computing dictionary matrix. \n", 81 | "plt.imshow(U, cmap='PiYG', interpolation='nearest')\n", 82 | "plt.colorbar()\n", 83 | "plt.show()" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "**Question:** Why are the entries in the last columns of dark color? Why is this crucial? (Hint: Energy, cf. Ex 1)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "Next, notice that $U$ is orthonormal" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 8, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAS4AAAD8CAYAAADJwUnTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAE8tJREFUeJzt3X+sX3V9x/HnqwVkAaawe2FNfwiymtAQLXBTWFgUBU3tH1QTNa2Zw6XZNQvddOKyThdkbH+oCxKXNLjLaECDVBSYN6ZbJQzCXAb2VmrpjzGvXQeX3tB2orIYxep7f5xzybf3++vc7z3fc76fe1+P5KTfc77n+zkfDu0rn8/nfM45igjMzFKypO4KmJnNlYPLzJLj4DKz5Di4zCw5Di4zS46Dy8yS4+Ays76RtEPScUkH2nwvSX8vaVLSfklXFinXwWVm/XQvsL7D9+8BVufLKHBXkUIdXGbWNxHxJPCjDrtsBL4cmaeAN0ha1q3cM8qqYBGS2k7Tv+qqq6qsitmCdPToUU6ePKn5lNHp32kLB4GfN6yPRcTYHH6/HHihYX0q3zbd6UfzCi5J64EvAkuBf4yIz/Za1sTExHyqYmbAyMhI1Yf8eUTM56CtQrZrcPbcVZS0FNhO1kddA2yWtKbX8sxscEgqtJRgCljZsL4CONbtR/MZ41oHTEbEkYh4FdhJ1l81s8QtWbKk0FKCceAP8quL1wA/iYiO3USYX1exVd/06tk7SRolu1pgZokoqTWFpAeA64AhSVPAZ4AzASLiS8AuYAMwCfwM+MMi5c4nuAr1TfOBujGY86CfmdWgxG4gEbG5y/cB3DzXcucTXD31Tc1s8JUVXP0yn07qHmC1pEsknQVsIuuvtnXVVVcRES2XPg8AmtkcVDg435OeW1wRcUrSVmA32XSIHRFxsLSamVltBr3BMK95XBGxi2xwzcwWCEllXTHsm0pnzptZGhZ0i8vMFiYHl5klx8FlZslxcBXU6f2O7U6i3wlpVj4PzptZktziMrPkOLjMLDkOLjNLSt238xTh4DKzJg6uErS7etjp5PqKo1nvfFXRzJLjFpeZJcVjXGaWJAeXmSXHwWVmyfHgvJklxWNcfdbLjdndfmdm7iqaWYIcXGaWHAeXmSXHwWVmSfGDBM0sSW5xmVlyHFw18VQJs94t6OCSdBR4BfgVcCoiRsqolJnVZ7FMQH1HRJwsoRwzGxCLIbjMbIEZ9KuK861dAN+WtFfSaKsdJI1KmpA0ceLEiXkezsyqMNNd7LbUZb4trmsj4pikC4FHJf1nRDzZuENEjAFjACMjIx75NhtwdYdSEfNqcUXEsfzP48AjwLoyKmVm9Rr0FlfPwSXpHEnnzXwG3g0cKKti/RQRbZdB/J9kVrUyg0vSeknPSZqUtK3F96skPS7pGUn7JW3oVuZ8uooXAY/klT8D+GpE/Ms8yjOzAVHW4LykpcB24F3AFLBH0nhEHGrY7a+AByPiLklrgF3AxZ3K7Tm4IuII8NZef29mg6nkHsY6YDLPCyTtBDYCjcEVwG/mn18PHOtWqKdDmFmTOQTXkKSJhvWx/ILcjOXACw3rU8DVs8q4jWx2wp8A5wA3dDuog8vMmswhuE52uWOmVUGzZxdsBu6NiDsk/S7wFUmXR8Sv2xXq4DKzJiV2FaeAlQ3rK2juCm4B1gNExH9IOhsYAo63K3Swp8eaWS1KvKq4B1gt6RJJZwGbgPFZ+zwPXJ8f9zLgbKDjbHW3uGbp5akSfqKELSRlPkgwIk5J2grsBpYCOyLioKTbgYmIGAduAe6W9Gdk3ciPRJd/VA4uM2tS5rzFiNhFNsWhcdutDZ8PAdfOpUwHl5k1GfQJ1w4uM2vi4DKzpKRwi5uDy8yaOLgWkHYXOvwMe1toBv1Bgg4uM2viFpeZJcVjXGaWJAeXmSXHwWVmyfHgvJklxWNci0QvN2Z3+51ZnRxcZpYcB5eZJcfBZWbJcXCZWVLKfJBgvzi4zKyJW1xmlhwH1yLnqRKWokEPrq4dWUk7JB2XdKBh2wWSHpX0g/zP8/tbTTOrStE3/NQZbkVG4O4lf+dZg23AYxGxGngsXzezBSL54IqIJ4Efzdq8Ebgv/3wf8N6S62VmNVqyZEmhpS69jnFdFBHTABExLenCdjtKGgVGAVatWtXj4cysKnW3poroe2RGxFhEjETEyPDwcL8PZ2YlSL6r2MZLkpYB5H8eL69KZla3hRpc48BN+eebgG+WU53FJSLaLoP4l8UWj0EPrq5jXJIeAK4DhiRNAZ8BPgs8KGkL8DzwgX5W0syqsyBu+YmIzW2+ur7kupjZgBj0lr1nzptZEweXmSXHwWVmyXFwmVlS6r5iWISDa0D5qRJWp+SvKprZ4jPoLa7BjlUzq0WZE1AlrZf0nKRJSS2fJCPpg5IOSToo6avdynSLy8xOU+YYl6SlwHbgXcAUsEfSeEQcathnNfCXwLUR8XKnhzbMcIvLzJqU2OJaB0xGxJGIeBXYSfZYrEZ/BGyPiJcBIqLrvc8OLjNrMofncQ1JmmhYRmcVtRx4oWF9Kt/W6M3AmyX9u6SnJM1+cGkTdxXNrMkcuoonI2KkU1Etts2+9H0GsJrsnugVwL9JujwiftyuUAdXgnqZKuFpElZUyfO4poCVDesrgGMt9nkqIn4J/Lek58iCbE+7Qt1VNLMmJY5x7QFWS7pE0lnAJrLHYjX6J+Ad+XGHyLqORzoV6haXmTUpq8UVEackbQV2A0uBHRFxUNLtwEREjOffvVvSIeBXwJ9HxP92KtfBZWZNypyAGhG7gF2ztt3a8DmAT+RLIQ4uMzvNgniQoJktPoN+y4+Da4Fpd/XQN2bbXDi4zCw5Di4zS46Dy8yS4gcJmlmSfFXRzJLjFpeZJcfBZQPBz7C3olIY4+rakZW0Q9JxSQcatt0m6UVJ+/JlQ3+raWZVKvPRzf1QZATuXqDVg73ujIi1+bKrxfdmlqg5PEiwFl27ihHxpKSL+18VMxsUyXcVO9gqaX/elTy/3U6SRmce63rixIl5HM7MqlC0mzjoXcVW7gIuBdYC08Ad7XaMiLGIGImIkeHh4R4PZ2ZVGvTg6umqYkS8NPNZ0t3At0qrkZnVbtC7ij0Fl6RlETGdr74PONBpfxtsniphsyUfXJIeIHv7xpCkKeAzwHWS1pK9reMo8NE+1tHMKrQgHiQYEZtbbL6nD3UxswGRfIvLzBYfB5eZJcfBZWbJcXCZWVLqnqNVhIPLOvJUicUp+auKZrb4uMVlZslxcJlZUjzGZWZJcnCZWXI8OG9myXGLyxasXqZKeJrE4PMYl5klycFlZslxcJlZchxcZpaUFB4kONi1M7NalPmyDEnrJT0naVLStg77vV9SSBrpVqZbXNYX7a4e+sbsNJTVVZS0FNgOvAuYAvZIGo+IQ7P2Ow/4U+DpIuW6xWVmTUpsca0DJiPiSES8CuwENrbY72+AzwM/L1Kog8vMmswhuIZmXvicL6OziloOvNCwPpVvazzWFcDKiCj8mkN3Fc3sNHOcgHoyIjqNSbUq6LUxAUlLgDuBjxSuIA4uM2uhxKuKU8DKhvUVwLGG9fOAy4En8rD8bWBc0o0RMdGuUAeXmTUpcR7XHmC1pEuAF4FNwIdmvoyInwBDDcd9Avhkp9ACj3GZWQtlDc5HxClgK7AbOAw8GBEHJd0u6cZe61fkTdYrgS+TNeF+DYxFxBclXQB8DbiY7G3WH4yIl3utiC0Ofob94Cv7JuuI2AXsmrXt1jb7XlekzCItrlPALRFxGXANcLOkNcA24LGIWA08lq+b2QJQ5gTUfuja4oqIaWA6//yKpMNklzM3Atflu90HPAH8RV9qaWaVGvRbfuY0OC/pYuAKstmtF+WhRkRMS7qw9NqZWeXqbk0VUTi4JJ0LPAR8PCJ+Oof7lEaBUYBVq1b1Ukczq9igB1eh9qCkM8lC6/6IeDjf/JKkZfn3y4DjrX4bEWMRMRIRI8PDw2XU2cz6bNDHuLoGl7La3QMcjogvNHw1DtyUf74J+Gb51TOzOgx6cBXpKl4LfBh4VtK+fNungM8CD0raAjwPfKA/VbTFwlMlBsegdxWLXFX8Dq3vNwK4vtzqmFndUniQoG/5MbMmybe4zGzxcXCZWXIcXGaWlLqvGBbh4DKzJh6cNyuBp0pUyy0uM0uOg8vMkuIxLjNLkoPLzJLj4DKz5PiqopklxWNcZhXoZaqEp0l05uAys+Q4uMwsOQ4uM0uOg8vMkuIHCZpZktziMqtRu6uHvjG7MweXmSXHwWVmSfEEVDNLkgfnzSw5bnGZWXIcXGaWlBTGuLp2ZCWtlPS4pMOSDkr6WL79NkkvStqXLxv6X12zckRE22XmH26rZbHodA4G4XwUGYE7BdwSEZcB1wA3S1qTf3dnRKzNl119q6WZVarM4JK0XtJzkiYlbWvx/SckHZK0X9Jjkt7YrcyuwRUR0xHxvfzzK8BhYHmhGptZkpYsWVJo6UbSUmA78B5gDbC5oeEz4xlgJCLeAnwD+HzX+s3lP0bSxcAVwNP5pq15Su6QdP5cyjKzwVS0tVWwxbUOmIyIIxHxKrAT2Ni4Q0Q8HhE/y1efAlZ0K7RwcEk6F3gI+HhE/BS4C7gUWAtMA3e0+d2opAlJEydOnCh6ODOr0RyCa2jm33e+jM4qajnwQsP6FJ17bFuAf+5Wv0JXFSWdSRZa90fEwwAR8VLD93cD32r124gYA8YARkZGfBOYWQLmMPB+MiJGOhXVYlvLHJD0+8AI8PZuB+0aXMr+C+4BDkfEFxq2L4uI6Xz1fcCBbmWZWRpKvGI4BaxsWF8BHGtxvBuATwNvj4hfdCu0SIvrWuDDwLOS9uXbPkU2yLaWLD2PAh8tUJbZwOvlGfbdfpeaEoNrD7Ba0iXAi8Am4EOzjnUF8A/A+og4XqTQrsEVEd+hdXPP0x/MFqAyHyQYEackbQV2A0uBHRFxUNLtwEREjAN/B5wLfD0PzOcj4sZO5XrmvJk1KXNyaT7Hc9esbbc2fL5hrmU6uMysyaDfJeDgMrMmDi4zS0rd9yEW4eAysyZ+kKDZArJYpkq4xWVmyXFwmVlSPMZlZklycJlZchxcZpYcX1U0s6R4jMtsEVlIUyUcXGaWHAeXmSXHwWVmyXFwmVlSynyQYL84uMysiVtcZpYcB5eZ9TRVos5pEg4uM0uKJ6CaWZI8OG9myXGLy8yS4+Ays6SkMMbVtSMr6WxJ35X0fUkHJf11vv0SSU9L+oGkr0k6q//VNVt4IqLlMhMgrZZ+63TsKuvRTpERuF8A74yItwJrgfWSrgE+B9wZEauBl4Et/aummVUp+eCKzP/lq2fmSwDvBL6Rb78PeG9famhmlZq55afIUpdCR5a0VNI+4DjwKPBD4McRcSrfZQpY3p8qmlnVkm9xAUTEryJiLbACWAdc1mq3Vr+VNCppQtLEiRMneq+pmVVmQQTXjIj4MfAEcA3wBkkzVyVXAMfa/GYsIkYiYmR4eHg+dTWziiQfXJKGJb0h//wbwA3AYeBx4P35bjcB3+xXJc2sWoMeXEXmcS0D7pO0lCzoHoyIb0k6BOyU9LfAM8A9fayn2aLT6zPs56vuUCqia3BFxH7gihbbj5CNd5nZAuN7Fc0sOcm3uMxs8Rn04Brs9qCZVa7owHzRcJO0XtJzkiYlbWvx/evy2wYn89sIL+5WpoPLzJqUFVz5Rb3twHuANcBmSWtm7bYFeDkifge4k+x2wo4cXGbWpMRbftYBkxFxJCJeBXYCG2fts5HstkHIbiO8Xl1SsdIxrr17956U9D/56hBwssrjt+F6nM71OF1q9XjjfA+0d+/e3ZKGCu5+tqSJhvWxiBhrWF8OvNCwPgVcPauM1/aJiFOSfgL8Fh3+eysNroh4beq8pImIGKny+K24Hq6H63G6iFhfYnGtWk6zJ6gV2ec07iqaWT9NASsb1lvdHvjaPvlthK8HftSpUAeXmfXTHmC1sgePngVsAsZn7TNOdtsgZLcR/mt0eTdbnfO4xrrvUgnX43Sux+lcj3nIx6y2AruBpcCOiDgo6XZgIiLGyW4X/IqkSbKW1qZu5arOl06amfXCXUUzS46Dy8ySU0twdbsFoMJ6HJX0rKR9s+ai9Pu4OyQdl3SgYdsFkh5V9takRyWdX1M9bpP0Yn5O9knaUEE9Vkp6XNJhZW+S+li+vdJz0qEelZ4T+c1a3bV7NVK/FrIBuh8CbwLOAr4PrKm6HnldjgJDNRz3bcCVwIGGbZ8HtuWftwGfq6ketwGfrPh8LAOuzD+fB/wX2e0hlZ6TDvWo9JyQzWs6N/98JvA02VOHHwQ25du/BPxxlf+fBmmpo8VV5BaABS0inqR5nkrjbQ+VvDWpTT0qFxHTEfG9/PMrZE/YXU7F56RDPSoVGb9Zq4M6gqvVLQB1vSEogG9L2itptKY6zLgoIqYh+wcEXFhjXbZK2p93JfveZW2UPxngCrJWRm3nZFY9oOJzIr9Zq6M6gmvO0/v76NqIuJLszvWbJb2tpnoMkruAS8le/jsN3FHVgSWdCzwEfDwiflrVcQvUo/JzEvN4s9ZiUEdwFbkFoBIRcSz/8zjwCPU+ivolScsA8j+P11GJiHgp/0fza+BuKjonks4kC4v7I+LhfHPl56RVPeo6J/mx5/xmrcWgjuAqcgtA30k6R9J5M5+BdwMHOv+qrxpve6jtrUkzQZF7HxWck/wRJvcAhyPiCw1fVXpO2tWj6nMiv1mruzquCAAbyK7Y/BD4dE11eBPZFc3vAwerrAfwAFmX45dkLdAtZI/xeAz4Qf7nBTXV4yvAs8B+suBYVkE9fo+s27Mf2JcvG6o+Jx3qUek5Ad5C9uas/WQheWvD39nvApPA14HXVfV3dtAW3/JjZsnxzHkzS46Dy8yS4+Ays+Q4uMwsOQ4uM0uOg8vMkuPgMrPk/D8d1gosTXMHJwAAAABJRU5ErkJggg==\n", 108 | "text/plain": [ 109 | "
" 110 | ] 111 | }, 112 | "metadata": { 113 | "needs_background": "light" 114 | }, 115 | "output_type": "display_data" 116 | } 117 | ], 118 | "source": [ 119 | "plt.imshow(np.dot(np.transpose(U),U),cmap='binary', interpolation='nearest')\n", 120 | "plt.colorbar()\n", 121 | "plt.show()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "# Coding step" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "**Task A :** build a sparse signal corrupted by small Gaussian noise in the cell below" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 12, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "# solution" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "# Decoding step" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "**Task B :** Compute the Haar transform of the noisy signal and plot it. What do you notice?" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 10, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "#solution\n" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "**Task C :** Apply thresholding on the Haar transform and plot the signal reconstruction." 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "#solution" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "**Task D** :Increase the noise level, what happens?" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [] 199 | } 200 | ], 201 | "metadata": { 202 | "kernelspec": { 203 | "display_name": "Python 3", 204 | "language": "python", 205 | "name": "python3" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 3 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": "ipython3", 217 | "version": "3.7.5" 218 | }, 219 | "toc": { 220 | "base_numbering": 1, 221 | "nav_menu": {}, 222 | "number_sections": true, 223 | "sideBar": true, 224 | "skip_h1_title": false, 225 | "title_cell": "Table of Contents", 226 | "title_sidebar": "Contents", 227 | "toc_cell": false, 228 | "toc_position": {}, 229 | "toc_section_display": true, 230 | "toc_window_display": false 231 | }, 232 | "varInspector": { 233 | "cols": { 234 | "lenName": 16, 235 | "lenType": 16, 236 | "lenVar": 40 237 | }, 238 | "kernels_config": { 239 | "python": { 240 | "delete_cmd_postfix": "", 241 | "delete_cmd_prefix": "del ", 242 | "library": "var_list.py", 243 | "varRefreshCmd": "print(var_dic_list())" 244 | }, 245 | "r": { 246 | "delete_cmd_postfix": ") ", 247 | "delete_cmd_prefix": "rm(", 248 | "library": "var_list.r", 249 | "varRefreshCmd": "cat(var_dic_list()) " 250 | } 251 | }, 252 | "types_to_exclude": [ 253 | "module", 254 | "function", 255 | "builtin_function_or_method", 256 | "instance", 257 | "_Feature" 258 | ], 259 | "window_display": false 260 | } 261 | }, 262 | "nbformat": 4, 263 | "nbformat_minor": 2 264 | } 265 | -------------------------------------------------------------------------------- /exercises/2020/ex11/ex3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Understanding compressed sensing with a simple example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 8, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import matplotlib.pyplot as plt\n", 17 | "import numpy as np\n", 18 | "import sklearn.linear_model as linear_model" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "First, we generate the matrix $U$ corresponding to the dictionary of Haar wavelets.\n", 26 | "\n", 27 | "same as last week :)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 9, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "def haarMatrix(n):\n", 37 | " if n > 2:\n", 38 | " h = haarMatrix(n / 2)\n", 39 | " else:\n", 40 | " return np.array([[1, 1], [1, -1]])\n", 41 | "\n", 42 | " # calculate upper haar part\n", 43 | " h_n = np.kron(h, [1, 1])\n", 44 | " \n", 45 | " # calculate lower haar part \n", 46 | " h_i = np.sqrt(n/2)*np.kron(np.eye(len(h)), [1, -1])\n", 47 | "\n", 48 | " # combine parts\n", 49 | " h = np.vstack((h_n, h_i))\n", 50 | " return h" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 10, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "image/png": "\n", 61 | "text/plain": [ 62 | "
" 63 | ] 64 | }, 65 | "metadata": { 66 | "needs_background": "light" 67 | }, 68 | "output_type": "display_data" 69 | } 70 | ], 71 | "source": [ 72 | "D = 512 #size of the dictionary. MUST be a power of 2.\n", 73 | "U = np.transpose(haarMatrix(D)/np.sqrt(D)) #computing dictionary matrix. \n", 74 | "plt.imshow(U, cmap='PiYG', interpolation='nearest')\n", 75 | "plt.colorbar()\n", 76 | "plt.show()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "**TASK A** : generate a random signal $x = Uz$ for some $z$ s.t. $\\|z\\|_0\\ll D$ and project it to $M\\ll D$ random directions, which you encode in a matrix $W$. Call the resulting vector $Wx =: y\\in\\mathbb{R}^M$ as in the lecture. Plot $x,z,y$." 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 11, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "#generating the sparse signal\n", 93 | "\n", 94 | "#doing the random projection\n", 95 | "\n", 96 | "#plotting\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "**TASK B** : generate the matrix $\\Theta = W U$ you saw in the lecture. " 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 12, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "#plotting Theta" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "**TASK C** : We have that $y = \\Theta z$ and we want to solve for $z$. Is the solution unique?\n", 120 | "\n", 121 | "Solve the problem using linear least squares, that is\n", 122 | "$$\\hat z_{LS} = \\text{argmin}_{z} \\|y - \\Theta z\\|^2,$$\n", 123 | "\n", 124 | "and compare $\\hat z_{LS}$ to $z$. What do you notice? How is the signal reconstruction?\n", 125 | "\n", 126 | "*HINT : check documentation at* https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 13, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "#fitting the model, plotting the result" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "**TASK D** : Get a better solution by solving a different (regularized) optimization problem. Compare the result with the groud truth and reconstruct the signal $x$. \n", 143 | "\n", 144 | "Comment your solution, why is this better? How does the solution change by changing the regularizer power?\n", 145 | " \n", 146 | "*HINT : induce sparsity.. remember basic statistics!*" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 14, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "#fitting the model, plotting the result\n" 156 | ] 157 | } 158 | ], 159 | "metadata": { 160 | "kernelspec": { 161 | "display_name": "Python 3", 162 | "language": "python", 163 | "name": "python3" 164 | }, 165 | "language_info": { 166 | "codemirror_mode": { 167 | "name": "ipython", 168 | "version": 3 169 | }, 170 | "file_extension": ".py", 171 | "mimetype": "text/x-python", 172 | "name": "python", 173 | "nbconvert_exporter": "python", 174 | "pygments_lexer": "ipython3", 175 | "version": "3.7.1" 176 | } 177 | }, 178 | "nbformat": 4, 179 | "nbformat_minor": 2 180 | } 181 | -------------------------------------------------------------------------------- /exercises/2020/ex9/features_cosmology_project.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import os.path 4 | import random 5 | import csv 6 | import sys 7 | 8 | import PIL.Image 9 | import numpy as np 10 | import sklearn.linear_model as sklm 11 | import sklearn.model_selection as skms 12 | import sklearn.preprocessing as skpp 13 | import sklearn.metrics as skmet 14 | 15 | def csv_to_dict(csv_path): 16 | with open(csv_path,'r') as fp: 17 | csv_fp=csv.reader(fp) 18 | next(csv_fp) 19 | d = dict(filter(None, csv_fp)) 20 | return d 21 | 22 | def extract_feats(img_arr): 23 | hist,_=np.histogram(img_arr,bins=10) 24 | 25 | # Consider more sophisticated features here: Frequency domain energy, ROI histograms, shape descriptors, etc... 26 | 27 | return hist 28 | 29 | if __name__=="__main__": 30 | 31 | try: 32 | data_path=os.environ["COSMOLOGY_DATA"].strip() 33 | except KeyError: 34 | print("ERROR: Provide data path via environment...") 35 | sys.exit(1) 36 | 37 | # Parameters 38 | feat_size=10 39 | train_ratio=0.7 40 | 41 | # Paths 42 | labeled_path=os.path.join(data_path,"labeled") 43 | label_file=os.path.join(data_path,"labeled.csv") 44 | 45 | # Initialization 46 | label_dict=csv_to_dict(label_file) 47 | img_prefixes=list(label_dict.keys()) 48 | random.shuffle(img_prefixes) 49 | n_train=int(train_ratio*len(img_prefixes)) 50 | n_test=len(img_prefixes)-n_train 51 | train_mat=np.zeros((n_train,feat_size)) 52 | train_y=np.zeros(n_train) 53 | test_mat=np.zeros((n_test,feat_size)) 54 | test_y=np.zeros(n_test) 55 | train_idx=0 56 | test_idx=0 57 | 58 | # Assemble train/test feature matrices / label vectors 59 | for idx,img_prefix in enumerate(img_prefixes): 60 | 61 | print("Image: {}/{}".format(idx+1,len(img_prefixes))) 62 | raw_image=PIL.Image.open(os.path.join(labeled_path,"{}.png".format(img_prefix))) 63 | img_arr=np.array(raw_image.getdata()).reshape(raw_image.size[0],raw_image.size[1]).astype(np.uint8) 64 | img_feats=extract_feats(img_arr) 65 | label=float(label_dict[img_prefix]) 66 | 67 | if idx" 32 | ] 33 | }, 34 | "metadata": { 35 | "needs_background": "light" 36 | }, 37 | "output_type": "display_data" 38 | } 39 | ], 40 | "source": [ 41 | "# Load Fashion MNIST dataset\n", 42 | "(trainX, trainy), (testX, testy) = fashion_mnist.load_data()\n", 43 | "\n", 44 | "# Display first image\n", 45 | "plt.imshow(trainX[0,:,:])\n", 46 | "width, height = trainX[0,:,:].shape" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "Num samples: 60000\n", 59 | "Num samples: 6000\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "# Take only a few samples of the shoe class\n", 65 | "print(f'Num samples: {trainX.shape[0]}')\n", 66 | "trainX = trainX[trainy == 9, :, :] \n", 67 | "print(f'Num samples: {trainX.shape[0]}')" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 4, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "(100, 784)\n" 80 | ] 81 | } 82 | ], 83 | "source": [ 84 | "# Only consider trainX, and compute input matrix X : n x d\n", 85 | "X = trainX.reshape(trainX.shape[0], -1)\n", 86 | "\n", 87 | "# Take only a few samples of the shoe class\n", 88 | "X = X[:100,:]\n", 89 | "\n", 90 | "print(X.shape)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 5, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "# Subtract the mean image from input matrix X\n", 100 | "mX = ..." 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": true 108 | }, 109 | "outputs": [], 110 | "source": [] 111 | } 112 | ], 113 | "metadata": { 114 | "kernelspec": { 115 | "display_name": "Python 3", 116 | "language": "python", 117 | "name": "python3" 118 | }, 119 | "language_info": { 120 | "codemirror_mode": { 121 | "name": "ipython", 122 | "version": 3 123 | }, 124 | "file_extension": ".py", 125 | "mimetype": "text/x-python", 126 | "name": "python", 127 | "nbconvert_exporter": "python", 128 | "pygments_lexer": "ipython3", 129 | "version": "3.8.5" 130 | } 131 | }, 132 | "nbformat": 4, 133 | "nbformat_minor": 1 134 | } 135 | -------------------------------------------------------------------------------- /exercises/2021/Exercise_2_gaussian_isolines.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Data Generation\n", 8 | "===" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 7, 14 | "metadata": { 15 | "scrolled": false 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "import numpy as np\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "from mpl_toolkits import mplot3d\n", 22 | "%matplotlib inline" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 8, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "d=2" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 10, 37 | "metadata": { 38 | "scrolled": true 39 | }, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "image/png": "\n", 44 | "text/plain": [ 45 | "
" 46 | ] 47 | }, 48 | "metadata": { 49 | "needs_background": "light" 50 | }, 51 | "output_type": "display_data" 52 | } 53 | ], 54 | "source": [ 55 | "mean=np.array([1,1])\n", 56 | "sigma=np.array(([1,+0.5],[+0.5,1]))\n", 57 | "\n", 58 | "fig=plt.figure(1)\n", 59 | "s=0.05\n", 60 | "x_ax = np.arange(0,2+s, s) \n", 61 | "y_ax = np.arange(0,2+s, s)\n", 62 | "x_ax,y_ax = np.meshgrid(x_ax,y_ax)\n", 63 | "y=np.zeros((x_ax.shape[0],x_ax.shape[1]))\n", 64 | "\n", 65 | "for i in range(x_ax.shape[0]):\n", 66 | " for j in range(x_ax.shape[1]):\n", 67 | " y[i,j]=compute_p(np.array((x_ax[i,j],y_ax[i,j])),mean,sigma)\n", 68 | " \n", 69 | "\n", 70 | "plt.contourf(x_ax,y_ax,y)\n", 71 | "plt.colorbar()\n", 72 | "plt.gcf().set_size_inches((8,8))\n", 73 | "plt.show()\n" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "Solution\n", 81 | "===" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 9, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "def compute_p(X,mean,sigma):\n", 91 | " factor=1/((2*np.pi)**(d/2)*np.linalg.det(sigma)**(1/2))\n", 92 | " p=factor*np.exp(-0.5*np.dot(np.dot((X-mean).T,np.linalg.inv(sigma)),(X-mean)))\n", 93 | " return p" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [] 102 | } 103 | ], 104 | "metadata": { 105 | "kernelspec": { 106 | "display_name": "Python 3", 107 | "language": "python", 108 | "name": "python3" 109 | }, 110 | "language_info": { 111 | "codemirror_mode": { 112 | "name": "ipython", 113 | "version": 3 114 | }, 115 | "file_extension": ".py", 116 | "mimetype": "text/x-python", 117 | "name": "python", 118 | "nbconvert_exporter": "python", 119 | "pygments_lexer": "ipython3", 120 | "version": "3.8.5" 121 | } 122 | }, 123 | "nbformat": 4, 124 | "nbformat_minor": 1 125 | } 126 | -------------------------------------------------------------------------------- /exercises/2021/Exercise_8_NMF.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Non-Negative Matrix Factorization (NMF) vs PCA" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Setup\n", 15 | "1. Download the [CBCL faces dataset](http://www.ai.mit.edu/courses/6.899/lectures/faces.tar.gz) and extract `face.train.tar.gz` to the same directory as this notebook.\n", 16 | "2. Make sure that you have a working PyTorch setup (this is what we will use in this notebook)." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "Loaded 2429 images of size 19 x 19\n" 29 | ] 30 | }, 31 | { 32 | "data": { 33 | "text/plain": [ 34 | "torch.Size([361, 2429])" 35 | ] 36 | }, 37 | "execution_count": 1, 38 | "metadata": {}, 39 | "output_type": "execute_result" 40 | } 41 | ], 42 | "source": [ 43 | "import glob\n", 44 | "import numpy as np\n", 45 | "import torch\n", 46 | "from PIL import Image\n", 47 | "\n", 48 | "# Prepare data matrix X\n", 49 | "images = []\n", 50 | "for file in glob.glob('train/face/*.pgm'):\n", 51 | " images.append(torch.FloatTensor(np.array(Image.open(file))))\n", 52 | " \n", 53 | "X = torch.stack(images, dim=-1)\n", 54 | "print(f'Loaded {X.shape[2]} images of size {X.shape[0]} x {X.shape[1]}')\n", 55 | "img_size = X.shape[:2]\n", 56 | "X = X.flatten(0, 1) # Unroll each 2D image into a 1D vector\n", 57 | "X /= 255 # Normalize in [0, 1]\n", 58 | "\n", 59 | "X.shape" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## NMF" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "scrolled": true 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "import torch.optim as optim\n", 78 | "import torch.nn as nn\n", 79 | "import math\n", 80 | "\n", 81 | "K = 49 # Number of features\n", 82 | "\n", 83 | "# Initialize W and Z from a uniform distribution U(0, 1)\n", 84 | "# Additionally, the matrices are scaled by 1/sqrt(K) to make the variance of the resulting product independent of K\n", 85 | "W = torch.rand(X.shape[0], K).mul_(1/math.sqrt(K)).requires_grad_()\n", 86 | "Z = torch.rand(X.shape[1], K).mul_(1/math.sqrt(K)).requires_grad_()\n", 87 | "\n", 88 | "# Write PyTorch code...." 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "# PCA" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "scrolled": true 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "import torch.optim as optim\n", 107 | "import torch.nn as nn\n", 108 | "import math\n", 109 | "\n", 110 | "K = 49 # Number of features\n", 111 | "\n", 112 | "# Initialize W and Z from a uniform distribution U(0, 1)\n", 113 | "W = torch.rand(X.shape[0], K).mul_(1/math.sqrt(K)).requires_grad_()\n", 114 | "Z = torch.rand(X.shape[1], K).mul_(1/math.sqrt(K)).requires_grad_()\n", 115 | "\n", 116 | "# Write PyTorch code...." 117 | ] 118 | } 119 | ], 120 | "metadata": { 121 | "kernelspec": { 122 | "display_name": "Python 3", 123 | "language": "python", 124 | "name": "python3" 125 | }, 126 | "language_info": { 127 | "codemirror_mode": { 128 | "name": "ipython", 129 | "version": 3 130 | }, 131 | "file_extension": ".py", 132 | "mimetype": "text/x-python", 133 | "name": "python", 134 | "nbconvert_exporter": "python", 135 | "pygments_lexer": "ipython3", 136 | "version": "3.6.5" 137 | } 138 | }, 139 | "nbformat": 4, 140 | "nbformat_minor": 4 141 | } 142 | -------------------------------------------------------------------------------- /exercises/2021/Project_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Sentiment Classification Project" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "# Load data" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "name": "stdout", 33 | "output_type": "stream", 34 | "text": [ 35 | "2500000 tweets loaded\n" 36 | ] 37 | } 38 | ], 39 | "source": [ 40 | "tweets = []\n", 41 | "labels = []\n", 42 | "\n", 43 | "def load_tweets(filename, label):\n", 44 | " with open(filename, 'r', encoding='utf-8') as f:\n", 45 | " for line in f:\n", 46 | " tweets.append(line.rstrip())\n", 47 | " labels.append(label)\n", 48 | " \n", 49 | "load_tweets('twitter-datasets/train_neg_full.txt', 0)\n", 50 | "load_tweets('twitter-datasets/train_pos_full.txt', 1)\n", 51 | "\n", 52 | "# Convert to NumPy array to facilitate indexing\n", 53 | "tweets = np.array(tweets)\n", 54 | "labels = np.array(labels)\n", 55 | "\n", 56 | "print(f'{len(tweets)} tweets loaded')" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "# Build validation set\n", 64 | "We use 90% of tweets for training, and 10% for validation" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 3, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "data": { 74 | "text/plain": [ 75 | "(2250000, 250000)" 76 | ] 77 | }, 78 | "execution_count": 3, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "np.random.seed(1) # Reproducibility!\n", 85 | "\n", 86 | "shuffled_indices = np.random.permutation(len(tweets))\n", 87 | "split_idx = int(0.9 * len(tweets))\n", 88 | "train_indices = shuffled_indices[:split_idx]\n", 89 | "val_indices = shuffled_indices[split_idx:]\n", 90 | "\n", 91 | "len(train_indices), len(val_indices)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "# Bag-of-words baseline" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 4, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "from sklearn.feature_extraction.text import CountVectorizer\n", 108 | "\n", 109 | "# We only keep the 5000 most frequent words, both to reduce the computational cost and reduce overfitting\n", 110 | "vectorizer = CountVectorizer(max_features=5000)\n", 111 | "\n", 112 | "# Important: we call fit_transform on the training set, and only transform on the validation set\n", 113 | "X_train = vectorizer.fit_transform(tweets[train_indices])\n", 114 | "X_val = vectorizer.transform(tweets[val_indices])\n", 115 | "\n", 116 | "Y_train = labels[train_indices]\n", 117 | "Y_val = labels[val_indices]" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "Now we train a logistic classifier..." 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 5, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "name": "stderr", 134 | "output_type": "stream", 135 | "text": [ 136 | "C:\\Users\\Admin\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:764: ConvergenceWarning: lbfgs failed to converge (status=1):\n", 137 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", 138 | "\n", 139 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n", 140 | " https://scikit-learn.org/stable/modules/preprocessing.html\n", 141 | "Please also refer to the documentation for alternative solver options:\n", 142 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", 143 | " extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n" 144 | ] 145 | }, 146 | { 147 | "data": { 148 | "text/plain": [ 149 | "LogisticRegression(C=100000.0)" 150 | ] 151 | }, 152 | "execution_count": 5, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "from sklearn.linear_model import LogisticRegression\n", 159 | "\n", 160 | "model = LogisticRegression(C=1e5, max_iter=100)\n", 161 | "model.fit(X_train, Y_train)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 6, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "Y_train_pred = model.predict(X_train)\n", 171 | "Y_val_pred = model.predict(X_val)\n", 172 | "\n", 173 | "train_accuracy = (Y_train_pred == Y_train).mean()\n", 174 | "val_accuracy = (Y_val_pred == Y_val).mean()" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 7, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "name": "stdout", 184 | "output_type": "stream", 185 | "text": [ 186 | "Accuracy (training set): 0.80527\n", 187 | "Accuracy (validation set): 0.80324\n" 188 | ] 189 | } 190 | ], 191 | "source": [ 192 | "print(f'Accuracy (training set): {train_accuracy:.05f}')\n", 193 | "print(f'Accuracy (validation set): {val_accuracy:.05f}')" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "# Model interpretation" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 8, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "name": "stdout", 210 | "output_type": "stream", 211 | "text": [ 212 | "---- Top 10 negative words\n", 213 | "paperback -7.733715089476916\n", 214 | "hardcover -6.749721263599857\n", 215 | "sadtweet -4.022199848355659\n", 216 | "audio -3.8849876465208113\n", 217 | "misc -3.7553966613158702\n", 218 | "depressing -3.63732789050843\n", 219 | "gutted -3.5956754364460863\n", 220 | "wahhh -3.521614632401248\n", 221 | "apparel -3.217069805985382\n", 222 | "fml -3.1400132802859333\n", 223 | "\n", 224 | "---- Top 10 positive words\n", 225 | "thx 2.057920021771283\n", 226 | "cantsayno 2.059860424345465\n", 227 | "blessed 2.1638415390167096\n", 228 | "smiling 2.195291992774262\n", 229 | "worries 2.3181506563261367\n", 230 | "ifindthatattractive 2.4271197000353912\n", 231 | "harrypotterchatuplines 2.4633027181285185\n", 232 | "smartnokialumia 3.1312606562595673\n", 233 | "waystomakemehappy 3.382280627938651\n", 234 | "yougetmajorpointsif 4.349066000550539\n", 235 | "\n" 236 | ] 237 | } 238 | ], 239 | "source": [ 240 | "model_features = model.coef_[0]\n", 241 | "sorted_features = np.argsort(model_features)\n", 242 | "top_neg = sorted_features[:10]\n", 243 | "top_pos = sorted_features[-10:]\n", 244 | "\n", 245 | "mapping = vectorizer.get_feature_names()\n", 246 | "\n", 247 | "print('---- Top 10 negative words')\n", 248 | "for i in top_neg:\n", 249 | " print(mapping[i], model_features[i])\n", 250 | "print()\n", 251 | "\n", 252 | "print('---- Top 10 positive words')\n", 253 | "for i in top_pos:\n", 254 | " print(mapping[i], model_features[i])\n", 255 | "print()" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [] 264 | } 265 | ], 266 | "metadata": { 267 | "kernelspec": { 268 | "display_name": "Python 3", 269 | "language": "python", 270 | "name": "python3" 271 | }, 272 | "language_info": { 273 | "codemirror_mode": { 274 | "name": "ipython", 275 | "version": 3 276 | }, 277 | "file_extension": ".py", 278 | "mimetype": "text/x-python", 279 | "name": "python", 280 | "nbconvert_exporter": "python", 281 | "pygments_lexer": "ipython3", 282 | "version": "3.6.5" 283 | } 284 | }, 285 | "nbformat": 4, 286 | "nbformat_minor": 4 287 | } 288 | -------------------------------------------------------------------------------- /exercises/2021/ex8/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Twitter Datasets 4 | 5 | Download the tweet datasets from here: 6 | http://www.da.inf.ethz.ch/teaching/2018/CIL/material/exercise/twitter-datasets.zip 7 | 8 | 9 | The dataset should have the following files: 10 | - sample_submission.csv 11 | - train_neg.txt : a subset of negative training samples 12 | - train_pos.txt: a subset of positive training samples 13 | - test_data.txt: 14 | - train_neg_full.txt: the full negative training samples 15 | - train_pos_full.txt: the full positive training samples 16 | 17 | ## Build the Co-occurence Matrix 18 | 19 | To build a co-occurence matrix, run the following commands. (Remember to put the data files 20 | in the correct locations) 21 | 22 | Note that the cooc.py script takes a few minutes to run, and displays the number of tweets processed. 23 | 24 | - build_vocab.sh 25 | - cut_vocab.sh 26 | - python3 pickle_vocab.py 27 | - python3 cooc.py 28 | 29 | ## Template for Glove Question 30 | 31 | Your task is to fill in the SGD updates to the template 32 | glove_template.py 33 | 34 | Once you tested your system on the small set of 10% of all tweets, we suggest you run on the full datasets train_pos_full.txt, train_neg_full.txt 35 | -------------------------------------------------------------------------------- /exercises/2021/ex8/build_vocab.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Note that this script uses GNU-style sed. On Mac OS, you are required to first 4 | # brew install gnu-sed --with-default-names 5 | cat train_pos.txt train_neg.txt | sed "s/ /\n/g" | grep -v "^\s*$" | sort | uniq -c > vocab.txt 6 | -------------------------------------------------------------------------------- /exercises/2021/ex8/cooc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from scipy.sparse import * # this script needs scipy >= v0.15 3 | import numpy as np 4 | import pickle 5 | 6 | 7 | def main(): 8 | with open('vocab.pkl', 'rb') as f: 9 | vocab = pickle.load(f) 10 | vocab_size = len(vocab) 11 | 12 | data, row, col = [], [], [] 13 | counter = 1 14 | for fn in ['train_pos.txt', 'train_neg.txt']: 15 | with open(fn) as f: 16 | for line in f: 17 | tokens = [vocab.get(t, -1) for t in line.strip().split()] 18 | tokens = [t for t in tokens if t >= 0] 19 | for t in tokens: 20 | for t2 in tokens: 21 | data.append(1) 22 | row.append(t) 23 | col.append(t2) 24 | 25 | if counter % 10000 == 0: 26 | print(counter) 27 | counter += 1 28 | cooc = coo_matrix((data, (row, col))) 29 | print("summing duplicates (this can take a while)") 30 | cooc.sum_duplicates() 31 | with open('cooc.pkl', 'wb') as f: 32 | pickle.dump(cooc, f, pickle.HIGHEST_PROTOCOL) 33 | 34 | 35 | if __name__ == '__main__': 36 | main() 37 | -------------------------------------------------------------------------------- /exercises/2021/ex8/cut_vocab.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Note that this script uses GNU-style sed. On Mac OS, you are required to first 4 | # brew install gnu-sed --with-default-names 5 | cat vocab.txt | sed "s/^\s\+//g" | sort -rn | grep -v "^[1234]\s" | cut -d' ' -f2 > vocab_cut.txt 6 | -------------------------------------------------------------------------------- /exercises/2021/ex8/glove_solution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from scipy.sparse import * 3 | import numpy as np 4 | import pickle 5 | import random 6 | 7 | 8 | def main(): 9 | 10 | print("loading cooccurrence matrix") 11 | with open('cooc.pkl', 'rb') as f: 12 | cooc = pickle.load(f) 13 | print("{} nonzero entries".format(cooc.nnz)) 14 | 15 | nmax = 100 16 | print("using nmax =", nmax, ", cooc.max() =", cooc.max()) 17 | 18 | print("initializing embeddings"); 19 | print("cooc shape 0: ", cooc.shape[0], "cooc shape 1: ", cooc.shape[1]) 20 | embedding_dim = 20 21 | xs = np.random.normal(size=(cooc.shape[0], embedding_dim)) 22 | ys = np.random.normal(size=(cooc.shape[1], embedding_dim)) 23 | 24 | eta = 0.001 25 | alpha = 3 / 4 26 | 27 | epochs = 20 28 | 29 | for epoch in range(epochs): 30 | print("epoch {}".format(epoch)) 31 | for ix, jy, n in zip(cooc.row, cooc.col, cooc.data): 32 | logn = np.log(n) 33 | fn = min(1.0, (n / nmax) ** alpha) 34 | x, y = xs[ix, :], ys[jy, :] 35 | scale = 2 * eta * fn * (logn - np.dot(x, y)) 36 | xs[ix, :] += scale * y 37 | ys[jy, :] += scale * x 38 | 39 | np.savez('embeddings', xs, ys) 40 | 41 | 42 | if __name__ == '__main__': 43 | main() 44 | -------------------------------------------------------------------------------- /exercises/2021/ex8/glove_template.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from scipy.sparse import * 3 | import numpy as np 4 | import pickle 5 | import random 6 | 7 | 8 | def main(): 9 | print("loading cooccurrence matrix") 10 | with open('cooc.pkl', 'rb') as f: 11 | cooc = pickle.load(f) 12 | print("{} nonzero entries".format(cooc.nnz)) 13 | 14 | nmax = 100 15 | print("using nmax =", nmax, ", cooc.max() =", cooc.max()) 16 | 17 | print("initializing embeddings") 18 | embedding_dim = 20 19 | xs = np.random.normal(size=(cooc.shape[0], embedding_dim)) 20 | ys = np.random.normal(size=(cooc.shape[1], embedding_dim)) 21 | 22 | eta = 0.001 23 | alpha = 3 / 4 24 | 25 | epochs = 10 26 | 27 | for epoch in range(epochs): 28 | print("epoch {}".format(epoch)) 29 | for ix, jy, n in zip(cooc.row, cooc.col, cooc.data): 30 | 31 | # fill in your SGD code here, 32 | # for the update resulting from co-occurence (i,j) 33 | 34 | 35 | np.save('embeddings', xs) 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /exercises/2021/ex8/pickle_vocab.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import pickle 3 | 4 | 5 | def main(): 6 | vocab = dict() 7 | with open('vocab_cut.txt') as f: 8 | for idx, line in enumerate(f): 9 | vocab[line.strip()] = idx 10 | 11 | with open('vocab.pkl', 'wb') as f: 12 | pickle.dump(vocab, f, pickle.HIGHEST_PROTOCOL) 13 | 14 | 15 | if __name__ == '__main__': 16 | main() 17 | --------------------------------------------------------------------------------