├── .gitignore ├── 1_N-dimensional_arrays_and_Tensor_class.ipynb ├── 2_Efficient_representations_of_tensors.ipynb ├── 3_Fundamental_tensor_decompositions.ipynb ├── 4_Ecosystem_of_Tensor_class.ipynb ├── 5_Tensor_meta_information_and_pandas_integration.ipynb ├── README.rst ├── bootstrap_venv.sh ├── data └── ETH80 │ └── basic_066-063.npy ├── images ├── C_Fortran_ordering.png ├── TensorCPD.png ├── TensorTKD.png ├── TensorTT.png ├── cpd_as_rank_one.png ├── data-modes-state.png ├── different-forms-of-data.png ├── different-tensors.png ├── folding.png ├── mode_n_product.png ├── outerproduct_3.png ├── storage_complexity.png ├── tensor_substructures.png ├── tensors.png └── unfolding.png └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | test_[0-9]* 3 | Untitled*.ipynb 4 | 5 | 6 | 7 | ######################## 8 | ### TEMPLATE FOR PYTHON 9 | ######################## 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | .pytest_cache/ 49 | htmlcov/ 50 | .tox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | .hypothesis/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | .static_storage/ 66 | .media/ 67 | local_settings.py 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # celery beat schedule file 89 | celerybeat-schedule 90 | 91 | # SageMath parsed files 92 | *.sage.py 93 | 94 | # Environments 95 | .env 96 | .venv 97 | env/ 98 | venv/ 99 | ENV/ 100 | env.bak/ 101 | venv.bak/ 102 | 103 | # Spyder project settings 104 | .spyderproject 105 | .spyproject 106 | 107 | # Rope project settings 108 | .ropeproject 109 | 110 | # mkdocs documentation 111 | /site 112 | 113 | # mypy 114 | .mypy_cache/ 115 | 116 | 117 | 118 | ######################## 119 | ### TEMPLATE TEX 120 | ######################## 121 | ## Core latex/pdflatex auxiliary files: 122 | *.aux 123 | *.lof 124 | *.log 125 | *.lot 126 | *.fls 127 | *.out 128 | *.toc 129 | *.fmt 130 | *.fot 131 | *.cb 132 | *.cb2 133 | 134 | ## Intermediate documents: 135 | *.dvi 136 | *.xdv 137 | *-converted-to.* 138 | # these rules might exclude image files for figures etc. 
139 | # *.ps 140 | # *.eps 141 | # *.pdf 142 | 143 | ## Generated if empty string is given at "Please type another file name for output:" 144 | .pdf 145 | 146 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 147 | *.bbl 148 | *.bcf 149 | *.blg 150 | *-blx.aux 151 | *-blx.bib 152 | *.run.xml 153 | 154 | ## Build tool auxiliary files: 155 | *.fdb_latexmk 156 | *.synctex 157 | *.synctex(busy) 158 | *.synctex.gz 159 | *.synctex.gz(busy) 160 | *.pdfsync 161 | 162 | ## Auxiliary and intermediate files from other packages: 163 | # algorithms 164 | *.alg 165 | *.loa 166 | 167 | # achemso 168 | acs-*.bib 169 | 170 | # amsthm 171 | *.thm 172 | 173 | # beamer 174 | *.nav 175 | *.pre 176 | *.snm 177 | *.vrb 178 | 179 | # changes 180 | *.soc 181 | 182 | # cprotect 183 | *.cpt 184 | 185 | # elsarticle (documentclass of Elsevier journals) 186 | *.spl 187 | 188 | # endnotes 189 | *.ent 190 | 191 | # fixme 192 | *.lox 193 | 194 | # feynmf/feynmp 195 | *.mf 196 | *.mp 197 | *.t[1-9] 198 | *.t[1-9][0-9] 199 | *.tfm 200 | 201 | #(r)(e)ledmac/(r)(e)ledpar 202 | *.end 203 | *.?end 204 | *.[1-9] 205 | *.[1-9][0-9] 206 | *.[1-9][0-9][0-9] 207 | *.[1-9]R 208 | *.[1-9][0-9]R 209 | *.[1-9][0-9][0-9]R 210 | *.eledsec[1-9] 211 | *.eledsec[1-9]R 212 | *.eledsec[1-9][0-9] 213 | *.eledsec[1-9][0-9]R 214 | *.eledsec[1-9][0-9][0-9] 215 | *.eledsec[1-9][0-9][0-9]R 216 | 217 | # glossaries 218 | *.acn 219 | *.acr 220 | *.glg 221 | *.glo 222 | *.gls 223 | *.glsdefs 224 | 225 | # gnuplottex 226 | *-gnuplottex-* 227 | 228 | # gregoriotex 229 | *.gaux 230 | *.gtex 231 | 232 | # hyperref 233 | *.brf 234 | 235 | # knitr 236 | *-concordance.tex 237 | # TODO Comment the next line if you want to keep your tikz graphics files 238 | *.tikz 239 | *-tikzDictionary 240 | 241 | # listings 242 | *.lol 243 | 244 | # makeidx 245 | *.idx 246 | *.ilg 247 | *.ind 248 | *.ist 249 | 250 | # minitoc 251 | *.maf 252 | *.mlf 253 | *.mlt 254 | *.mtc[0-9]* 255 | *.slf[0-9]* 256 | *.slt[0-9]* 257 | *.stc[0-9]* 258 | 
259 | # minted 260 | _minted* 261 | *.pyg 262 | 263 | # morewrites 264 | *.mw 265 | 266 | # nomencl 267 | *.nlo 268 | 269 | # pax 270 | *.pax 271 | 272 | # pdfpcnotes 273 | *.pdfpc 274 | 275 | # sagetex 276 | *.sagetex.sage 277 | *.sagetex.py 278 | *.sagetex.scmd 279 | 280 | # scrwfile 281 | *.wrt 282 | 283 | # sympy 284 | *.sout 285 | *.sympy 286 | sympy-plots-for-*.tex/ 287 | 288 | # pdfcomment 289 | *.upa 290 | *.upb 291 | 292 | # pythontex 293 | *.pytxcode 294 | pythontex-files-*/ 295 | 296 | # thmtools 297 | *.loe 298 | 299 | # TikZ & PGF 300 | *.dpth 301 | *.md5 302 | *.auxlock 303 | 304 | # todonotes 305 | *.tdo 306 | 307 | # easy-todo 308 | *.lod 309 | 310 | # xindy 311 | *.xdy 312 | 313 | # xypic precompiled matrices 314 | *.xyc 315 | 316 | # endfloat 317 | *.ttt 318 | *.fff 319 | 320 | # Latexian 321 | TSWLatexianTemp* 322 | 323 | ## Editors: 324 | # WinEdt 325 | *.bak 326 | *.sav 327 | 328 | # Texpad 329 | .texpadtmp 330 | 331 | # Kile 332 | *.backup 333 | 334 | # KBibTeX 335 | *~[0-9]* 336 | 337 | # auto folder when using emacs and auctex 338 | /auto/* 339 | 340 | # expex forward references with \gathertags 341 | *-tags.tex 342 | 343 | 344 | 345 | ######################## 346 | ### TEMPLATE MATLAB 347 | ######################## 348 | ##--------------------------------------------------- 349 | ## Remove autosaves generated by the Matlab editor 350 | ## We have git for backups! 
351 | ##--------------------------------------------------- 352 | 353 | # Windows default autosave extension 354 | *.asv 355 | 356 | # OSX / *nix default autosave extension 357 | *.m~ 358 | 359 | # Compiled MEX binaries (all platforms) 360 | *.mex* 361 | 362 | # Simulink Code Generation 363 | slprj/ 364 | 365 | # Session info 366 | octave-workspace 367 | 368 | # Simulink autosave extension 369 | *.autosave 370 | 371 | 372 | 373 | ######################## 374 | ### TEMPLATE JETBRAINS 375 | ######################## 376 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 377 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 378 | 379 | # User-specific stuff: 380 | .idea/**/workspace.xml 381 | .idea/**/tasks.xml 382 | .idea/dictionaries 383 | 384 | # Sensitive or high-churn files: 385 | .idea/**/dataSources/ 386 | .idea/**/dataSources.ids 387 | .idea/**/dataSources.xml 388 | .idea/**/dataSources.local.xml 389 | .idea/**/sqlDataSources.xml 390 | .idea/**/dynamic.xml 391 | .idea/**/uiDesigner.xml 392 | 393 | # Gradle: 394 | .idea/**/gradle.xml 395 | .idea/**/libraries 396 | 397 | # CMake 398 | cmake-build-debug/ 399 | 400 | # Mongo Explorer plugin: 401 | .idea/**/mongoSettings.xml 402 | 403 | ## File-based project format: 404 | *.iws 405 | 406 | ## Plugin-specific files: 407 | 408 | # IntelliJ 409 | out/ 410 | 411 | # mpeltonen/sbt-idea plugin 412 | .idea_modules/ 413 | 414 | # JIRA plugin 415 | atlassian-ide-plugin.xml 416 | 417 | # Cursive Clojure plugin 418 | .idea/replstate.xml 419 | 420 | # Crashlytics plugin (for Android Studio and IntelliJ) 421 | com_crashlytics_export_strings.xml 422 | crashlytics.properties 423 | crashlytics-build.properties 424 | fabric.properties 425 | 426 | 427 | 428 | ######################## 429 | ### TEMPLATE MS AND LIBRE OFFICE 430 | ######################## 431 | *.tmp 432 | 433 | # Word temporary 434 | ~$*.doc* 435 | 436 | # Excel temporary 437 | 
~$*.xls* 438 | 439 | # Excel Backup File 440 | *.xlk 441 | 442 | # PowerPoint temporary 443 | ~$*.ppt* 444 | 445 | # Visio autosave temporary files 446 | *.~vsd* 447 | 448 | 449 | # LibreOffice locks 450 | .~lock.*# 451 | 452 | 453 | 454 | ######################## 455 | ### TEMPLATE DROPBOX 456 | ######################## 457 | # Dropbox settings and caches 458 | .dropbox 459 | .dropbox.attr 460 | .dropbox.cache 461 | 462 | 463 | 464 | ######################## 465 | ### TEMPLATE LINUX 466 | ######################## 467 | *~ 468 | 469 | # temporary files which can be created if a process still has a handle open of a deleted file 470 | .fuse_hidden* 471 | 472 | # KDE directory preferences 473 | .directory 474 | 475 | # Linux trash folder which might appear on any partition or disk 476 | .Trash-* 477 | 478 | # .nfs files are created when an open file is removed but is still being accessed 479 | .nfs* 480 | 481 | 482 | 483 | ######################## 484 | ### TEMPLATE MAC OS 485 | ######################## 486 | # General 487 | .DS_Store 488 | .AppleDouble 489 | .LSOverride 490 | 491 | # Icon must end with two \r 492 | Icon 493 | 494 | 495 | # Thumbnails 496 | ._* 497 | 498 | # Files that might appear in the root of a volume 499 | .DocumentRevisions-V100 500 | .fseventsd 501 | .Spotlight-V100 502 | .TemporaryItems 503 | .Trashes 504 | .VolumeIcon.icns 505 | .com.apple.timemachine.donotpresent 506 | 507 | # Directories potentially created on remote AFP share 508 | .AppleDB 509 | .AppleDesktop 510 | Network Trash Folder 511 | Temporary Items 512 | .apdisk 513 | 514 | 515 | 516 | ######################## 517 | ### TEMPLATE FOR WINDOWS FILES 518 | ######################## 519 | # Windows thumbnail cache files 520 | Thumbs.db 521 | ehthumbs.db 522 | ehthumbs_vista.db 523 | 524 | # Dump file 525 | *.stackdump 526 | 527 | # Folder config file 528 | [Dd]esktop.ini 529 | 530 | # Recycle Bin used on file shares 531 | $RECYCLE.BIN/ 532 | 533 | # Windows Installer files 534 | *.cab 
535 | *.msi 536 | *.msm 537 | *.msp 538 | 539 | # Windows shortcuts 540 | *.lnk 541 | -------------------------------------------------------------------------------- /1_N-dimensional_arrays_and_Tensor_class.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Multi-dimensional arrays and Tensor class\n", 8 | "### Last modification (05.06.2018).\n", 9 | "\n", 10 | "\n", 11 | "In this tutorial we will show the core data structures of multidimenaional arrays within tensor algebra and illustrate how they are integrated into [hottbox](https://github.com/hottbox/hottbox). For more details visit our [documentation page](https://hottbox.github.io/stable/api/hottbox.core.html#module-hottbox.core).\n", 12 | "\n", 13 | "**Requirements:** ``hottbox==0.1.3``\n", 14 | "\n", 15 | "**Authors:** \n", 16 | "Ilya Kisil (ilyakisil@gmail.com); \n", 17 | "Giuseppe G. Calvi (ggc115@ic.ac.uk)" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import numpy as np\n", 27 | "from hottbox.core import Tensor" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "![tensors](./images/tensors.png)\n", 35 | "\n", 36 | "Tensor is a multi-dimenaional array of data where each dimension is conventionally referred to as **mode**. Its order is defined by the number of its modes which is equivivalent to the number of indices required to identify a particular entry of a multi-dimensional array. 
For example, an element of a third order tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I \\times J \\times K}$ can be written in general form as:\n", 37 | "\n", 38 | "$$ x_{ijk} = \\mathbf{\\underline{X}}[i, j, k]$$\n", 39 | "\n", 40 | "\n", 41 | "## Tensor class in hottbox\n", 42 | "In order to create tensor using **`hottbox`**, you simply need to pass numpy ndarray to the constructor of the **`Tensor`** class. This will allow you to use top level API for the most common properties and operations on the tensor itself that correspond to the conventional definitions. \n", 43 | "\n", 44 | "**Note:** In order to be consistent with python indexing, count of modes starts from zeros." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 2, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "This tensor is of order 3 and consists of 24 elements.\n", 57 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n" 58 | ] 59 | }, 60 | { 61 | "data": { 62 | "text/plain": [ 63 | "array([[[ 0, 1, 2, 3],\n", 64 | " [ 4, 5, 6, 7],\n", 65 | " [ 8, 9, 10, 11]],\n", 66 | "\n", 67 | " [[12, 13, 14, 15],\n", 68 | " [16, 17, 18, 19],\n", 69 | " [20, 21, 22, 23]]])" 70 | ] 71 | }, 72 | "execution_count": 2, 73 | "metadata": {}, 74 | "output_type": "execute_result" 75 | } 76 | ], 77 | "source": [ 78 | "array_3d = np.arange(24).reshape((2, 3, 4))\n", 79 | "tensor = Tensor(array_3d)\n", 80 | "print(tensor)\n", 81 | "tensor.data" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "As mentioned previously, the conventional names of the tensor characteristics (e.g. order, shape, size) are preserved for the objects of **`Tensor`** class." 
89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 3, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "This tensor is of order 3.\n", 101 | "The sizes of its modes are (2, 3, 4) respectively.\n", 102 | "It consists of 24 elemetns.\n", 103 | "Its Frobenious norm = 65.76\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "print('This tensor is of order {}.'.format(tensor.order))\n", 109 | "print('The sizes of its modes are {} respectively.'.format(tensor.shape))\n", 110 | "print('It consists of {} elemetns.'.format(tensor.size))\n", 111 | "print('Its Frobenious norm = {:.2f}'.format(tensor.frob_norm))" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "# Fundamental operations with the obejcts of Tensor class\n", 119 | "\n", 120 | "Next, let's have a look at the fundamental operation with a tensor and how to apply them to the object of class **`Tensor`**. We shall start from defining the main substructures of a tensor. \n", 121 | "For ease of visualisation and compact notation, we consider a third order tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I \\times J \\times K}$.\n", 122 | "\n", 123 | "![tensor_substructures](./images/tensor_substructures.png)\n", 124 | "\n", 125 | "1. A **fiber** is a vector obtained by fixing all but one of the indices, e.g. $\\mathbf{\\underline{X}}[i,:,k]$ is the mode-2 fiber (usually refered to as row fiber). \n", 126 | "\n", 127 | "- Fixing all but two of the indices yields a matrix called a **slice** of a tensor, e.g. $\\mathbf{\\underline{X}}[:,:,k]$ is the mode-[1,2] slice (usually refered to as frontal slice).\n", 128 | "\n", 129 | "**Note:** The same principals and definitions can be applied to a tensor of arbitrarily large order. 
On top of that, one can obtain a **subtensor** by fixing at least three indecies and let other vary.\n", 130 | "\n", 131 | "## Unfolding a tensor\n", 132 | "\n", 133 | "Conventionally, unfolding is considered to be a process of element mapping from a tensor to a matrix. In other words, it arranges the mode-$n$ fibers of a tensor to be the columns of the matrix and denoted as:\n", 134 | "\n", 135 | "$$\\mathbf{\\underline{A}} \\xrightarrow{n} \\mathbf{A}_{(n)}$$\n", 136 | "\n", 137 | "Thus, this operations requires to specify a mode along which a tensor will be unfolded. For a third order tensor, a visually representation of such operation is as following:\n", 138 | "\n", 139 | "![unfolding](./images/unfolding.png)\n", 140 | "\n", 141 | "**Note:** it can be extended to a more general case, when one converts a tensor of order $N$ into a tensor of order $M$ where $N > M$. In this case, one would need to specify a set of modes along which a tensor will be unfolded. \n", 142 | "\n", 143 | "In **`hottbox`** this functionality is available through the corresponding methods of the **`Tensor`** class:\n", 144 | "\n", 145 | "```python\n", 146 | "tensor.unfold(mode=0)\n", 147 | "```\n", 148 | "\n", 149 | "By default, it changes the data array of a tensor. 
If you want to get unfolded tensor as a new object then use the following:\n", 150 | "\n", 151 | "```python\n", 152 | "tensor_unfolded = tensor.unfold(mode=0, inplace=False)\n", 153 | "```" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 4, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "data": { 163 | "text/plain": [ 164 | "array([[[ 0, 1, 2, 3],\n", 165 | " [ 4, 5, 6, 7],\n", 166 | " [ 8, 9, 10, 11]],\n", 167 | "\n", 168 | " [[12, 13, 14, 15],\n", 169 | " [16, 17, 18, 19],\n", 170 | " [20, 21, 22, 23]]])" 171 | ] 172 | }, 173 | "execution_count": 4, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "array_3d = np.arange(24).reshape((2, 3, 4))\n", 180 | "tensor = Tensor(array_3d)\n", 181 | "tensor.data" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 5, 187 | "metadata": {}, 188 | "outputs": [ 189 | { 190 | "data": { 191 | "text/plain": [ 192 | "array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],\n", 193 | " [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]])" 194 | ] 195 | }, 196 | "execution_count": 5, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "tensor_unfolded = tensor.unfold(mode=0, inplace=False)\n", 203 | "tensor_unfolded.data" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 6, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "text/plain": [ 214 | "array([[[ 0, 1, 2, 3],\n", 215 | " [ 4, 5, 6, 7],\n", 216 | " [ 8, 9, 10, 11]],\n", 217 | "\n", 218 | " [[12, 13, 14, 15],\n", 219 | " [16, 17, 18, 19],\n", 220 | " [20, 21, 22, 23]]])" 221 | ] 222 | }, 223 | "execution_count": 6, 224 | "metadata": {}, 225 | "output_type": "execute_result" 226 | } 227 | ], 228 | "source": [ 229 | "tensor.data" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 7, 235 | "metadata": {}, 236 | "outputs": [ 237 | { 238 | "data": { 239 | 
"text/plain": [ 240 | "array([[ 0, 1, 2, 3, 12, 13, 14, 15],\n", 241 | " [ 4, 5, 6, 7, 16, 17, 18, 19],\n", 242 | " [ 8, 9, 10, 11, 20, 21, 22, 23]])" 243 | ] 244 | }, 245 | "execution_count": 7, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "tensor.unfold(mode=1)\n", 252 | "tensor.data" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "## Folding of a tensor\n", 260 | "\n", 261 | "Folding is most commonly referred to as a process of element mapping from a matrix or a vector to a tensor. However, it can be extended to a more general case, when one converts a tensor of order $N$ into a tensor of order $M$ where $N < M$.\n", 262 | "\n", 263 | "![folding](./images/folding.png)\n", 264 | "\n", 265 | "In **`hottbox`** this functionality is available through the corresponding methods of the **`Tensor`** class:\n", 266 | "\n", 267 | "```python\n", 268 | "tensor_unfolded.fold()\n", 269 | "```\n", 270 | "\n", 271 | "By default, it changes the data array of a tensor. If you want to get folded tensor as a new object then use the following:\n", 272 | "\n", 273 | "```python\n", 274 | "tensor_folded = tensor_unfolded.fold(inplace=False)\n", 275 | "```\n", 276 | "\n", 277 | "In **`hottbox`** this operation merely reverts the unfolding operation. Thus, there is no need to pass any parameters (all relevant information is extracted behind the scenes) and can be used only for a tensor in an unfolded state.\n", 278 | "\n", 279 | "**Note:** Canonical folding and unfolding will be implemented in a future releases of **`hottbox`**." 
280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 8, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "data": { 289 | "text/plain": [ 290 | "array([[[ 0, 1, 2, 3],\n", 291 | " [ 4, 5, 6, 7],\n", 292 | " [ 8, 9, 10, 11]],\n", 293 | "\n", 294 | " [[12, 13, 14, 15],\n", 295 | " [16, 17, 18, 19],\n", 296 | " [20, 21, 22, 23]]])" 297 | ] 298 | }, 299 | "execution_count": 8, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | "source": [ 305 | "array_3d = np.arange(24).reshape((2, 3, 4))\n", 306 | "tensor = Tensor(array_3d)\n", 307 | "tensor.data" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 9, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/plain": [ 318 | "array([[ 0, 1, 2, 3, 12, 13, 14, 15],\n", 319 | " [ 4, 5, 6, 7, 16, 17, 18, 19],\n", 320 | " [ 8, 9, 10, 11, 20, 21, 22, 23]])" 321 | ] 322 | }, 323 | "execution_count": 9, 324 | "metadata": {}, 325 | "output_type": "execute_result" 326 | } 327 | ], 328 | "source": [ 329 | "tensor.unfold(mode=1)\n", 330 | "tensor.data" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 10, 336 | "metadata": {}, 337 | "outputs": [ 338 | { 339 | "data": { 340 | "text/plain": [ 341 | "array([[[ 0, 1, 2, 3],\n", 342 | " [ 4, 5, 6, 7],\n", 343 | " [ 8, 9, 10, 11]],\n", 344 | "\n", 345 | " [[12, 13, 14, 15],\n", 346 | " [16, 17, 18, 19],\n", 347 | " [20, 21, 22, 23]]])" 348 | ] 349 | }, 350 | "execution_count": 10, 351 | "metadata": {}, 352 | "output_type": "execute_result" 353 | } 354 | ], 355 | "source": [ 356 | "tensor.fold()\n", 357 | "tensor.data" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 11, 363 | "metadata": {}, 364 | "outputs": [ 365 | { 366 | "data": { 367 | "text/plain": [ 368 | "array([[[ 0, 1, 2, 3],\n", 369 | " [ 4, 5, 6, 7],\n", 370 | " [ 8, 9, 10, 11]],\n", 371 | "\n", 372 | " [[12, 13, 14, 15],\n", 373 | " [16, 17, 18, 19],\n", 374 | " [20, 
21, 22, 23]]])" 375 | ] 376 | }, 377 | "execution_count": 11, 378 | "metadata": {}, 379 | "output_type": "execute_result" 380 | } 381 | ], 382 | "source": [ 383 | "tensor_unfolded = tensor.unfold(mode=1, inplace=False)\n", 384 | "tensor_folded = tensor_unfolded.fold(inplace=False)\n", 385 | "tensor_folded.data" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 12, 391 | "metadata": {}, 392 | "outputs": [ 393 | { 394 | "data": { 395 | "text/plain": [ 396 | "array([[ 0, 1, 2, 3, 12, 13, 14, 15],\n", 397 | " [ 4, 5, 6, 7, 16, 17, 18, 19],\n", 398 | " [ 8, 9, 10, 11, 20, 21, 22, 23]])" 399 | ] 400 | }, 401 | "execution_count": 12, 402 | "metadata": {}, 403 | "output_type": "execute_result" 404 | } 405 | ], 406 | "source": [ 407 | "tensor_unfolded.data" 408 | ] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "## Mode-n product\n", 415 | "\n", 416 | "The mode-$n$ product is the multiplication of a tensor by a matrix along the $n^{th}$ mode of a tensor. This essentially means that each mode-$n$ fiber should be multiplied by this matrix. Mathematically, this is expressed as:\n", 417 | "\n", 418 | "$$\\mathbf{\\underline{X}} \\times_n \\mathbf{A} = \\mathbf{\\underline{Y}} \\quad \\Leftrightarrow \\quad \\mathbf{Y}_{(n)} = \\mathbf{A} \\mathbf{X}_{(n)} $$\n", 419 | "\n", 420 | "![mode_n_product](./images/mode_n_product.png)\n", 421 | "\n", 422 | "Important properties of the mode-$n$ product:\n", 423 | "\n", 424 | "1. 
For distinct modes in a series of multiplications, the order of the multiplication is irrelevant: \n", 425 | "\n", 426 | "    $$\mathbf{\underline{X}} \times_n \mathbf{A} \times_m \mathbf{B} = \mathbf{\underline{X}} \times_m \mathbf{B} \times_n \mathbf{A} \quad (m \neq n)$$\n", 427 | "\n", 428 | "- However, it does not hold if the modes are the same:\n", 429 | "\n", 430 | "    $$\mathbf{\underline{X}} \times_n \mathbf{A} \times_n \mathbf{B} = \mathbf{\underline{X}} \times_n (\mathbf{B}\mathbf{A})$$\n", 431 | "\n", 432 | "In **`hottbox`**, mode-$n$ product is available through the corresponding method of the **`Tensor`** class:\n", 433 | "\n", 434 | "```python\n", 435 | "tensor.mode_n_product(matrix, mode=n)\n", 436 | "```\n", 437 | "\n", 438 | "By default, it changes the data array of a tensor. If you want to get a resulting tensor as a new object use the following:\n", 439 | "\n", 440 | "```python\n", 441 | "tensor.mode_n_product(matrix, mode=n, inplace=False)\n", 442 | "```\n", 443 | "\n", 444 | "Starting from **`hottbox v0.1.3`**, you can perform mode-n product with a **`matrix`** represented either as a **`numpy array`** or as an object of **`Tensor`** class.\n", 445 | "\n", 446 | "In the following example, we will consider the sequence of mode-$n$ products:\n", 447 | "\n", 448 | "$$\mathbf{\underline{Y}} = \mathbf{\underline{X}} \times_2 \mathbf{A} \times_3 \mathbf{B}$$\n", 449 | "$$\mathbf{\underline{Z}} = \mathbf{\underline{X}} \times_3 \mathbf{B} \times_2 \mathbf{A}$$\n", 450 | "\n", 451 | "Where $\mathbf{\underline{X}} \in \mathbb{R}^{2 \times 3 \times 4}, \mathbf{A} \in \mathbb{R}^{5 \times 3}$ and $\mathbf{B} \in \mathbb{R}^{6 \times 4}$. Thus, the resulting tensors $\mathbf{\underline{Y}}, \mathbf{\underline{Z}}$ will be equal and of shape (2,5,6), e.g. 
$\\mathbf{\\underline{Y}} \\in \\mathbb{R}^{2 \\times 6 \\times 5}$\n", 452 | "\n", 453 | "In order to perform a sequence of mode-$n$ products, methods can be chained. " 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 13, 459 | "metadata": {}, 460 | "outputs": [ 461 | { 462 | "name": "stdout", 463 | "output_type": "stream", 464 | "text": [ 465 | "The initial shape of tensor X is (2, 3, 4)\n", 466 | "The shape of tensor Y is (2, 5, 6)\n", 467 | "The shape of tensor Z is (2, 5, 6)\n" 468 | ] 469 | } 470 | ], 471 | "source": [ 472 | "I, J, K = 2, 3, 4\n", 473 | "J_new, K_new = 5, 6\n", 474 | "\n", 475 | "array_3d = np.arange(I * J * K).reshape(I, J ,K)\n", 476 | "X = Tensor(array_3d)\n", 477 | "A = np.arange(J_new * J).reshape(J_new, J)\n", 478 | "B = np.arange(K_new * K).reshape(K_new, K)\n", 479 | "\n", 480 | "Y = X.mode_n_product(A, mode=1, inplace=False).mode_n_product(B, mode=2, inplace=False)\n", 481 | "\n", 482 | "# Perform mode-n product in reversed order\n", 483 | "Z = X.mode_n_product(B, mode=2, inplace=False).mode_n_product(A, mode=1, inplace=False)\n", 484 | "\n", 485 | "print('The initial shape of tensor X is {}'.format(X.shape))\n", 486 | "print('The shape of tensor Y is {}'.format(Y.shape))\n", 487 | "print('The shape of tensor Z is {}'.format(Z.shape))" 488 | ] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "Next, we will change a tensor data itself by applying the same mode-$n$ products to it." 
495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 14, 500 | "metadata": {}, 501 | "outputs": [ 502 | { 503 | "name": "stdout", 504 | "output_type": "stream", 505 | "text": [ 506 | "The shape of tensor X is (2, 5, 6)\n" 507 | ] 508 | } 509 | ], 510 | "source": [ 511 | "X.mode_n_product(A, mode=1).mode_n_product(B, mode=2)\n", 512 | "print('The shape of tensor X is {}'.format(X.shape))" 513 | ] 514 | }, 515 | { 516 | "cell_type": "markdown", 517 | "metadata": {}, 518 | "source": [ 519 | "Here, despite the **`X`**, **`Y`** and **`Z`** are being different objects, their data values will remain the same since that same operation were applied to them. We can verify that by:\n", 520 | "1. Substraction of their data arrays which should result in an array filled with zeros\n", 521 | "- Using numpy assertion utility which should not raise an **`AssertionError`**.\n", 522 | "\n", 523 | "We will use the second option." 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": 15, 529 | "metadata": {}, 530 | "outputs": [ 531 | { 532 | "name": "stdout", 533 | "output_type": "stream", 534 | "text": [ 535 | "The underlying data arrays are equal for all of them.\n" 536 | ] 537 | } 538 | ], 539 | "source": [ 540 | "np.testing.assert_array_equal(Y.data, Z.data)\n", 541 | "np.testing.assert_array_equal(X.data, Y.data)\n", 542 | "np.testing.assert_array_equal(X.data, Z.data)\n", 543 | "print('The underlying data arrays are equal for all of them.')" 544 | ] 545 | }, 546 | { 547 | "cell_type": "markdown", 548 | "metadata": {}, 549 | "source": [ 550 | "# Additional notes on API of Tensor class\n", 551 | "\n", 552 | "1. When object of **`Tensor`** class is created, the numy array with data values is stored in **`_data`** placeholder with the correspndong property **`data`** for accessing it. If you want to modify these values, then call the corresponding transformation methods available for the **`Tensor`** class." 
553 | ] 554 | }, 555 | { 556 | "cell_type": "markdown", 557 | "metadata": {}, 558 | "source": [ 559 | "# Further reading list\n", 560 | "- Tamara G. Kolda and Brett W. Bader, \"Tensor decompositions and applications.\" SIAM REVIEW, 51(3):455–500, 2009." 561 | ] 562 | } 563 | ], 564 | "metadata": { 565 | "kernelspec": { 566 | "display_name": "hottbox-tutorials", 567 | "language": "python", 568 | "name": "hottbox-tutorials" 569 | }, 570 | "language_info": { 571 | "codemirror_mode": { 572 | "name": "ipython", 573 | "version": 3 574 | }, 575 | "file_extension": ".py", 576 | "mimetype": "text/x-python", 577 | "name": "python", 578 | "nbconvert_exporter": "python", 579 | "pygments_lexer": "ipython3", 580 | "version": "3.6.6" 581 | } 582 | }, 583 | "nbformat": 4, 584 | "nbformat_minor": 2 585 | } 586 | -------------------------------------------------------------------------------- /2_Efficient_representations_of_tensors.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Efficient representation of multidimensional arrays.\n", 8 | "### Last modification (05.06.2018)\n", 9 | "\n", 10 | "![storage_complexity](./images/storage_complexity.png)\n", 11 | "\n", 12 | "\n", 13 | "In this tutorial we provide a theoretical backgound on efficient representation of multidimensional arrays and show how these data structures are integrated into [hottbox](https://github.com/hottbox/hottbox) through **TensorCPD**, **TensorTKD** and **TensorTT** classes.\n", 14 | "\n", 15 | "More details on **TensorCPD**, **TensorTKD** and **TensorTT** classes can be found on our [documentation page](https://hottbox.github.io/stable/api/hottbox.core.html#module-hottbox.core).\n", 16 | "\n", 17 | "**Note:** this tutorial assumes that you are familiar with the basics of tensor algebra and the corresponding conventional notation. 
If you are new to this area, the required background is covered in our [introductory notebook](https://github.com/hottbox/hottbox-tutorials/blob/master/1_N-dimensional_arrays_and_Tensor_class.ipynb).\n", 18 | "\n", 19 | "**Requirements:** ``hottbox==0.1.3``\n", 20 | "\n", 21 | "**Authors:** \n", 22 | "Ilya Kisil (ilyakisil@gmail.com); \n", 23 | "Giuseppe G. Calvi (ggc115@ic.ac.uk)" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import numpy as np\n", 33 | "from hottbox.core import Tensor, TensorCPD, TensorTKD, TensorTT" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "# Outer product, rank-1 tensor and definitions of rank of a multi-dimensional array.\n", 41 | "\n", 42 | "\n", 43 | "The central operator in tensor analysis is the outer product (sometimes refered to as the tensor product). \n", 44 | "Consider tensors $\\mathbf{\\underline{A}} \\in \\mathbb{R}^{I_1 \\times \\cdots \\times I_N}$ and $\\mathbf{\\underline{B}} \\in \\mathbb{R}^{J_1 \\times \\cdots \\times J_M}$, then their outer product yeilds a tensor of higher order then both of them:\n", 45 | "\n", 46 | "$$\n", 47 | "\\begin{equation}\n", 48 | "\\begin{aligned}\n", 49 | " \\mathbf{\\underline{A}} \\circ \\mathbf{\\underline{B}} &= \\mathbf{\\underline{C}} \\in \\mathbb{R}^{I_1 \\times \\cdots \\times I_N \\times J_1 \\times \\cdots \\times J_M} \\\\\n", 50 | " a_{i_1,\\dots,i_N}b_{j_1,\\dots,j_M} &= c_{i_1,\\dots,i_N,j_1,\\dots,j_M} \n", 51 | "\\end{aligned} \n", 52 | "\\end{equation}\n", 53 | "$$\n", 54 | "\n", 55 | "Most of the time we deal with the outer product of vectors, which significanlty simplifies the general form expressed above and establishes one the of the most fundamenatal definitions. A tensor of order $N$ is said to be of **rank-1** if it can be represented as an outer product of $N$ vectors. 
The figure below illustrates an example of rank-1 tensor $\\mathbf{\\underline{X}}$ and provides intuition of how operation of outer product is computed:\n", 56 | "\n", 57 | "![outerproduct](./images/outerproduct_3.png)\n", 58 | "\n", 59 | "There are several forms of the rank of N-dimensional arrays each of which is associated with a representation of a tensor in a particular form:\n", 60 | "\n", 61 | "1. Kruskal rank $\\rightarrow$ canonical polyadic form.\n", 62 | "\n", 63 | "- Multi-linear rank $\\rightarrow$ tucker form.\n", 64 | "\n", 65 | "- TT rank $\\rightarrow$ tensor train form.\n", 66 | "\n", 67 | "Each of these representations has the corresponding class: **``TensorCPD``**, **``TensorTKD``**, **``TensorTT``**. All of them come with almost identical API except for object creation and, as a result, the names for some attributes. But before we can proceed, it is crucial to get acquainted with the following definitions." 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "# Canonical Polyadic representation (CP), Kruskal rank and TensorCPD class\n", 75 | "\n", 76 | "![cpd_as_rank_one](./images/cpd_as_rank_one.png)\n", 77 | "\n", 78 | "## Kruskal rank\n", 79 | "This figure illustrates a tensor $\\mathbf{\\underline{X}}$ of rank $R$. The **rank** of a tensor $\\mathbf{\\underline{X}}$ is defined as the smallest number of rank-one tensors that produce $\\mathbf{\\underline{X}}$ as their linear combination. 
This definition of a tensor rank is also known as the **Kruskal rank**.\n", 80 | "\n", 81 | "## CP representation\n", 82 | "For a third order tensor of rank $R$ it can be expressed as follows:\n", 83 | "\n", 84 | "$$\\mathbf{\\underline{X}} = \\sum_{r=1}^R \\mathbf{\\underline{X}}_r = \\sum_{r=1}^R \\lambda_{r} \\cdot \\mathbf{a}_r \\circ \\mathbf{b}_r \\circ \\mathbf{c}_r$$\n", 85 | "\n", 86 | "The vectors $\\mathbf{a}_r, \\mathbf{b}_r$ and $\\mathbf{c}_r$ are oftentimes combined into corresponding **factor matrices**:\n", 87 | "\n", 88 | "$$\n", 89 | "\\mathbf{A} = \\Big[ \\mathbf{a}_1 \\cdots \\mathbf{a}_R \\Big] \\quad\n", 90 | "\\mathbf{B} = \\Big[ \\mathbf{b}_1 \\cdots \\mathbf{b}_R \\Big] \\quad\n", 91 | "\\mathbf{C} = \\Big[ \\mathbf{c}_1 \\cdots \\mathbf{c}_R \\Big] \\quad\n", 92 | "$$\n", 93 | "\n", 94 | "Thus, if we employ the mode-$n$ product, the canonical polyadic representation takes the form:\n", 95 | "\n", 96 | "$$\n", 97 | "\\mathbf{\\underline{X}} = \\mathbf{\\underline{\\Lambda}} \\times_1 \\mathbf{A} \\times_2 \\mathbf{B} \\times_3 \\mathbf{C} = \\Big[\\mathbf{\\underline{\\Lambda}}; \\mathbf{A}, \\mathbf{B}, \\mathbf{C} \\Big]\n", 98 | "$$\n", 99 | "\n", 100 | "where the elements on the super-diagonal of $\\mathbf{\\underline{\\Lambda}}$ are occupied by the values $\\lambda_r$ and all others are equal to zero. This is the **canonical polyadic (CP)** representation of the original tensor\n", 101 | "and can be visualised as shown in the figure below:\n", 102 | "\n", 103 | "![tensorcpd](./images/TensorCPD.png)\n", 104 | "\n", 105 | "\n", 106 | "## TensorCPD class in hottbox\n", 107 | "\n", 108 | "In **`hottbox`**, this form is available through the **``TensorCPD``** class. 
In order to create such object, you need to pass a list of factor matrices (2d numpy arrays) and a vector of values (as 1d numpy array) for the main diagonal:\n", 109 | "\n", 110 | "```python\n", 111 | "tensor_cpd = TensorCPD(fmat=[A, B, C], core_values=values)\n", 112 | "```\n", 113 | "\n", 114 | "**Note:** all matrices should have the same number of columns and be equal to the length of ``values``" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 2, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "name": "stdout", 124 | "output_type": "stream", 125 | "text": [ 126 | "Kruskal representation of a tensor with rank=(2,).\n", 127 | "Factor matrices represent properties: ['mode-0', 'mode-1', 'mode-2']\n", 128 | "With corresponding latent components described by (3, 4, 5) features respectively.\n" 129 | ] 130 | } 131 | ], 132 | "source": [ 133 | "I, J, K = 3, 4, 5 # define shape of the tensor in full form\n", 134 | "R = 2 # define Kryskal rank of a tensor in CP form \n", 135 | "\n", 136 | "A = np.arange(I * R).reshape(I, R)\n", 137 | "B = np.arange(J * R).reshape(J, R)\n", 138 | "C = np.arange(K * R).reshape(K, R)\n", 139 | "values = np.arange(R)\n", 140 | "\n", 141 | "tensor_cpd = TensorCPD(fmat=[A, B, C], core_values=values)\n", 142 | "print(tensor_cpd)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "The list of factor matrices **[A, B, C]** is stored in **`_fmat`** placeholder which can (should) be accessed through the correspodning property **`fmat`**. The values for the super-diagonal are stored in **`_core_values`** placeholder. But there is no direct access to them, because they are used fore creation of the core tensor:\n", 150 | "\n", 151 | "```python\n", 152 | "tensor_cpd.core\n", 153 | "```\n", 154 | "\n", 155 | "This returns an object of the **``Tensor``** class with the **``_core_values``** placed on its super-diagonal." 
156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 3, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | "\tFactor matrices\n", 168 | "Mode-0 factor matrix is of shape (3, 2)\n", 169 | "Mode-1 factor matrix is of shape (4, 2)\n", 170 | "Mode-2 factor matrix is of shape (5, 2)\n", 171 | "\n", 172 | "\tCore tensor\n", 173 | "This tensor is of order 3 and consists of 8 elements.\n", 174 | "Sizes and names of its modes are (2, 2, 2) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n" 175 | ] 176 | }, 177 | { 178 | "data": { 179 | "text/plain": [ 180 | "array([[[0., 0.],\n", 181 | " [0., 0.]],\n", 182 | "\n", 183 | " [[0., 0.],\n", 184 | " [0., 1.]]])" 185 | ] 186 | }, 187 | "execution_count": 3, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "print('\\tFactor matrices')\n", 194 | "for mode, fmat in enumerate(tensor_cpd.fmat):\n", 195 | " print('Mode-{} factor matrix is of shape {}'.format(mode, fmat.shape))\n", 196 | " \n", 197 | "print('\\n\\tCore tensor')\n", 198 | "print(tensor_cpd.core)\n", 199 | "tensor_cpd.core.data" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "In order to convert **``TensorCPD``** into the full representation, simply call: \n", 207 | "\n", 208 | "```python\n", 209 | "tensor_cpd.reconstruct()\n", 210 | "```\n", 211 | "\n", 212 | "This returns an object of the **``Tensor``** class with N-dimensional array calculated as described above and being assinged to the **``_data``** attibute." 
213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 4, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "name": "stdout", 222 | "output_type": "stream", 223 | "text": [ 224 | "This tensor is of order 3 and consists of 60 elements.\n", 225 | "Sizes and names of its modes are (3, 4, 5) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n" 226 | ] 227 | }, 228 | { 229 | "data": { 230 | "text/plain": [ 231 | "array([[[ 1., 3., 5., 7., 9.],\n", 232 | " [ 3., 9., 15., 21., 27.],\n", 233 | " [ 5., 15., 25., 35., 45.],\n", 234 | " [ 7., 21., 35., 49., 63.]],\n", 235 | "\n", 236 | " [[ 3., 9., 15., 21., 27.],\n", 237 | " [ 9., 27., 45., 63., 81.],\n", 238 | " [ 15., 45., 75., 105., 135.],\n", 239 | " [ 21., 63., 105., 147., 189.]],\n", 240 | "\n", 241 | " [[ 5., 15., 25., 35., 45.],\n", 242 | " [ 15., 45., 75., 105., 135.],\n", 243 | " [ 25., 75., 125., 175., 225.],\n", 244 | " [ 35., 105., 175., 245., 315.]]])" 245 | ] 246 | }, 247 | "execution_count": 4, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "tensor_full = tensor_cpd.reconstruct()\n", 254 | "print(tensor_full)\n", 255 | "tensor_full.data" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "# Tucker representation, Multi-linear rank and TensorTKD class\n", 263 | "\n", 264 | "## Multi-linear rank\n", 265 | "\n", 266 | "The **multi-linear rank** of a tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I_1 \\times \\cdots \\times I_N}$ is the $N$-tuple $(R_1, \\dots, R_N)$ where each $R_n$ is the rank of the subspace spanned by mode-$n$ fibers, i.e. $R_n = \\text{rank} \\big( \\mathbf{X}_{(n)} \\big)$. 
For a tensor of order $N$ the values $R_1, R_2, \\dots , R_N$ are not necessarily the same, whereas, for matrices (tensors of order 2) the equality $R_1 = R_2$ always holds, where $R_1$ and $R_2$ are the matrix column rank and row rank respectively.\n", 267 | "\n", 268 | "\n", 269 | "## Tucker representation\n", 270 | "![tensortkd](./images/TensorTKD.png)\n", 271 | "\n", 272 | "For a tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I \\times J \\times K}$ illustrated above, the **tucker form** represents it as a dense core tensor $\\mathbf{\\underline{G}}$ with multi-linear rank ($Q, R, P$) and a set of factor matrices $\\mathbf{A} \\in \\mathbb{R}^{I \\times Q}, \\mathbf{B} \\in \\mathbb{R}^{J \\times R}$ and $\\mathbf{C} \\in \\mathbb{R}^{K \\times P}$.\n", 273 | "\n", 274 | "The tucker form of a tensor is closely related to the CP form and can be expressed through a \n", 275 | "sequence of mode-$n$ products in a similar way.\n", 276 | "\n", 277 | "$$\n", 278 | "\\mathbf{\\underline{X}} = \\mathbf{\\underline{G}} \\times_1 \\mathbf{A} \\times_2 \\mathbf{B} \\times_3 \\mathbf{C} = \\Big[\\mathbf{\\underline{G}}; \\mathbf{A}, \\mathbf{B}, \\mathbf{C} \\Big]\n", 279 | "$$\n", 280 | "\n", 281 | "## TensorTKD class in hottbox\n", 282 | "\n", 283 | "In **`hottbox`**, this form is available through the **``TensorTKD``** class. 
In order to create such object, you need to pass a list of $N$ factor matrices (2d numpy arrays) and values for the core tensor (as n-dimensional numpy array):\n", 284 | "\n", 285 | "```python\n", 286 | "tensor_tkd = TensorTKD(fmat=[A, B, C], core_values=values)\n", 287 | "```\n", 288 | "\n", 289 | "**Note:** the number of columns in each of the factor matrices should be the same as the corresponding size of the numpy array with the values for the core tensor" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 5, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "Tucker representation of a tensor with multi-linear rank=(2, 3, 4).\n", 302 | "Factor matrices represent properties: ['mode-0', 'mode-1', 'mode-2']\n", 303 | "With corresponding latent components described by (5, 6, 7) features respectively.\n" 304 | ] 305 | } 306 | ], 307 | "source": [ 308 | "I, J, K = 5, 6, 7 # define shape of the tensor in full form\n", 309 | "Q, R, P = 2, 3, 4 # define multi-linear rank of the tensor in Tucker form\n", 310 | "\n", 311 | "A = np.arange(I * Q).reshape(I, Q)\n", 312 | "B = np.arange(J * R).reshape(J, R)\n", 313 | "C = np.arange(K * P).reshape(K, P)\n", 314 | "values = np.arange(Q * R * P).reshape(Q, R, P)\n", 315 | "\n", 316 | "tensor_tkd = TensorTKD(fmat=[A, B, C], core_values=values)\n", 317 | "print(tensor_tkd)" 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": {}, 323 | "source": [ 324 | "By analogy with the **`TensorCPD`**, the list of factor matrices **[A, B, C]** is stored in **`_fmat`** placeholder which can (should) be accessed through the correspodning property **`fmat`**. 
Similarly, the values of the core tensor are stored in **`_core_values`** placeholder and they cannot (should not) be accessed directly, because they are used to create a core tensors as an object of **`Tensor`** class, when the corresponding property is called:\n", 325 | "\n", 326 | "```python\n", 327 | "tensor_tkd.core\n", 328 | "```\n", 329 | "\n", 330 | "**Note:** the core values occupy all data values of a core tensor, as opposed to **`TensorCPD`** class where they are placed on the main diagonal." 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 6, 336 | "metadata": {}, 337 | "outputs": [ 338 | { 339 | "name": "stdout", 340 | "output_type": "stream", 341 | "text": [ 342 | "\tFactor matrices\n", 343 | "Mode-0 factor matrix is of shape (5, 2)\n", 344 | "Mode-1 factor matrix is of shape (6, 3)\n", 345 | "Mode-2 factor matrix is of shape (7, 4)\n", 346 | "\n", 347 | "\tCore tensor\n", 348 | "This tensor is of order 3 and consists of 24 elements.\n", 349 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n" 350 | ] 351 | }, 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "array([[[ 0, 1, 2, 3],\n", 356 | " [ 4, 5, 6, 7],\n", 357 | " [ 8, 9, 10, 11]],\n", 358 | "\n", 359 | " [[12, 13, 14, 15],\n", 360 | " [16, 17, 18, 19],\n", 361 | " [20, 21, 22, 23]]])" 362 | ] 363 | }, 364 | "execution_count": 6, 365 | "metadata": {}, 366 | "output_type": "execute_result" 367 | } 368 | ], 369 | "source": [ 370 | "print('\\tFactor matrices')\n", 371 | "for mode, fmat in enumerate(tensor_tkd.fmat):\n", 372 | " print('Mode-{} factor matrix is of shape {}'.format(mode, fmat.shape))\n", 373 | " \n", 374 | "print('\\n\\tCore tensor')\n", 375 | "print(tensor_tkd.core)\n", 376 | "tensor_tkd.core.data" 377 | ] 378 | }, 379 | { 380 | "cell_type": "markdown", 381 | "metadata": {}, 382 | "source": [ 383 | "In order to convert **``TensorTKD``** into the full representation, simply call: \n", 384 | "\n", 385 | 
"```python\n", 386 | "tensor_tkd.reconstruct()\n", 387 | "```\n", 388 | "\n", 389 | "This return an object of the **``Tensor``** class with N-dimensional array calculated as \n", 390 | "described above and being assinged to the **``_data``** attibute." 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 7, 396 | "metadata": {}, 397 | "outputs": [ 398 | { 399 | "name": "stdout", 400 | "output_type": "stream", 401 | "text": [ 402 | "This tensor is of order 3 and consists of 210 elements.\n", 403 | "Sizes and names of its modes are (5, 6, 7) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n" 404 | ] 405 | }, 406 | { 407 | "data": { 408 | "text/plain": [ 409 | "array([[[ 378, 1346, 2314, 3282, 4250, 5218, 6186],\n", 410 | " [ 1368, 4856, 8344, 11832, 15320, 18808, 22296],\n", 411 | " [ 2358, 8366, 14374, 20382, 26390, 32398, 38406],\n", 412 | " [ 3348, 11876, 20404, 28932, 37460, 45988, 54516],\n", 413 | " [ 4338, 15386, 26434, 37482, 48530, 59578, 70626],\n", 414 | " [ 5328, 18896, 32464, 46032, 59600, 73168, 86736]],\n", 415 | "\n", 416 | " [[ 1458, 5146, 8834, 12522, 16210, 19898, 23586],\n", 417 | " [ 5112, 17944, 30776, 43608, 56440, 69272, 82104],\n", 418 | " [ 8766, 30742, 52718, 74694, 96670, 118646, 140622],\n", 419 | " [ 12420, 43540, 74660, 105780, 136900, 168020, 199140],\n", 420 | " [ 16074, 56338, 96602, 136866, 177130, 217394, 257658],\n", 421 | " [ 19728, 69136, 118544, 167952, 217360, 266768, 316176]],\n", 422 | "\n", 423 | " [[ 2538, 8946, 15354, 21762, 28170, 34578, 40986],\n", 424 | " [ 8856, 31032, 53208, 75384, 97560, 119736, 141912],\n", 425 | " [ 15174, 53118, 91062, 129006, 166950, 204894, 242838],\n", 426 | " [ 21492, 75204, 128916, 182628, 236340, 290052, 343764],\n", 427 | " [ 27810, 97290, 166770, 236250, 305730, 375210, 444690],\n", 428 | " [ 34128, 119376, 204624, 289872, 375120, 460368, 545616]],\n", 429 | "\n", 430 | " [[ 3618, 12746, 21874, 31002, 40130, 49258, 58386],\n", 431 | " [ 12600, 44120, 75640, 
107160, 138680, 170200, 201720],\n", 432 | " [ 21582, 75494, 129406, 183318, 237230, 291142, 345054],\n", 433 | " [ 30564, 106868, 183172, 259476, 335780, 412084, 488388],\n", 434 | " [ 39546, 138242, 236938, 335634, 434330, 533026, 631722],\n", 435 | " [ 48528, 169616, 290704, 411792, 532880, 653968, 775056]],\n", 436 | "\n", 437 | " [[ 4698, 16546, 28394, 40242, 52090, 63938, 75786],\n", 438 | " [ 16344, 57208, 98072, 138936, 179800, 220664, 261528],\n", 439 | " [ 27990, 97870, 167750, 237630, 307510, 377390, 447270],\n", 440 | " [ 39636, 138532, 237428, 336324, 435220, 534116, 633012],\n", 441 | " [ 51282, 179194, 307106, 435018, 562930, 690842, 818754],\n", 442 | " [ 62928, 219856, 376784, 533712, 690640, 847568, 1004496]]])" 443 | ] 444 | }, 445 | "execution_count": 7, 446 | "metadata": {}, 447 | "output_type": "execute_result" 448 | } 449 | ], 450 | "source": [ 451 | "tensor_full = tensor_tkd.reconstruct()\n", 452 | "print(tensor_full)\n", 453 | "tensor_full.data" 454 | ] 455 | }, 456 | { 457 | "cell_type": "markdown", 458 | "metadata": {}, 459 | "source": [ 460 | "# Tensor Train representation, TT-rank and TensorTT class\n", 461 | "\n", 462 | "## Tensor Train representation\n", 463 | "\n", 464 | "![tensortt](./images/TensorTT.png)" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "**Tensor trains (TTs)** are the simplest kinds of tensor networks, i.e. a decomposition of a high-order tensor in a set of sparsely interconnected lower-order tensors and factor matrices. 
Mathematically, an $N$-th order tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I_1 \\times I_2 \\times \\cdots \\times I_N}$ can be expressed as a TT as\n", 472 | "\n", 473 | "$$\n", 474 | "\\mathbf{\\underline{X}} = \\mathbf{A} \\times^1_2 \\mathbf{\\underline{G}}^{(1)} \\times^1_3 \\mathbf{\\underline{G}}^{(2)} \\times^1_3 \\cdots \\times^1_3 \\mathbf{\\underline{G}}^{(N-1)} \\times^1_3 \\mathbf{B} = \\Big[ \\mathbf{A}, \\mathbf{\\underline{G}}^{(1)}, \\mathbf{\\underline{G}}^{(2)}, \\cdots, \\mathbf{\\underline{G}}^{(N-1)}, \\mathbf{B} \\Big]\n", 475 | "$$\n", 476 | "\n", 477 | "Each element of a TT is generally referred to as **TT-core**, and $\\mathbf{A} \\in \\mathbb{R}^{I_1 \\times R_1}$, $\\mathbf{B} \\in \\mathbb{R}^{R_{N-1}\\times I_N}$, $\\mathbf{\\underline{G}}^{(n)} \\in \\mathbb{R}^{R_n \\times I_{n+1} \\times R_{n+1}}$ and the tuple $(R_1, R_2, \\dots, R_{N-1})$ is called the **TT-rank**.\n" 478 | ] 479 | }, 480 | { 481 | "cell_type": "markdown", 482 | "metadata": {}, 483 | "source": [ 484 | "## TensorTT class in hottbox\n", 485 | "\n", 486 | "In **`hottbox`**, this form is available through the **``TensorTT``** class. 
In order to create such object, you need to pass a list of values (as numpy arrays) for \n", 487 | "cores:\n", 488 | "\n", 489 | "```python\n", 490 | "tensor_tt = TensorTT(core_values=values)\n", 491 | "```" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": 8, 497 | "metadata": {}, 498 | "outputs": [ 499 | { 500 | "name": "stdout", 501 | "output_type": "stream", 502 | "text": [ 503 | "Tensor train representation of a tensor with tt-rank=(2, 3).\n", 504 | "Shape of this representation in the full format is (4, 5, 6).\n", 505 | "Physical modes of its cores represent properties: ['mode-0', 'mode-1', 'mode-2']\n" 506 | ] 507 | } 508 | ], 509 | "source": [ 510 | "I, J, K = 4, 5, 6 # define shape of the tensor in full form\n", 511 | "R1, R2 = 2, 3 # define tt rank of the tensor in Tensor train form\n", 512 | "\n", 513 | "values_1 = np.arange(I * R1).reshape(I, R1)\n", 514 | "values_2 = np.arange(R1 * J * R2).reshape(R1, J, R2)\n", 515 | "values_3 = np.arange(R2 * K).reshape(R2, K)\n", 516 | "\n", 517 | "tensor_tt = TensorTT(core_values=[values_1, values_2, values_3])\n", 518 | "print(tensor_tt)" 519 | ] 520 | }, 521 | { 522 | "cell_type": "markdown", 523 | "metadata": {}, 524 | "source": [ 525 | "The list of values for these core tensors is stored in **`_core_values`** placeholder. They should not be accessed directly, because they are used\n", 526 | "for creation of **`Tensor`** class objects each of which represent a particular tt-core. The list of all cores can be accessed as \n", 527 | "\n", 528 | "```python\n", 529 | "tensor_tt.cores\n", 530 | "```\n", 531 | "\n", 532 | "**Note:** All components of the Tensor Train representation are conventionally considered to be a core therefore, even matrices are objects of **`Tensor`** class." 
533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": 9, 538 | "metadata": {}, 539 | "outputs": [ 540 | { 541 | "name": "stdout", 542 | "output_type": "stream", 543 | "text": [ 544 | "\n", 545 | "\tCore tensor #0 of TT representation\n", 546 | "This tensor is of order 2 and consists of 8 elements.\n", 547 | "Sizes and names of its modes are (4, 2) and ['mode-0', 'mode-1'] respectively.\n", 548 | "[[0 1]\n", 549 | " [2 3]\n", 550 | " [4 5]\n", 551 | " [6 7]]\n", 552 | "\n", 553 | "\tCore tensor #1 of TT representation\n", 554 | "This tensor is of order 3 and consists of 30 elements.\n", 555 | "Sizes and names of its modes are (2, 5, 3) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n", 556 | "[[[ 0 1 2]\n", 557 | " [ 3 4 5]\n", 558 | " [ 6 7 8]\n", 559 | " [ 9 10 11]\n", 560 | " [12 13 14]]\n", 561 | "\n", 562 | " [[15 16 17]\n", 563 | " [18 19 20]\n", 564 | " [21 22 23]\n", 565 | " [24 25 26]\n", 566 | " [27 28 29]]]\n", 567 | "\n", 568 | "\tCore tensor #2 of TT representation\n", 569 | "This tensor is of order 2 and consists of 18 elements.\n", 570 | "Sizes and names of its modes are (3, 6) and ['mode-0', 'mode-1'] respectively.\n", 571 | "[[ 0 1 2 3 4 5]\n", 572 | " [ 6 7 8 9 10 11]\n", 573 | " [12 13 14 15 16 17]]\n" 574 | ] 575 | } 576 | ], 577 | "source": [ 578 | "for i, tt_core in enumerate(tensor_tt.cores): \n", 579 | " print('\\n\\tCore tensor #{} of TT representation'.format(i)) \n", 580 | " print(tt_core) \n", 581 | " print(tt_core.data)" 582 | ] 583 | }, 584 | { 585 | "cell_type": "markdown", 586 | "metadata": {}, 587 | "source": [ 588 | "If you what to access a specific tt-core of the TT representation, then it is more efficient to use a corresponding method which takes a positional number of desired core as input parameters\n", 589 | "\n", 590 | "```python\n", 591 | "tensor_tt.core(i=0)\n", 592 | "```\n", 593 | "\n", 594 | "**Note:** this parameter should not exceed the order of TT representation" 595 | ] 596 | }, 597 | { 598 | 
"cell_type": "code", 599 | "execution_count": 10, 600 | "metadata": {}, 601 | "outputs": [ 602 | { 603 | "name": "stdout", 604 | "output_type": "stream", 605 | "text": [ 606 | "\n", 607 | "\tCore tensor #0 of TT representation\n", 608 | "This tensor is of order 2 and consists of 8 elements.\n", 609 | "Sizes and names of its modes are (4, 2) and ['mode-0', 'mode-1'] respectively.\n", 610 | "[[0 1]\n", 611 | " [2 3]\n", 612 | " [4 5]\n", 613 | " [6 7]]\n", 614 | "\n", 615 | "\tCore tensor #1 of TT representation\n", 616 | "This tensor is of order 3 and consists of 30 elements.\n", 617 | "Sizes and names of its modes are (2, 5, 3) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n", 618 | "[[[ 0 1 2]\n", 619 | " [ 3 4 5]\n", 620 | " [ 6 7 8]\n", 621 | " [ 9 10 11]\n", 622 | " [12 13 14]]\n", 623 | "\n", 624 | " [[15 16 17]\n", 625 | " [18 19 20]\n", 626 | " [21 22 23]\n", 627 | " [24 25 26]\n", 628 | " [27 28 29]]]\n", 629 | "\n", 630 | "\tCore tensor #2 of TT representation\n", 631 | "This tensor is of order 2 and consists of 18 elements.\n", 632 | "Sizes and names of its modes are (3, 6) and ['mode-0', 'mode-1'] respectively.\n", 633 | "[[ 0 1 2 3 4 5]\n", 634 | " [ 6 7 8 9 10 11]\n", 635 | " [12 13 14 15 16 17]]\n" 636 | ] 637 | } 638 | ], 639 | "source": [ 640 | "for i in range(tensor_tt.order):\n", 641 | " tt_core = tensor_tt.core(i)\n", 642 | " print('\\n\\tCore tensor #{} of TT representation'.format(i)) \n", 643 | " print(tt_core) \n", 644 | " print(tt_core.data)" 645 | ] 646 | }, 647 | { 648 | "cell_type": "markdown", 649 | "metadata": {}, 650 | "source": [ 651 | "In order to convert **``TensorTT``** into the full representation, simply call: \n", 652 | "\n", 653 | "```python\n", 654 | "tensor_tt.reconstruct()\n", 655 | "```\n", 656 | "\n", 657 | "This return an object of the **``Tensor``** class with N-dimensional array calculated as described above and being assinged to the **``_data``** attibute." 
658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": 11, 663 | "metadata": {}, 664 | "outputs": [ 665 | { 666 | "name": "stdout", 667 | "output_type": "stream", 668 | "text": [ 669 | "This tensor is of order 3 and consists of 120 elements.\n", 670 | "Sizes and names of its modes are (4, 5, 6) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n" 671 | ] 672 | }, 673 | { 674 | "data": { 675 | "text/plain": [ 676 | "array([[[ 300, 348, 396, 444, 492, 540],\n", 677 | " [ 354, 411, 468, 525, 582, 639],\n", 678 | " [ 408, 474, 540, 606, 672, 738],\n", 679 | " [ 462, 537, 612, 687, 762, 837],\n", 680 | " [ 516, 600, 684, 768, 852, 936]],\n", 681 | "\n", 682 | " [[ 960, 1110, 1260, 1410, 1560, 1710],\n", 683 | " [1230, 1425, 1620, 1815, 2010, 2205],\n", 684 | " [1500, 1740, 1980, 2220, 2460, 2700],\n", 685 | " [1770, 2055, 2340, 2625, 2910, 3195],\n", 686 | " [2040, 2370, 2700, 3030, 3360, 3690]],\n", 687 | "\n", 688 | " [[1620, 1872, 2124, 2376, 2628, 2880],\n", 689 | " [2106, 2439, 2772, 3105, 3438, 3771],\n", 690 | " [2592, 3006, 3420, 3834, 4248, 4662],\n", 691 | " [3078, 3573, 4068, 4563, 5058, 5553],\n", 692 | " [3564, 4140, 4716, 5292, 5868, 6444]],\n", 693 | "\n", 694 | " [[2280, 2634, 2988, 3342, 3696, 4050],\n", 695 | " [2982, 3453, 3924, 4395, 4866, 5337],\n", 696 | " [3684, 4272, 4860, 5448, 6036, 6624],\n", 697 | " [4386, 5091, 5796, 6501, 7206, 7911],\n", 698 | " [5088, 5910, 6732, 7554, 8376, 9198]]])" 699 | ] 700 | }, 701 | "execution_count": 11, 702 | "metadata": {}, 703 | "output_type": "execute_result" 704 | } 705 | ], 706 | "source": [ 707 | "tensor_full = tensor_tt.reconstruct()\n", 708 | "print(tensor_full)\n", 709 | "tensor_full.data" 710 | ] 711 | }, 712 | { 713 | "cell_type": "markdown", 714 | "metadata": {}, 715 | "source": [ 716 | "# Further reading list\n", 717 | "- Tamara G. Kolda and Brett W. Bader, \"Tensor decompositions and applications.\" SIAM REVIEW, 51(3):455–500, 2009.\n", 718 | "\n", 719 | "- Ivan V. 
Oseledets, \"Tensor-train decomposition.\" SIAM Journal on Scientific Computing 33.5 (2011): 2295-2317." 720 | ] 721 | } 722 | ], 723 | "metadata": { 724 | "kernelspec": { 725 | "display_name": "hottbox-tutorials", 726 | "language": "python", 727 | "name": "hottbox-tutorials" 728 | }, 729 | "language_info": { 730 | "codemirror_mode": { 731 | "name": "ipython", 732 | "version": 3 733 | }, 734 | "file_extension": ".py", 735 | "mimetype": "text/x-python", 736 | "name": "python", 737 | "nbconvert_exporter": "python", 738 | "pygments_lexer": "ipython3", 739 | "version": "3.6.6" 740 | } 741 | }, 742 | "nbformat": 4, 743 | "nbformat_minor": 2 744 | } 745 | -------------------------------------------------------------------------------- /3_Fundamental_tensor_decompositions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Fundamental tensor decompositions.\n", 8 | "### Last modification (05.06.2018)\n", 9 | "\n", 10 | "In this tutorial we provide a theoretical backgound on the fundamental tensor decompositions of multidimensional arrays and show how these data algorithms can be used with [hottbox](https://github.com/hottbox/hottbox) through **CPD**, **HOSVD**, **HOOI** and **TTSVD** classes.\n", 11 | "\n", 12 | "More details on **CPD**, **HOSVD**, **HOOI** and **TTSVD** classes can be found on the [documentation page](https://hottbox.github.io/stable/api/hottbox.algorithms.decomposition).\n", 13 | "\n", 14 | "**Note:** this tutorial assumes that you are familiar with the basics of tensor algebra, tensor representaitons in different forms and the corresponding conventional notation. 
If you are new to these topics, check out our previous tutorials: [tutorial_1](https://github.com/hottbox/hottbox-tutorials/blob/master/1_N-dimensional_arrays_and_Tensor_class.ipynb) and [tutorial_2](https://github.com/hottbox/hottbox-tutorials/blob/master/2_Efficient_representations_of_tensors.ipynb).\n", 15 | "\n", 16 | "**Requirements:** ``hottbox==0.1.3``\n", 17 | "\n", 18 | "**Authors:** \n", 19 | "Ilya Kisil (ilyakisil@gmail.com); \n", 20 | "Giuseppe G. Calvi (ggc115@ic.ac.uk)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import numpy as np\n", 30 | "from hottbox.core import Tensor, residual_tensor\n", 31 | "from hottbox.algorithms.decomposition import TTSVD, HOSVD, HOOI, CPD\n", 32 | "from hottbox.metrics import residual_rel_error" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Tensor decompositions and their API\n", 40 | "\n", 41 | "In [previous tutorial](https://github.com/hottbox/hottbox-tutorials/blob/master/2_Efficient_representations_of_tensors.ipynb), we have introduced various efficient representations of the multi-dimensional arrays (tensors) and how they can be created using the **`hottbox`** API. Here we show how these representations can be obtained for a given tensor.\n", 42 | "\n", 43 | "For these purposes, the following algorithms have been implemented in **``hottbox>=0.1.2``**:\n", 44 | "\n", 45 | "- CPD: produces instance of **TensorCPD** class\n", 46 | "- HOSVD: produces instance of **TensorTKD** class\n", 47 | "- HOOI: produces instance of **TensorTKD** class\n", 48 | "- TTSVD: produces instance of **TensorTT** class\n", 49 | "\n", 50 | "By analogy with the computation algorithms in **`sklearn`**, you first need to create an instance of this algorithm. Then you use its method **`decompose`** in order to obtain an efficient representation of the original tensor. 
See [tutorial_2](https://github.com/hottbox/hottbox-tutorials/blob/master/2_Efficient_representations_of_tensors.ipynb) for more information on various efficient representations of multi-dimensional arrays. For simplicity and ease of visualisation, the following material is provided for the tensors of order $3$, but can be easily generalised to the case of $N$-th order.\n", 51 | "\n", 52 | "In all computational examples below we will decompose the same 3-D array with randomly generated values, while all algorithms will be initialised with default parameters." 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 2, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | "This tensor is of order 3 and consists of 210 elements.\n", 65 | "Sizes and names of its modes are (5, 6, 7) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n" 66 | ] 67 | } 68 | ], 69 | "source": [ 70 | "np.random.seed(0)\n", 71 | "I, J, K = 5, 6, 7\n", 72 | "\n", 73 | "array_3d = np.random.rand(I * J * K).reshape((I, J, K)).astype(np.float)\n", 74 | "\n", 75 | "tensor = Tensor(array_3d)\n", 76 | "print(tensor)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "# Canonical Polyadic Decomposition (CPD)\n", 84 | "![tensorcpd](./images/TensorCPD.png)\n", 85 | "## Theoretical background\n", 86 | "\n", 87 | "The **Canonical Polyadic Decomposition (CPD)** (also referred to as PARAFAC or CANDECOMP) is an algorithm that factorizes a $3$-rd order tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I \\times J \\times K}$ into a linear combination of terms $\\mathbf{\\underline{X}}_r = \\mathbf{a}_r \\circ \\mathbf{b}_r \\circ \\mathbf{c}_r$, which are rank-$1$ tensors. 
In other words the tensor $\\mathbf{\\underline{X}}$ is decomposed as\n", 88 | "\n", 89 | "$$\n", 90 | "\\begin{equation}\n", 91 | "\\begin{aligned}\n", 92 | "\\mathbf{\\underline{X}} & \\simeq \\sum_{r=1}^{R} \\lambda_r \\mathbf{a}_r \\circ \\mathbf{b}_r \\circ \\mathbf{c}_r\\\\\n", 93 | "& = \\mathbf{\\underline{\\Lambda}} \\times_1 \\mathbf{A} \\times_2 \\mathbf{B} \\times_3 \\mathbf{C}\\\\\n", 94 | "& = \\Big[ \\mathbf{\\underline{\\Lambda}} ; \\mathbf{A}, \\mathbf{B}, \\mathbf{C} \\Big]\n", 95 | "\\end{aligned}\n", 96 | "\\end{equation}\n", 97 | "$$\n", 98 | "\n", 99 | "where \n", 100 | "\n", 101 | "- $\\mathbf{\\underline{\\Lambda}}$ is a $3$-rd order core tensor having $\\lambda_r$ as entries in positions $\\mathbf{\\underline{\\Lambda}}[i, j, k]$, where $i = j = k$, and zeroes elsewhere\n", 102 | "\n", 103 | "- $\\mathbf{A}, \\mathbf{B}, \\mathbf{C}$ are factor matrices obtained as the concatenation of the corresponding factor vectors, i.e. $ \\mathbf{A} = \\Big[ \\mathbf{a}_1 \\mathbf{a}_2 \\cdots \\mathbf{a}_R \\Big] $ \n", 104 | "\n", 105 | "Assuming the Kruskal rank is fixed, there are many algorithms to compute a CPD. The most popular approach is via the alternating least squares (ALS) method. 
The goal is to find such CP representation $[ \\mathbf{\\underline{\\Lambda}} ; \\mathbf{A}, \\mathbf{B}, \\mathbf{C} ]$ which provides the best approximation of the original tensor $\\mathbf{\\underline{X}}$:\n", 106 | "\n", 107 | "$$\n", 108 | "\\text{min} \\| \\mathbf{\\underline{X}} - [ \\mathbf{\\underline{\\Lambda}} ; \\mathbf{A}, \\mathbf{B}, \\mathbf{C} ] \\|^2_F\n", 109 | "$$\n", 110 | "\n", 111 | "The alternating least squares approach fixes $\\mathbf{B}$ and $\\mathbf{C}$ to solve for $\\mathbf{A}$, then fixes $\\mathbf{A}$ and $\\mathbf{C}$ to solve for $\\mathbf{B}$, then fixes $\\mathbf{A}$ and $\\mathbf{B}$ to solve for $\\mathbf{C}$, and continues to repeat the\n", 112 | "entire procedure until some convergence criterion is satisfied.\n" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## CPD class in hottbox\n", 120 | "\n", 121 | "In **`hottbox`**, the CPD-ALS algorithm is implemented by the **`CPD`** class. Regardless of the parameters used to initialise this algorithm, it outputs an instance of **`TensorCPD`** class after each call of the **`decompose`** method. This method takes an object of **`Tensor`** class and desired value of the Kruskal rank passed as a tuple of length 1. \n", 122 | "\n", 123 | "**Note:** the Kruskal rank is passed as a tuple so as to keep the same format with other algorithms for tensor decompositions." 
124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 3, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "CPD(epsilon=0.01, init='svd', max_iter=50, random_state=None, tol=0.0001,\n", 135 | " verbose=False)" 136 | ] 137 | }, 138 | "execution_count": 3, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "alg = CPD()\n", 145 | "alg" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 4, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "\tOutput of the CPD algorithm:\n", 158 | "Kruskal representation of a tensor with rank=(5,).\n", 159 | "Factor matrices represent properties: ['mode-0', 'mode-1', 'mode-2']\n", 160 | "With corresponding latent components described by (5, 6, 7) features respectively.\n", 161 | "\n", 162 | "\tFactor matrices\n", 163 | "Mode-0 factor matrix is of shape (5, 5)\n", 164 | "Mode-1 factor matrix is of shape (6, 5)\n", 165 | "Mode-2 factor matrix is of shape (7, 5)\n", 166 | "\n", 167 | "\tCore tensor\n", 168 | "This tensor is of order 3 and consists of 125 elements.\n", 169 | "Sizes and names of its modes are (5, 5, 5) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n" 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "kruskal_rank = (5,)\n", 175 | "\n", 176 | "tensor_cpd = alg.decompose(tensor, rank=kruskal_rank)\n", 177 | "print(\"\\tOutput of the {} algorithm:\".format(alg.name))\n", 178 | "print(tensor_cpd)\n", 179 | "\n", 180 | "print('\\n\\tFactor matrices')\n", 181 | "for mode, fmat in enumerate(tensor_cpd.fmat):\n", 182 | " print('Mode-{} factor matrix is of shape {}'.format(mode, fmat.shape))\n", 183 | " \n", 184 | "print('\\n\\tCore tensor')\n", 185 | "print(tensor_cpd.core)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "As we can see, the produced object of the 
**`TensorCPD`** class also contains general information about the underlying tensor, such as its shape, order etc, which can be accessed through the corresponding properties" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 5, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "name": "stdout", 202 | "output_type": "stream", 203 | "text": [ 204 | "The shape of the underlying tensor is (5, 6, 7)\n", 205 | "The order of the underlying tensor is 3\n" 206 | ] 207 | } 208 | ], 209 | "source": [ 210 | "full_shape = tensor_cpd.ft_shape\n", 211 | "order = tensor_cpd.order\n", 212 | "print('The shape of the underlying tensor is {}'.format(full_shape))\n", 213 | "print('The order of the underlying tensor is {}'.format(order))" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "# Tucker Decomposition\n", 221 | "\n", 222 | "![tensortkd](./images/TensorTKD.png)\n", 223 | "\n", 224 | "**Tucker Decomposition** represents a given tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I \\times J \\times K}$ if the form of a dense core tensor $\\mathbf{\\underline{G}}$ with multi-linear rank $(Q, R, P)$ and a set of\n", 225 | "factor matrices $\\mathbf{A} \\in \\mathbb{R}^{I \\times Q}, \\mathbf{B} \\in \\mathbb{R}^{J \\times R}$ and $\\mathbf{C} \\in\n", 226 | "\\mathbb{R}^{K \\times P}$ as illustrated above. 
In other words, the tensor $\\mathbf{\\underline{X}}$ can be represented in the Tucker form as\n", 227 | "\n", 228 | "$$\n", 229 | "\\begin{equation}\n", 230 | "\\begin{aligned}\n", 231 | "\\mathbf{\\underline{X}} & \\simeq \\sum_{q=1}^{Q} \\sum_{r=1}^{R} \\sum_{p=1}^{P} g_{qrp} \\mathbf{a}_q \\circ \\mathbf{b}_r \\circ \\mathbf{c}_p\\\\\n", 232 | "& = \\mathbf{\\underline{G}} \\times_1 \\mathbf{A} \\times_2 \\mathbf{B} \\times_3 \\mathbf{C}\\\\\n", 233 | "& = \\Big[ \\mathbf{\\underline{G}} ; \\mathbf{A}, \\mathbf{B}, \\mathbf{C} \\Big]\n", 234 | "\\end{aligned}\n", 235 | "\\end{equation}\n", 236 | "$$\n", 237 | "\n", 238 | "In practice, there exist several algorithms to represent a given tensor in the Tucker format. The two most used ones are Higher Order Singular Value Decomposition (HOSVD), and Higher Order Orthogonal Iteration (HOOI), which are implemented through the **`HOSVD`** and **`HOOI`** classes respectively." 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "## Higher Order Singular Value Decomposition (HOSVD)\n", 246 | "\n", 247 | "Consider a $3$-rd order tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I \\times J \\times K}$, decomposed in the Tucker format as\n", 248 | "\n", 249 | "$$\n", 250 | "\\mathbf{\\underline{X}} = \\mathbf{\\underline{G}} \\times_1 \\mathbf{A} \\times_2 \\mathbf{B} \\times_3 \\mathbf{C}\n", 251 | "$$\n", 252 | "\n", 253 | "The HOSVD is a special case of the Tucker decomposition, in which all the factor matrices are constrained to be orthogonal. 
They are computed as truncated versions of the left singular matrices of all possible mode-$n$ unfoldings of tensor $\\mathbf{\\underline{X}}$:\n", 254 | "\n", 255 | "$$\n", 256 | "\\begin{aligned}\n", 257 | "\\mathbf{X}_{(1)} &= \\mathbf{U}_1 \\mathbf{\\Sigma}_1 \\mathbf{V}_1^T \\quad \\rightarrow \\quad \\mathbf{A} = \\mathbf{U}_1[1:R_1]\\\\\n", 258 | "\\mathbf{X}_{(2)} &= \\mathbf{U}_2 \\mathbf{\\Sigma}_2 \\mathbf{V}_2^T \\quad \\rightarrow \\quad \\mathbf{B} = \\mathbf{U}_2[1:R_2] \\\\\n", 259 | "\\mathbf{X}_{(3)} &= \\mathbf{U}_3 \\mathbf{\\Sigma}_3 \\mathbf{V}_3^T \\quad \\rightarrow \\quad \\mathbf{C} = \\mathbf{U}_3[1:R_3] \\\\\n", 260 | "\\end{aligned}\n", 261 | "$$\n", 262 | "\n", 263 | "For a general order-$N$ tensor, the $N$-tuple $(R_1, \\ldots, R_N)$ is called the **multi-linear rank** and provides flexibility in compression and approximation of the original tensor. For our order-$3$ tensor, the multilinear rank is therefore $(R_1, R_2, R_3)$. After factor matrices are obtained, the core tensor $\\mathbf{\\underline{G}}$ is computed as\n", 264 | "$$\n", 265 | "\\mathbf{\\underline{G}} = \\mathbf{\\underline{X}} \\times_1 \\mathbf{A}^T \\times_2 \\mathbf{B}^T \\times_3 \\mathbf{C}^T \n", 266 | "$$" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "## HOSVD class in hottbox\n", 274 | "\n", 275 | "In **`hottbox`**, the HOSVD algorithm is implemented by the **`HOSVD`** class. Regardless of the parameters used to initialise this algorithm, it outputs an instance of **`TensorTKD`** class after each call of the **`decompose`** method. This method takes an object of **`Tensor`** class and desired values of multi-linear rank passed as a tuple. 
" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 6, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/plain": [ 286 | "HOSVD(process=(), verbose=False)" 287 | ] 288 | }, 289 | "execution_count": 6, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "alg = HOSVD()\n", 296 | "alg" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 7, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "name": "stdout", 306 | "output_type": "stream", 307 | "text": [ 308 | "\tOutput of the HOSVD algorithm:\n", 309 | "Tucker representation of a tensor with multi-linear rank=(4, 5, 6).\n", 310 | "Factor matrices represent properties: ['mode-0', 'mode-1', 'mode-2']\n", 311 | "With corresponding latent components described by (5, 6, 7) features respectively.\n", 312 | "\n", 313 | "\tFactor matrices\n", 314 | "Mode-0 factor matrix is of shape (5, 4)\n", 315 | "Mode-1 factor matrix is of shape (6, 5)\n", 316 | "Mode-2 factor matrix is of shape (7, 6)\n", 317 | "\n", 318 | "\tCore tensor\n", 319 | "This tensor is of order 3 and consists of 120 elements.\n", 320 | "Sizes and names of its modes are (4, 5, 6) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n" 321 | ] 322 | } 323 | ], 324 | "source": [ 325 | "ml_rank = (4, 5, 6)\n", 326 | "tensor_tkd_hosvd = alg.decompose(tensor, ml_rank)\n", 327 | "print(\"\\tOutput of the {} algorithm:\".format(alg.name))\n", 328 | "print(tensor_tkd_hosvd)\n", 329 | "\n", 330 | "print('\\n\\tFactor matrices')\n", 331 | "for mode, fmat in enumerate(tensor_tkd_hosvd.fmat):\n", 332 | " print('Mode-{} factor matrix is of shape {}'.format(mode, fmat.shape))\n", 333 | " \n", 334 | "print('\\n\\tCore tensor')\n", 335 | "print(tensor_tkd_hosvd.core)" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "As we can see, the produced object of the **`TensorTKD`** class also contains 
general information about the underlying tensor, such as its shape, order etc, which can be accessed through the corresponding properties" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 8, 348 | "metadata": {}, 349 | "outputs": [ 350 | { 351 | "name": "stdout", 352 | "output_type": "stream", 353 | "text": [ 354 | "The shape of the underlying tensor is (5, 6, 7)\n", 355 | "The order of the underlying tensor is 3\n" 356 | ] 357 | } 358 | ], 359 | "source": [ 360 | "full_shape = tensor_tkd_hosvd.ft_shape\n", 361 | "order = tensor_tkd_hosvd.order\n", 362 | "print('The shape of the underlying tensor is {}'.format(full_shape))\n", 363 | "print('The order of the underlying tensor is {}'.format(order))" 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "metadata": {}, 369 | "source": [ 370 | "## Higher Order Orthogonal Iteration (HOOI)" 371 | ] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "metadata": {}, 376 | "source": [ 377 | "The HOOI algorithm is another special case of the Tucker decomposition. Like HOSVD, it decomposes a tensor into a dense core tensor and orthogonal factor matrices. The difference between the two lies in the fact that in HOOI the factor matrices are optimized iteratively using an Alternating Least Squares (ALS) approach. (In practice HOSVD is usually used within HOOI to initialize the factor matrices). 
In other words, the Tucker representation $[ \\mathbf{\\underline{G}};\\mathbf{A}^{(1)}, \\mathbf{A}^{(2)}, \\cdots,\\mathbf{A}^{(N)} ]$ of the given tensor $\\mathbf{\\underline{X}}$ is obtained through the HOOI as follows\n", 378 | "\n", 379 | "$$\n", 380 | "\\begin{aligned}\n", 381 | "&\\mathbf{\\underline{Y}} = \\mathbf{\\underline{X}} \\times_1 \\mathbf{A}^{(1)T} \\times_2 \\cdots \\times_{n-1} \\mathbf{A}^{(n-1)T} \\times_{n+1} \\mathbf{A}^{(n+1)T} \\times_{n+2} \\cdots \\times_N \\mathbf{A}^{(N)T} \\\\\n", 382 | "&\\mathbf{A}^{(n)} \\leftarrow R_n \\text{ leftmost singular vectors of } \\mathbf{Y}_{(n)}\n", 383 | "\\end{aligned}\n", 384 | "$$\n", 385 | "\n", 386 | "The above is repeated until convergence, then the core tensor $\\mathbf{\\underline{G}} \\in \\mathbb{R}^{R_1 \\times R_2 \\times \\cdots \\times R_N}$ is computed as\n", 387 | "\n", 388 | "$$\n", 389 | "\\mathbf{\\underline{G}} = \\mathbf{\\underline{X}} \\times_1 \\mathbf{A}^{(1)T} \\times_2 \\mathbf{A}^{(2)T} \\times_3 \\cdots \\times_N \\mathbf{A}^{(N)T}\n", 390 | "$$" 391 | ] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "metadata": {}, 396 | "source": [ 397 | "## HOOI class in hottbox\n", 398 | "\n", 399 | "In **`hottbox`**, the HOOI algorithm is implemented by the **`HOOI`** class. Regardless of the parameters used to initialise this algorithm, it outputs an instance of **`TensorTKD`** class after each call of the **`decompose`** method. This method takes an object of **`Tensor`** class and desired values of multi-linear rank passed as a tuple. 
" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 9, 405 | "metadata": {}, 406 | "outputs": [ 407 | { 408 | "data": { 409 | "text/plain": [ 410 | "HOOI(epsilon=0.01, init='hosvd', max_iter=50, process=(),\n", 411 | " random_state=None, tol=0.0001, verbose=False)" 412 | ] 413 | }, 414 | "execution_count": 9, 415 | "metadata": {}, 416 | "output_type": "execute_result" 417 | } 418 | ], 419 | "source": [ 420 | "alg = HOOI()\n", 421 | "alg" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 10, 427 | "metadata": {}, 428 | "outputs": [ 429 | { 430 | "name": "stdout", 431 | "output_type": "stream", 432 | "text": [ 433 | "\tOutput of the HOOI algorithm:\n", 434 | "Tucker representation of a tensor with multi-linear rank=(4, 5, 6).\n", 435 | "Factor matrices represent properties: ['mode-0', 'mode-1', 'mode-2']\n", 436 | "With corresponding latent components described by (5, 6, 7) features respectively.\n", 437 | "\n", 438 | "\tFactor matrices\n", 439 | "Mode-0 factor matrix is of shape (5, 4)\n", 440 | "Mode-1 factor matrix is of shape (6, 5)\n", 441 | "Mode-2 factor matrix is of shape (7, 6)\n", 442 | "\n", 443 | "\tCore tensor\n", 444 | "This tensor is of order 3 and consists of 120 elements.\n", 445 | "Sizes and names of its modes are (4, 5, 6) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n" 446 | ] 447 | } 448 | ], 449 | "source": [ 450 | "ml_rank = (4, 5, 6)\n", 451 | "tensor_tkd_hooi = alg.decompose(tensor, ml_rank)\n", 452 | "print(\"\\tOutput of the {} algorithm:\".format(alg.name))\n", 453 | "print(tensor_tkd_hooi)\n", 454 | "\n", 455 | "print('\\n\\tFactor matrices')\n", 456 | "for mode, fmat in enumerate(tensor_tkd_hooi.fmat):\n", 457 | " print('Mode-{} factor matrix is of shape {}'.format(mode, fmat.shape))\n", 458 | " \n", 459 | "print('\\n\\tCore tensor')\n", 460 | "print(tensor_tkd_hooi.core)" 461 | ] 462 | }, 463 | { 464 | "cell_type": "markdown", 465 | "metadata": {}, 466 | "source": [ 467 | "As we 
can see, the produced object of the **`TensorTKD`** class also contains general information about the underlying tensor, such as its shape, order etc, which can be accessed through the corresponding properties" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 11, 473 | "metadata": {}, 474 | "outputs": [ 475 | { 476 | "name": "stdout", 477 | "output_type": "stream", 478 | "text": [ 479 | "The shape of the underlying tensor is (5, 6, 7)\n", 480 | "The order of the underlying tensor is 3\n" 481 | ] 482 | } 483 | ], 484 | "source": [ 485 | "full_shape = tensor_tkd_hooi.ft_shape\n", 486 | "order = tensor_tkd_hooi.order\n", 487 | "print('The shape of the underlying tensor is {}'.format(full_shape))\n", 488 | "print('The order of the underlying tensor is {}'.format(order))" 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": {}, 494 | "source": [ 495 | "# Tensor Train Decomposition via SVD\n", 496 | "\n", 497 | "![tensortt](./images/TensorTT.png)" 498 | ] 499 | }, 500 | { 501 | "cell_type": "markdown", 502 | "metadata": {}, 503 | "source": [ 504 | "## Theoretical background\n", 505 | "\n", 506 | "**Tensor train decomposition** represents a given tensor a set of sparsely interconnected lower-order tensors and factor matrices. 
Mathematically speaking, the obtained TT representation of an $N$-th order tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I_1 \\times I_2 \\times \\cdots \\times I_N}$ can be expressed as\n", 507 | "\n", 508 | "$$\n", 509 | "\\begin{aligned}\n", 510 | "\\mathbf{\\underline{X}}\n", 511 | "&= \\Big[ \\mathbf{A}, \\mathbf{\\underline{G}}^{(1)}, \\mathbf{\\underline{G}}^{(2)}, \\cdots, \\mathbf{\\underline{G}}^{(N-1)}, \\mathbf{B} \\Big]\\\\\n", 512 | "&= \\mathbf{A} \\times^1_2 \\mathbf{\\underline{G}}^{(1)} \\times^1_3 \\mathbf{\\underline{G}}^{(2)} \\times^1_3 \\cdots \\times^1_3 \\mathbf{\\underline{G}}^{(N-1)} \\times^1_3 \\mathbf{B} \n", 513 | "\\end{aligned}\n", 514 | "$$\n", 515 | "\n", 516 | "Each element of a TT is generally referred to as **tt-core** with sizes of its dimensions: $\\mathbf{A} \\in \\mathbb{R}^{I_1 \\times R_1}$, $\\mathbf{B} \\in \\mathbb{R}^{R_{N-1}\\times I_N}$, $\\mathbf{\\underline{G}}^{(n)} \\in \\mathbb{R}^{R_n \\times I_{n+1} \\times R_{n+1}}$\n", 517 | "\n", 518 | "\n", 519 | "The TTSVD algorithm involves iteratively performing a series of foldings and unfoldings on an original tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I_1 \\times I_2 \\times \\cdots \\times I_N}$ in conjunction with SVD. At every iteration a core $\\mathbf{\\underline{G}}^{(n)} \\in \\mathbb{R}^{R_n \\times I_{n+1} \\times R_{n+1}}$ is computed, where the TT-rank $(R_1, R_2, \\dots, R_N)$ has been specified a priori. " 520 | ] 521 | }, 522 | { 523 | "cell_type": "markdown", 524 | "metadata": {}, 525 | "source": [ 526 | "## TTSVD class in hottbox\n", 527 | "\n", 528 | "In **`hottbox`**, the TTSVD algorithm is implemented by the **`TTSVD`** class. Regardless of the parameters used to initialise this algorithm, it outputs an instance of **`TensorTT`** class after each call of the **`decompose`** method. This method takes an object of **`Tensor`** class and desired values of tt-rank passed as a tuple. 
" 529 | ] 530 | }, 531 | { 532 | "cell_type": "code", 533 | "execution_count": 12, 534 | "metadata": {}, 535 | "outputs": [ 536 | { 537 | "data": { 538 | "text/plain": [ 539 | "TTSVD(verbose=False)" 540 | ] 541 | }, 542 | "execution_count": 12, 543 | "metadata": {}, 544 | "output_type": "execute_result" 545 | } 546 | ], 547 | "source": [ 548 | "alg = TTSVD()\n", 549 | "alg" 550 | ] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "execution_count": 13, 555 | "metadata": {}, 556 | "outputs": [ 557 | { 558 | "name": "stdout", 559 | "output_type": "stream", 560 | "text": [ 561 | "\tOutput of the TTSVD algorithm:\n", 562 | "Tensor train representation of a tensor with tt-rank=(2, 3).\n", 563 | "Shape of this representation in the full format is (5, 6, 7).\n", 564 | "Physical modes of its cores represent properties: ['mode-0', 'mode-1', 'mode-2']\n", 565 | "\n", 566 | "\tTT-Core #0\n", 567 | "This tensor is of order 2 and consists of 10 elements.\n", 568 | "Sizes and names of its modes are (5, 2) and ['mode-0', 'mode-1'] respectively.\n", 569 | "\n", 570 | "\tTT-Core #1\n", 571 | "This tensor is of order 3 and consists of 36 elements.\n", 572 | "Sizes and names of its modes are (2, 6, 3) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n", 573 | "\n", 574 | "\tTT-Core #2\n", 575 | "This tensor is of order 2 and consists of 21 elements.\n", 576 | "Sizes and names of its modes are (3, 7) and ['mode-0', 'mode-1'] respectively.\n" 577 | ] 578 | } 579 | ], 580 | "source": [ 581 | "tt_rank = (2,3)\n", 582 | "\n", 583 | "tensor_tt = alg.decompose(tensor, tt_rank)\n", 584 | "print(\"\\tOutput of the {} algorithm:\".format(alg.name))\n", 585 | "print(tensor_tt)\n", 586 | "\n", 587 | "for i, core in enumerate(tensor_tt.cores):\n", 588 | " print('\\n\\tTT-Core #{}'.format(i))\n", 589 | " print(core)" 590 | ] 591 | }, 592 | { 593 | "cell_type": "markdown", 594 | "metadata": {}, 595 | "source": [ 596 | "As we can see, the produced object of the **`TensorTT`** class also contains 
general information about the underlying tensor, such as its shape, order etc, which can be accessed through the corresponding properties" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": 14, 602 | "metadata": {}, 603 | "outputs": [ 604 | { 605 | "name": "stdout", 606 | "output_type": "stream", 607 | "text": [ 608 | "The shape of the underlying tensor is (5, 6, 7)\n", 609 | "The order of the underlying tensor is 3\n" 610 | ] 611 | } 612 | ], 613 | "source": [ 614 | "full_shape = tensor_tt.ft_shape\n", 615 | "order = tensor_tt.order\n", 616 | "print('The shape of the underlying tensor is {}'.format(full_shape))\n", 617 | "print('The order of the underlying tensor is {}'.format(order))" 618 | ] 619 | }, 620 | { 621 | "cell_type": "markdown", 622 | "metadata": {}, 623 | "source": [ 624 | "# Evaluating results of tensor decompositions\n", 625 | "\n", 626 | "For each result of the tensor decomposition we can compute a residual tensor and calculate relative error of approximation:\n", 627 | "```python\n", 628 | " tensor_res = residual_tensor(tensor, tensor_cpd)\n", 629 | " rel_error = tensor_res.frob_norm / tensor.frob_norm \n", 630 | "```\n", 631 | "Or can do it in one line:\n", 632 | "```python\n", 633 | " rel_error = residual_rel_error(tensor, tensor_cpd)\n", 634 | "```" 635 | ] 636 | }, 637 | { 638 | "cell_type": "code", 639 | "execution_count": 15, 640 | "metadata": {}, 641 | "outputs": [ 642 | { 643 | "name": "stdout", 644 | "output_type": "stream", 645 | "text": [ 646 | "\tResidual tensor\n", 647 | "This tensor is of order 3 and consists of 210 elements.\n", 648 | "Sizes and names of its modes are (5, 6, 7) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n" 649 | ] 650 | } 651 | ], 652 | "source": [ 653 | "tensor_cpd_res = residual_tensor(tensor, tensor_cpd)\n", 654 | "print('\\tResidual tensor')\n", 655 | "print(tensor_cpd_res)" 656 | ] 657 | }, 658 | { 659 | "cell_type": "code", 660 | "execution_count": 16, 661 | "metadata": {}, 662 
| "outputs": [ 663 | { 664 | "name": "stdout", 665 | "output_type": "stream", 666 | "text": [ 667 | "Relative error of CPD approximation = 0.31\n", 668 | "Relative error of CPD approximation = 0.31\n" 669 | ] 670 | } 671 | ], 672 | "source": [ 673 | "rel_error = tensor_cpd_res.frob_norm / tensor.frob_norm \n", 674 | "print('Relative error of CPD approximation = {:.2f}'.format(rel_error))\n", 675 | "\n", 676 | "rel_error = residual_rel_error(tensor, tensor_cpd)\n", 677 | "print('Relative error of CPD approximation = {:.2f}'.format(rel_error))" 678 | ] 679 | }, 680 | { 681 | "cell_type": "code", 682 | "execution_count": 17, 683 | "metadata": {}, 684 | "outputs": [ 685 | { 686 | "name": "stdout", 687 | "output_type": "stream", 688 | "text": [ 689 | "Relative error of HOSVD approximation = 0.21\n" 690 | ] 691 | } 692 | ], 693 | "source": [ 694 | "rel_error = residual_rel_error(tensor, tensor_tkd_hosvd)\n", 695 | "print('Relative error of HOSVD approximation = {:.2f}'.format(rel_error))" 696 | ] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": 18, 701 | "metadata": {}, 702 | "outputs": [ 703 | { 704 | "name": "stdout", 705 | "output_type": "stream", 706 | "text": [ 707 | "Relative error of HOOI approximation = 0.21\n" 708 | ] 709 | } 710 | ], 711 | "source": [ 712 | "rel_error = residual_rel_error(tensor, tensor_tkd_hooi)\n", 713 | "print('Relative error of HOOI approximation = {:.2f}'.format(rel_error))" 714 | ] 715 | }, 716 | { 717 | "cell_type": "code", 718 | "execution_count": 19, 719 | "metadata": {}, 720 | "outputs": [ 721 | { 722 | "name": "stdout", 723 | "output_type": "stream", 724 | "text": [ 725 | "Relative error of TT approximation = 0.39\n" 726 | ] 727 | } 728 | ], 729 | "source": [ 730 | "rel_error = residual_rel_error(tensor, tensor_tt)\n", 731 | "print('Relative error of TT approximation = {:.2f}'.format(rel_error))" 732 | ] 733 | }, 734 | { 735 | "cell_type": "markdown", 736 | "metadata": {}, 737 | "source": [ 738 | "# Further 
reading list\n", 739 | "- Tamara G. Kolda and Brett W. Bader, \"Tensor decompositions and applications.\" SIAM REVIEW, 51(3):455–500, 2009.\n", 740 | "\n", 741 | "- Lieven De Lathauwer, Bart De Moor, and Joos Vandewalle, \"A multilinear singular value decomposition.\" SIAM journal on Matrix Analysis and Applications 21.4 (2000): 1253-1278.\n", 742 | "\n", 743 | "- Ivan V. Oseledets, \"Tensor-train decomposition.\" SIAM Journal on Scientific Computing 33.5 (2011): 2295-2317." 744 | ] 745 | } 746 | ], 747 | "metadata": { 748 | "kernelspec": { 749 | "display_name": "hottbox-tutorials", 750 | "language": "python", 751 | "name": "hottbox-tutorials" 752 | }, 753 | "language_info": { 754 | "codemirror_mode": { 755 | "name": "ipython", 756 | "version": 3 757 | }, 758 | "file_extension": ".py", 759 | "mimetype": "text/x-python", 760 | "name": "python", 761 | "nbconvert_exporter": "python", 762 | "pygments_lexer": "ipython3", 763 | "version": "3.6.6" 764 | } 765 | }, 766 | "nbformat": 4, 767 | "nbformat_minor": 2 768 | } 769 | -------------------------------------------------------------------------------- /4_Ecosystem_of_Tensor_class.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ecosystem of a Tensor: N-dimensional arrays, their descriptions and meta infromation\n", 8 | "### Last modification (08.06.2018).\n", 9 | "\n", 10 | "\n", 11 | "**Note:** this tutorial assumes that you are familiar with the notion of N-dimensional arrays and their efficient representaitons. 
The related material can be found in out previous tutorials: [tutorial_1](https://github.com/hottbox/hottbox-tutorials/blob/master/1_N-dimensional_arrays_and_Tensor_class.ipynb) and [tutorial_2](https://github.com/hottbox/hottbox-tutorials/blob/master/2_Efficient_representations_of_tensors.ipynb).\n", 12 | "\n", 13 | "\n", 14 | "**Requirements:** ``hottbox==0.1.3``\n", 15 | "\n", 16 | "**Authors:** \n", 17 | "Ilya Kisil (ilyakisil@gmail.com); " 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import numpy as np\n", 27 | "from hottbox.core import Tensor" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "def show_meta_information(tensor, data=True, shapes=True, modes=True, state=True):\n", 37 | " \"\"\" Quick util for showing relevant information for this tutorial\n", 38 | " \n", 39 | " Parameters\n", 40 | " ----------\n", 41 | " tensor : Tensor\n", 42 | " data : bool\n", 43 | " If True, show data array\n", 44 | " shapes : bool\n", 45 | " If True, show current shape and normal shape\n", 46 | " modes : bool\n", 47 | " If True, show mode information\n", 48 | " state : bool \n", 49 | " If True, show state information\n", 50 | " \"\"\"\n", 51 | " print(tensor)\n", 52 | " \n", 53 | " if data:\n", 54 | " print(\"\\n\\tThe underlying data array is:\")\n", 55 | " print(tensor.data)\n", 56 | " \n", 57 | " if shapes:\n", 58 | " print(\"\\n\\tIs this tensor in normal state: {}\".format(tensor.in_normal_state))\n", 59 | " print(\"Current shape of the data array: {}\".format(tensor.shape))\n", 60 | " print(\"Normal shape of the data array: {}\".format(tensor.ft_shape))\n", 61 | " \n", 62 | " if modes:\n", 63 | " print(\"\\n\\tInformation about its modes:\")\n", 64 | " for i, tensor_mode in enumerate(tensor.modes):\n", 65 | " print(\"#{}: {}\".format(i, tensor_mode))\n", 66 | "\n", 67 | " if state:\n", 68 
| " print(\"\\n\\tInformation about its current state:\") \n", 69 | " tensor.show_state()\n", 70 | " \n", 71 | "def print_sep_line():\n", 72 | " print(\"\\n===========================\"\n", 73 | " \"=============================\"\n", 74 | " \"===========================\\n\")" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "Recall tha the collected raw data in form of N-dimensional array represents different characteristics. Here are couple of examples:\n", 82 | "\n", 83 | "![different_tensors](./images/different-tensors.png)\n", 84 | "\n", 85 | "N-dimensional arrays of data can be represented in various different forms. By applying numerical methods (algorithms for tensor decompositions) to the raw data we can obtain, for example, Kruskal or Tucker representation. At the same time, simple data rearrangement procedures (e.g. folding, unfolding) of the raw data also yields different representation.\n", 86 | "\n", 87 | "![different_representations](./images/different-forms-of-data.png)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "Each dimension of an N-dimensional array is associated with a certain property, **mode**, of the raw data. At the same time, this characterisc is described by certain features. The relation between these properties defines **state** of this N-dimensional array. In other words, modes and state could be seen as the meta information about the tensor.\n", 95 | "\n", 96 | "**Mode** of the tensor is defined by name of the property it represents and features that describe this property.\n", 97 | "\n", 98 | "**State** of the tensor is defined by transformations applied to the data array. \n", 99 | "\n", 100 | "**Normal state** of the tensor is such state of the tensor when the underlying raw data array is in its original form. 
This means that it has not been folded, unfolded or rotated.\n", 101 | "\n", 102 | "Thus, the tensor is described by two different shapes: \n", 103 | "1. Shape of the data array in the current state of the tensor\n", 104 | "2. Normal shape (full shape) - shape of the data array in the normal state.\n", 105 | "\n", 106 | "Each transformation can be characterised by the mode order and type of reshaping. This information is enough in order to be able to revert applied transformation of the data array.\n", 107 | "\n", 108 | "Transformations such as folding or unfolding do not change the original properties of the underlying data array, but they change the relationship between these properties.\n", 109 | "\n", 110 | "![data_modes_state](./images/data-modes-state.png)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "By default, an object of **Tensor** class is created in **normal state** with generic mode names that describe properties of dimensions of data array." 
118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 3, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "This tensor is of order 3 and consists of 24 elements.\n", 130 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n", 131 | "\n", 132 | "\tThe underlying data array is:\n", 133 | "[[[ 0 1 2 3]\n", 134 | " [ 4 5 6 7]\n", 135 | " [ 8 9 10 11]]\n", 136 | "\n", 137 | " [[12 13 14 15]\n", 138 | " [16 17 18 19]\n", 139 | " [20 21 22 23]]]\n", 140 | "\n", 141 | "\tIs this tensor in normal state: True\n", 142 | "Current shape of the data array: (2, 3, 4)\n", 143 | "Normal shape of the data array: (2, 3, 4)\n", 144 | "\n", 145 | "\tInformation about its modes:\n", 146 | "#0: Mode(name='mode-0', index=None)\n", 147 | "#1: Mode(name='mode-1', index=None)\n", 148 | "#2: Mode(name='mode-2', index=None)\n", 149 | "\n", 150 | "\tInformation about its current state:\n", 151 | "State(normal_shape=(2, 3, 4), rtype='Init', mode_order=([0], [1], [2]))\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "data_array = np.arange(24).reshape(2, 3, 4)\n", 157 | "\n", 158 | "tensor = Tensor(data_array)\n", 159 | "\n", 160 | "show_meta_information(tensor)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "## Meta information after applying data transformations\n", 168 | "\n", 169 | "Next, we will show changes in the meta information of the tensor when different transformations are applied to it. \n", 170 | "\n", 171 | "**Note:** at the moment, only one data transformation can be applied at the time. This will be generalised in a future releases of **hottbox** and will be outlined in the [CHANGELOG](https://github.com/hottbox/hottbox/blob/master/CHANGELOG.md)." 
172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "### Unfolding of the data" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 4, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "name": "stdout", 188 | "output_type": "stream", 189 | "text": [ 190 | "This tensor is of order 2 and consists of 24 elements.\n", 191 | "Sizes and names of its modes are (3, 8) and ['mode-1', 'mode-0_mode-2'] respectively.\n", 192 | "\n", 193 | "\tThe underlying data array is:\n", 194 | "[[ 0 1 2 3 12 13 14 15]\n", 195 | " [ 4 5 6 7 16 17 18 19]\n", 196 | " [ 8 9 10 11 20 21 22 23]]\n", 197 | "\n", 198 | "\tIs this tensor in normal state: False\n", 199 | "Current shape of the data array: (3, 8)\n", 200 | "Normal shape of the data array: (2, 3, 4)\n", 201 | "\n", 202 | "\tInformation about its modes:\n", 203 | "#0: Mode(name='mode-0', index=None)\n", 204 | "#1: Mode(name='mode-1', index=None)\n", 205 | "#2: Mode(name='mode-2', index=None)\n", 206 | "\n", 207 | "\tInformation about its current state:\n", 208 | "State(normal_shape=(2, 3, 4), rtype='T', mode_order=([1], [0, 2]))\n" 209 | ] 210 | } 211 | ], 212 | "source": [ 213 | "tensor.unfold(mode=1)\n", 214 | "\n", 215 | "show_meta_information(tensor)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "### Folding of the data" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 5, 228 | "metadata": {}, 229 | "outputs": [ 230 | { 231 | "name": "stdout", 232 | "output_type": "stream", 233 | "text": [ 234 | "This tensor is of order 3 and consists of 24 elements.\n", 235 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n", 236 | "\n", 237 | "\tThe underlying data array is:\n", 238 | "[[[ 0 1 2 3]\n", 239 | " [ 4 5 6 7]\n", 240 | " [ 8 9 10 11]]\n", 241 | "\n", 242 | " [[12 13 14 15]\n", 243 | " [16 17 18 19]\n", 244 | " [20 21 22 
23]]]\n", 245 | "\n", 246 | "\tIs this tensor in normal state: True\n", 247 | "Current shape of the data array: (2, 3, 4)\n", 248 | "Normal shape of the data array: (2, 3, 4)\n", 249 | "\n", 250 | "\tInformation about its modes:\n", 251 | "#0: Mode(name='mode-0', index=None)\n", 252 | "#1: Mode(name='mode-1', index=None)\n", 253 | "#2: Mode(name='mode-2', index=None)\n", 254 | "\n", 255 | "\tInformation about its current state:\n", 256 | "State(normal_shape=(2, 3, 4), rtype='Init', mode_order=([0], [1], [2]))\n" 257 | ] 258 | } 259 | ], 260 | "source": [ 261 | "tensor.fold()\n", 262 | "\n", 263 | "show_meta_information(tensor)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "### Vectorisation of the data" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 6, 276 | "metadata": {}, 277 | "outputs": [ 278 | { 279 | "name": "stdout", 280 | "output_type": "stream", 281 | "text": [ 282 | "This tensor is of order 1 and consists of 24 elements.\n", 283 | "Sizes and names of its modes are (24,) and ['mode-0_mode-1_mode-2'] respectively.\n", 284 | "\n", 285 | "\tThe underlying data array is:\n", 286 | "[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]\n", 287 | "\n", 288 | "\tIs this tensor in normal state: False\n", 289 | "Current shape of the data array: (24,)\n", 290 | "Normal shape of the data array: (2, 3, 4)\n", 291 | "\n", 292 | "\tInformation about its modes:\n", 293 | "#0: Mode(name='mode-0', index=None)\n", 294 | "#1: Mode(name='mode-1', index=None)\n", 295 | "#2: Mode(name='mode-2', index=None)\n", 296 | "\n", 297 | "\tInformation about its current state:\n", 298 | "State(normal_shape=(2, 3, 4), rtype='T', mode_order=([0, 1, 2],))\n" 299 | ] 300 | } 301 | ], 302 | "source": [ 303 | "tensor.vectorise()\n", 304 | "\n", 305 | "show_meta_information(tensor)" 306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "metadata": {}, 311 | "source": [ 312 | "As wee can 
see, the applied transformations rearrange values of the underlying data array. They also change the relations between mode names and modify the state of the tensor. However, the normal shape and the information about the original modes remain the same." 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "metadata": {}, 318 | "source": [ 319 | "## Different reshaping conventions\n", 320 | "\n", 321 | "In computing, row-major order and column-major order are methods for storing multidimensional arrays in linear storage such as random access memory. For example, for the array\n", 322 | "$$\n", 323 | "\\mathbf{A} = \n", 324 | "\\begin{bmatrix}\n", 325 | " a_{11} & a_{12} & a_{13}\\\\ \n", 326 | " a_{21} & a_{22} & a_{23} \n", 327 | "\\end{bmatrix}\n", 328 | "$$\n", 329 | "the two possible ways are:\n", 330 | "\n", 331 | "![data_ordering](./images/C_Fortran_ordering.png)\n", 332 | "\n", 333 | "Therefore, there are two conventions for reshaping (unfolding/folding/vectorising) data.\n", 334 | "Both of them are available in the **hottbox**. They produce arrays of the same shape, but with values being permuted. The state of the tensor memorises which convention has been applied and will use it for reverting the applied transformation."
335 | ] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | "metadata": {}, 340 | "source": [ 341 | "### Row and column major unfolding" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 7, 347 | "metadata": {}, 348 | "outputs": [ 349 | { 350 | "name": "stdout", 351 | "output_type": "stream", 352 | "text": [ 353 | "\tRow-major unfolding\n", 354 | "This tensor is of order 2 and consists of 24 elements.\n", 355 | "Sizes and names of its modes are (3, 8) and ['mode-1', 'mode-0_mode-2'] respectively.\n", 356 | "\n", 357 | "\tThe underlying data array is:\n", 358 | "[[ 0 1 2 3 12 13 14 15]\n", 359 | " [ 4 5 6 7 16 17 18 19]\n", 360 | " [ 8 9 10 11 20 21 22 23]]\n", 361 | "\n", 362 | "\tInformation about its current state:\n", 363 | "State(normal_shape=(2, 3, 4), rtype='T', mode_order=([1], [0, 2]))\n", 364 | "\n", 365 | "===================================================================================\n", 366 | "\n", 367 | "\tColumn-major unfolding\n", 368 | "This tensor is of order 2 and consists of 24 elements.\n", 369 | "Sizes and names of its modes are (3, 8) and ['mode-1', 'mode-0_mode-2'] respectively.\n", 370 | "\n", 371 | "\tThe underlying data array is:\n", 372 | "[[ 0 12 1 13 2 14 3 15]\n", 373 | " [ 4 16 5 17 6 18 7 19]\n", 374 | " [ 8 20 9 21 10 22 11 23]]\n", 375 | "\n", 376 | "\tInformation about its current state:\n", 377 | "State(normal_shape=(2, 3, 4), rtype='K', mode_order=([1], [0, 2]))\n" 378 | ] 379 | } 380 | ], 381 | "source": [ 382 | "data_array = np.arange(24).reshape(2, 3, 4)\n", 383 | "\n", 384 | "tensor_1 = Tensor(data_array)\n", 385 | "tensor_2 = Tensor(data_array)\n", 386 | "\n", 387 | "tensor_1.unfold(mode=1, rtype=\"T\")\n", 388 | "tensor_2.unfold(mode=1, rtype=\"K\")\n", 389 | "\n", 390 | "print(\"\\tRow-major unfolding\")\n", 391 | "show_meta_information(tensor_1, shapes=False, modes=False)\n", 392 | "\n", 393 | "print_sep_line()\n", 394 | "\n", 395 | "print(\"\\tColumn-major unfolding\")\n", 396 | 
"show_meta_information(tensor_2, shapes=False, modes=False)" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "### Row and column major folding" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 8, 409 | "metadata": {}, 410 | "outputs": [ 411 | { 412 | "name": "stdout", 413 | "output_type": "stream", 414 | "text": [ 415 | "\tReverting Row-major unfolding\n", 416 | "This tensor is of order 3 and consists of 24 elements.\n", 417 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n", 418 | "\n", 419 | "\tThe underlying data array is:\n", 420 | "[[[ 0 1 2 3]\n", 421 | " [ 4 5 6 7]\n", 422 | " [ 8 9 10 11]]\n", 423 | "\n", 424 | " [[12 13 14 15]\n", 425 | " [16 17 18 19]\n", 426 | " [20 21 22 23]]]\n", 427 | "\n", 428 | "\tInformation about its current state:\n", 429 | "State(normal_shape=(2, 3, 4), rtype='Init', mode_order=([0], [1], [2]))\n", 430 | "\n", 431 | "===================================================================================\n", 432 | "\n", 433 | "\tReverting Column-major unfolding\n", 434 | "This tensor is of order 3 and consists of 24 elements.\n", 435 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n", 436 | "\n", 437 | "\tThe underlying data array is:\n", 438 | "[[[ 0 1 2 3]\n", 439 | " [ 4 5 6 7]\n", 440 | " [ 8 9 10 11]]\n", 441 | "\n", 442 | " [[12 13 14 15]\n", 443 | " [16 17 18 19]\n", 444 | " [20 21 22 23]]]\n", 445 | "\n", 446 | "\tInformation about its current state:\n", 447 | "State(normal_shape=(2, 3, 4), rtype='Init', mode_order=([0], [1], [2]))\n" 448 | ] 449 | } 450 | ], 451 | "source": [ 452 | "tensor_1.fold()\n", 453 | "tensor_2.fold()\n", 454 | "print(\"\\tReverting Row-major unfolding\")\n", 455 | "show_meta_information(tensor_1, shapes=False, modes=False)\n", 456 | "\n", 457 | "print_sep_line()\n", 458 | "\n", 459 | "print(\"\\tReverting Column-major 
unfolding\")\n", 460 | "show_meta_information(tensor_2, shapes=False, modes=False)" 461 | ] 462 | }, 463 | { 464 | "cell_type": "markdown", 465 | "metadata": {}, 466 | "source": [ 467 | "As we can see, the different approaches to reshaping the underlying data affect only the data array itself, whereas other properties remain the same. Similarly to the unfolding along a different mode, the **state** of the tensor keeps track of this transformation as well. \n", 468 | "\n", 469 | "**Note:** the same type of unfolding and folding should be applied to the data array, in order not to mix up the values that describe different properties of the tensor. But don't worry about it, since this is handled automatically under the hood." 470 | ] 471 | }, 472 | { 473 | "cell_type": "markdown", 474 | "metadata": {}, 475 | "source": [ 476 | "## Creating Tensor with custom meta information\n", 477 | "\n", 478 | "The **state** and list of **modes** are created at the initialisation of the **Tensor** object: \n", 479 | "1. **State** of the tensor is created. By default, this step assumes that data is passed in normal shape (was not folded or unfolded before).\n", 480 | "2. List of **modes** is created based on **state**. By default, it extracts from **state** the number of modes to be created and assigns default names to each of them.\n", 481 | "\n", 482 | "The **hottbox** provides flexibility for this procedure. The **Tensor** can be created with custom names for the modes and in a state that is not inferred (defined) from the provided data. \n", 483 | "\n", 484 | "If both customisations are passed to the **Tensor** constructor, then the list of mode names is dependent on the provided state. If only mode names are provided then their length should be consistent with the number of dimensions of the data array.\n", 485 | "\n", 486 | "Defining a custom state is a little bit trickier, but there is nothing to be scared of, because **state** and **modes** are crucial parts of the **Tensor** ecosystem. 
Even though there is quite a bit of input validation involved, which will point you in the right direction in case something was not specified correctly, custom state should be specified with caution.\n", 487 | "\n", 488 | "**Note:** The usefulness of the custom mode names is not fully exploited in **hottbox** at the moment, but we work on that." 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": 9, 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [ 497 | "I, J, K = 2, 3, 4\n", 498 | "\n", 499 | "# Provided with 3D array\n", 500 | "data_3d = np.arange(I*J*K).reshape(I, J, K)\n", 501 | "\n", 502 | "# Provided with 3D array that had been unfolded\n", 503 | "data_2d = np.arange(I*J*K).reshape(I, (J*K))" 504 | ] 505 | }, 506 | { 507 | "cell_type": "markdown", 508 | "metadata": {}, 509 | "source": [ 510 | "### Custom mode names" 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": 10, 516 | "metadata": {}, 517 | "outputs": [ 518 | { 519 | "name": "stdout", 520 | "output_type": "stream", 521 | "text": [ 522 | "This tensor is of order 3 and consists of 24 elements.\n", 523 | "Sizes and names of its modes are (2, 3, 4) and ['Frequency', 'Time', 'Subject'] respectively.\n", 524 | "\n", 525 | "\tInformation about its modes:\n", 526 | "#0: Mode(name='Frequency', index=None)\n", 527 | "#1: Mode(name='Time', index=None)\n", 528 | "#2: Mode(name='Subject', index=None)\n" 529 | ] 530 | } 531 | ], 532 | "source": [ 533 | "tensor_1 = Tensor(data_3d, mode_names=[\"Frequency\", \"Time\", \"Subject\"])\n", 534 | "\n", 535 | "show_meta_information(tensor_1, data=False, shapes=False, state=False)" 536 | ] 537 | }, 538 | { 539 | "cell_type": "markdown", 540 | "metadata": {}, 541 | "source": [ 542 | "### Custom state: different mode order" 543 | ] 544 | }, 545 | { 546 | "cell_type": "code", 547 | "execution_count": 11, 548 | "metadata": {}, 549 | "outputs": [ 550 | { 551 | "name": "stdout", 552 | "output_type": "stream", 553 | 
"text": [ 554 | "This tensor is of order 2 and consists of 24 elements.\n", 555 | "Sizes and names of its modes are (2, 12) and ['mode-0', 'mode-1_mode-2'] respectively.\n", 556 | "\n", 557 | "\tThe underlying data array is:\n", 558 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n", 559 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n", 560 | "\n", 561 | "===================================================================================\n", 562 | "\n", 563 | "This tensor is of order 2 and consists of 24 elements.\n", 564 | "Sizes and names of its modes are (2, 12) and ['mode-1', 'mode-0_mode-2'] respectively.\n", 565 | "\n", 566 | "\tThe underlying data array is:\n", 567 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n", 568 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n" 569 | ] 570 | } 571 | ], 572 | "source": [ 573 | "custom_state_1 = dict(mode_order=([0], [1, 2]),\n", 574 | " normal_shape=(2, 3, 4),\n", 575 | " rtype=\"T\"\n", 576 | " )\n", 577 | "custom_state_2 = dict(mode_order=([1], [0, 2]),\n", 578 | " normal_shape=(2, 3, 4),\n", 579 | " rtype=\"T\"\n", 580 | " )\n", 581 | "\n", 582 | "tensor_1 = Tensor(data_2d, custom_state=custom_state_1)\n", 583 | "tensor_2 = Tensor(data_2d, custom_state=custom_state_2)\n", 584 | "\n", 585 | "show_meta_information(tensor_1, modes=False, shapes=False, state=False)\n", 586 | "\n", 587 | "print_sep_line()\n", 588 | "\n", 589 | "show_meta_information(tensor_2, modes=False, shapes=False, state=False)" 590 | ] 591 | }, 592 | { 593 | "cell_type": "code", 594 | "execution_count": 12, 595 | "metadata": {}, 596 | "outputs": [ 597 | { 598 | "name": "stdout", 599 | "output_type": "stream", 600 | "text": [ 601 | "This tensor is of order 3 and consists of 24 elements.\n", 602 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n", 603 | "\n", 604 | "\tThe underlying data array is:\n", 605 | "[[[ 0 1 2 3]\n", 606 | " [ 4 5 6 7]\n", 607 | " [ 8 9 10 11]]\n", 608 | "\n", 609 | " [[12 13 14 15]\n", 610 | " [16 17 18 19]\n", 611 
| " [20 21 22 23]]]\n", 612 | "\n", 613 | "===================================================================================\n", 614 | "\n", 615 | "This tensor is of order 3 and consists of 24 elements.\n", 616 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n", 617 | "\n", 618 | "\tThe underlying data array is:\n", 619 | "[[[ 0 1 2 3]\n", 620 | " [ 8 9 10 11]\n", 621 | " [16 17 18 19]]\n", 622 | "\n", 623 | " [[ 4 5 6 7]\n", 624 | " [12 13 14 15]\n", 625 | " [20 21 22 23]]]\n" 626 | ] 627 | } 628 | ], 629 | "source": [ 630 | "tensor_1.fold()\n", 631 | "tensor_2.fold()\n", 632 | "\n", 633 | "show_meta_information(tensor_1, modes=False, shapes=False, state=False)\n", 634 | "\n", 635 | "print_sep_line()\n", 636 | "\n", 637 | "show_meta_information(tensor_2, modes=False, shapes=False, state=False)" 638 | ] 639 | }, 640 | { 641 | "cell_type": "markdown", 642 | "metadata": {}, 643 | "source": [ 644 | "**Note:** this example is for illustration purposes only, since it does not follow true unfolding/folding expressions that is:\n", 645 | "\n", 646 | "```python\n", 647 | "unfolded_along = mode_order[0][0]\n", 648 | "data_2d.shape[0] != normal_shape[unfolded_along]\n", 649 | "```" 650 | ] 651 | }, 652 | { 653 | "cell_type": "markdown", 654 | "metadata": {}, 655 | "source": [ 656 | "### Custom state: different reshaping type" 657 | ] 658 | }, 659 | { 660 | "cell_type": "code", 661 | "execution_count": 13, 662 | "metadata": {}, 663 | "outputs": [ 664 | { 665 | "name": "stdout", 666 | "output_type": "stream", 667 | "text": [ 668 | "This tensor is of order 2 and consists of 24 elements.\n", 669 | "Sizes and names of its modes are (2, 12) and ['mode-0', 'mode-1_mode-2'] respectively.\n", 670 | "\n", 671 | "\tThe underlying data array is:\n", 672 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n", 673 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n", 674 | "\n", 675 | 
"===================================================================================\n", 676 | "\n", 677 | "This tensor is of order 2 and consists of 24 elements.\n", 678 | "Sizes and names of its modes are (2, 12) and ['mode-0', 'mode-1_mode-2'] respectively.\n", 679 | "\n", 680 | "\tThe underlying data array is:\n", 681 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n", 682 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n" 683 | ] 684 | } 685 | ], 686 | "source": [ 687 | "custom_state_1 = dict(mode_order=([0], [1, 2]),\n", 688 | " normal_shape=(2, 3, 4),\n", 689 | " rtype=\"T\"\n", 690 | " )\n", 691 | "custom_state_2 = dict(mode_order=([0], [1, 2]),\n", 692 | " normal_shape=(2, 3, 4),\n", 693 | " rtype=\"K\"\n", 694 | " )\n", 695 | "\n", 696 | "tensor_1 = Tensor(data_2d, custom_state=custom_state_1)\n", 697 | "tensor_2 = Tensor(data_2d, custom_state=custom_state_2)\n", 698 | "\n", 699 | "show_meta_information(tensor_1, modes=False, shapes=False, state=False)\n", 700 | "\n", 701 | "print_sep_line()\n", 702 | "\n", 703 | "show_meta_information(tensor_2, modes=False, shapes=False, state=False)" 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": 14, 709 | "metadata": {}, 710 | "outputs": [ 711 | { 712 | "name": "stdout", 713 | "output_type": "stream", 714 | "text": [ 715 | "This tensor is of order 3 and consists of 24 elements.\n", 716 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n", 717 | "\n", 718 | "\tThe underlying data array is:\n", 719 | "[[[ 0 1 2 3]\n", 720 | " [ 4 5 6 7]\n", 721 | " [ 8 9 10 11]]\n", 722 | "\n", 723 | " [[12 13 14 15]\n", 724 | " [16 17 18 19]\n", 725 | " [20 21 22 23]]]\n", 726 | "\n", 727 | "===================================================================================\n", 728 | "\n", 729 | "This tensor is of order 3 and consists of 24 elements.\n", 730 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n", 731 | "\n", 732 | "\tThe 
underlying data array is:\n", 733 | "[[[ 0 3 6 9]\n", 734 | " [ 1 4 7 10]\n", 735 | " [ 2 5 8 11]]\n", 736 | "\n", 737 | " [[12 15 18 21]\n", 738 | " [13 16 19 22]\n", 739 | " [14 17 20 23]]]\n" 740 | ] 741 | } 742 | ], 743 | "source": [ 744 | "tensor_1.fold()\n", 745 | "tensor_2.fold()\n", 746 | "\n", 747 | "show_meta_information(tensor_1, modes=False, shapes=False, state=False)\n", 748 | "\n", 749 | "print_sep_line()\n", 750 | "\n", 751 | "show_meta_information(tensor_2, modes=False, shapes=False, state=False)" 752 | ] 753 | }, 754 | { 755 | "cell_type": "markdown", 756 | "metadata": {}, 757 | "source": [ 758 | "### Custom state: different normal shape" 759 | ] 760 | }, 761 | { 762 | "cell_type": "code", 763 | "execution_count": 15, 764 | "metadata": {}, 765 | "outputs": [ 766 | { 767 | "name": "stdout", 768 | "output_type": "stream", 769 | "text": [ 770 | "This tensor is of order 2 and consists of 24 elements.\n", 771 | "Sizes and names of its modes are (2, 12) and ['mode-0', 'mode-1_mode-2'] respectively.\n", 772 | "\n", 773 | "\tThe underlying data array is:\n", 774 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n", 775 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n", 776 | "\n", 777 | "===================================================================================\n", 778 | "\n", 779 | "This tensor is of order 2 and consists of 24 elements.\n", 780 | "Sizes and names of its modes are (2, 12) and ['mode-0', 'mode-1_mode-2'] respectively.\n", 781 | "\n", 782 | "\tThe underlying data array is:\n", 783 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n", 784 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n" 785 | ] 786 | } 787 | ], 788 | "source": [ 789 | "custom_state_1 = dict(mode_order=([0], [1, 2]),\n", 790 | " normal_shape=(2, 3, 4),\n", 791 | " rtype=\"T\"\n", 792 | " )\n", 793 | "custom_state_2 = dict(mode_order=([0], [1, 2]),\n", 794 | " normal_shape=(2, 4, 3),\n", 795 | " rtype=\"T\"\n", 796 | " )\n", 797 | "\n", 798 | "tensor_1 = Tensor(data_2d, custom_state=custom_state_1)\n", 799 | 
"tensor_2 = Tensor(data_2d, custom_state=custom_state_2)\n", 800 | "\n", 801 | "show_meta_information(tensor_1, modes=False, shapes=False, state=False)\n", 802 | "\n", 803 | "print_sep_line()\n", 804 | "\n", 805 | "show_meta_information(tensor_2, modes=False, shapes=False, state=False)" 806 | ] 807 | }, 808 | { 809 | "cell_type": "code", 810 | "execution_count": 16, 811 | "metadata": {}, 812 | "outputs": [ 813 | { 814 | "name": "stdout", 815 | "output_type": "stream", 816 | "text": [ 817 | "This tensor is of order 3 and consists of 24 elements.\n", 818 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n", 819 | "\n", 820 | "\tThe underlying data array is:\n", 821 | "[[[ 0 1 2 3]\n", 822 | " [ 4 5 6 7]\n", 823 | " [ 8 9 10 11]]\n", 824 | "\n", 825 | " [[12 13 14 15]\n", 826 | " [16 17 18 19]\n", 827 | " [20 21 22 23]]]\n", 828 | "\n", 829 | "===================================================================================\n", 830 | "\n", 831 | "This tensor is of order 3 and consists of 24 elements.\n", 832 | "Sizes and names of its modes are (2, 4, 3) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n", 833 | "\n", 834 | "\tThe underlying data array is:\n", 835 | "[[[ 0 1 2]\n", 836 | " [ 3 4 5]\n", 837 | " [ 6 7 8]\n", 838 | " [ 9 10 11]]\n", 839 | "\n", 840 | " [[12 13 14]\n", 841 | " [15 16 17]\n", 842 | " [18 19 20]\n", 843 | " [21 22 23]]]\n" 844 | ] 845 | } 846 | ], 847 | "source": [ 848 | "tensor_1.fold()\n", 849 | "tensor_2.fold()\n", 850 | "\n", 851 | "show_meta_information(tensor_1, modes=False, shapes=False, state=False)\n", 852 | "\n", 853 | "print_sep_line()\n", 854 | "\n", 855 | "show_meta_information(tensor_2, modes=False, shapes=False, state=False)" 856 | ] 857 | }, 858 | { 859 | "cell_type": "markdown", 860 | "metadata": {}, 861 | "source": [ 862 | "### Custom state and mode names" 863 | ] 864 | }, 865 | { 866 | "cell_type": "code", 867 | "execution_count": 17, 868 | "metadata": {}, 869 | "outputs": [ 
870 | { 871 | "name": "stdout", 872 | "output_type": "stream", 873 | "text": [ 874 | "This tensor is of order 2 and consists of 24 elements.\n", 875 | "Sizes and names of its modes are (3, 8) and ['Time', 'Frequency_Subject'] respectively.\n", 876 | "\n", 877 | "\tThe underlying data array is:\n", 878 | "[[ 0 1 2 3 4 5 6 7]\n", 879 | " [ 8 9 10 11 12 13 14 15]\n", 880 | " [16 17 18 19 20 21 22 23]]\n", 881 | "\n", 882 | "\tInformation about its modes:\n", 883 | "#0: Mode(name='Frequency', index=None)\n", 884 | "#1: Mode(name='Time', index=None)\n", 885 | "#2: Mode(name='Subject', index=None)\n", 886 | "\n", 887 | "\tInformation about its current state:\n", 888 | "State(normal_shape=(3, 2, 4), rtype='T', mode_order=([1], [0, 2]))\n", 889 | "\n", 890 | "===================================================================================\n", 891 | "\n", 892 | "This tensor is of order 3 and consists of 24 elements.\n", 893 | "Sizes and names of its modes are (3, 2, 4) and ['Frequency', 'Time', 'Subject'] respectively.\n", 894 | "\n", 895 | "\tThe underlying data array is:\n", 896 | "[[[ 0 1 2 3]\n", 897 | " [12 13 14 15]]\n", 898 | "\n", 899 | " [[ 4 5 6 7]\n", 900 | " [16 17 18 19]]\n", 901 | "\n", 902 | " [[ 8 9 10 11]\n", 903 | " [20 21 22 23]]]\n", 904 | "\n", 905 | "\tInformation about its modes:\n", 906 | "#0: Mode(name='Frequency', index=None)\n", 907 | "#1: Mode(name='Time', index=None)\n", 908 | "#2: Mode(name='Subject', index=None)\n", 909 | "\n", 910 | "\tInformation about its current state:\n", 911 | "State(normal_shape=(3, 2, 4), rtype='Init', mode_order=([0], [1], [2]))\n" 912 | ] 913 | } 914 | ], 915 | "source": [ 916 | "I, J, K = 2, 3, 4\n", 917 | "data_2d = np.arange(I*J*K).reshape(J, (I*K))\n", 918 | "\n", 919 | "custom_state = dict(mode_order=([1], [0, 2]),\n", 920 | " normal_shape=(3, 2, 4),\n", 921 | " rtype=\"T\"\n", 922 | " )\n", 923 | "tensor_1 = Tensor(data_2d, custom_state, mode_names=[\"Frequency\", \"Time\", \"Subject\"])\n", 924 | 
"show_meta_information(tensor_1, shapes=False)\n", 925 | "\n", 926 | "print_sep_line()\n", 927 | "\n", 928 | "tensor_1.fold()\n", 929 | "show_meta_information(tensor_1, shapes=False)" 930 | ] 931 | } 932 | ], 933 | "metadata": { 934 | "kernelspec": { 935 | "display_name": "hottbox-tutorials", 936 | "language": "python", 937 | "name": "hottbox-tutorials" 938 | }, 939 | "language_info": { 940 | "codemirror_mode": { 941 | "name": "ipython", 942 | "version": 3 943 | }, 944 | "file_extension": ".py", 945 | "mimetype": "text/x-python", 946 | "name": "python", 947 | "nbconvert_exporter": "python", 948 | "pygments_lexer": "ipython3", 949 | "version": "3.6.6" 950 | } 951 | }, 952 | "nbformat": 4, 953 | "nbformat_minor": 2 954 | } 955 | -------------------------------------------------------------------------------- /5_Tensor_meta_information_and_pandas_integration.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# State and Mode of the Tensor: Main part of meta information\n", 8 | "\n", 9 | "**Note:** this tutorial assumes that you are familiar with the notion of N-dimensional arrays and basic definitions. The related material can be found in out previous tutorials: [tutorial_1](https://github.com/hottbox/hottbox-tutorials/blob/master/1_N-dimensional_arrays_and_Tensor_class.ipynb) and [tutorial_4](https://github.com/hottbox/hottbox-tutorials/blob/master/4_Ecosystem_of_Tensor_class.ipynb).\n", 10 | "\n", 11 | "\n", 12 | "**Requirements:** ``hottbox==0.1.3``\n", 13 | "\n", 14 | "**Authors:** \n", 15 | "Ilya Kisil (ilyakisil@gmail.com); " 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "Meta information about the tensor is represented by the **State** and **Mode** classes.\n", 23 | "\n", 24 | "1. 
**State** keeps track of transformation applied to the underlying data array and can be seen as a link between current form of data array and current interpretatin of its original modes. \n", 25 | "2. **Mode** brings interpretability of the values for the underlying data array.\n", 26 | "\n", 27 | "Without the data array, both of them are standalone classes. But within an ecosystem of **Tensor** class they interact with each other and the data array itself.\n", 28 | "\n", 29 | "Any tensor that created using **hottbox** is assigined a default state which depends on data array. Each mode of the tensor will always have the accosiated names." 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 1, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "import numpy as np\n", 39 | "import pandas as pd\n", 40 | "from hottbox.core import Tensor\n", 41 | "from hottbox.pdtools import tensor_to_pd, pd_to_tensor\n", 42 | "\n", 43 | "\n", 44 | "def print_tensor_state(tensor, data=True, modes=True, transforms=True):\n", 45 | " \"\"\" Quick util for showing relevant information for this tutorial\n", 46 | " \n", 47 | " Parameters\n", 48 | " ----------\n", 49 | " tensor : Tensor \n", 50 | " data : bool\n", 51 | " If True, show data array \n", 52 | " modes : bool\n", 53 | " If True, show mode information\n", 54 | " \"\"\"\n", 55 | " state = tensor._state\n", 56 | " \n", 57 | " if data:\n", 58 | " print(\"\\tUnderlying data array:\")\n", 59 | " print(tensor.data) \n", 60 | " \n", 61 | " if modes:\n", 62 | " print(\"\\n\\tInformation about its modes:\")\n", 63 | " for i, tensor_mode in enumerate(tensor.modes):\n", 64 | " print(\"#{}: {}\".format(i, tensor_mode)) \n", 65 | " \n", 66 | " print(\"\\nProperties described by modes: {}\".format(tensor.mode_names))\n", 67 | " print(\"Associated normal shape: {}\".format(state.normal_shape)) \n", 68 | " \n", 69 | " if transforms:\n", 70 | " print(\"\\n\\t\\tApplied transformations:\")\n", 71 | " for i, 
transformation in enumerate(state.transformations):\n", 72 | " print(\"\\tTransformation #{}:\".format(i))\n", 73 | " print(\"Reshaping type: {}\".format(transformation[0]))\n", 74 | " print(\"New mode order: {}\\n\".format(transformation[1]))\n", 75 | "\n", 76 | "\n", 77 | "def print_sep_line():\n", 78 | " print(\"\\n===========================\"\n", 79 | " \"=============================\"\n", 80 | " \"===========================\\n\")" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "## Tensor state: Default VS Custom\n", 88 | "\n", 89 | "The same data values can be characterised by different states. By specifying custom state we implicitly apply transformation to the state of the tensor during its creation.\n", 90 | "Each transformation is represented by the used reshaping type and the resulting order of the modes. List of **modes** of the tensor is created at the tensor initialisation. It depends on the normal shape if custom state is provided, otherwise it dependes on the shape of the data array." 
91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 2, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "\t\t2-D array as a tensor\n", 103 | "\tUnderlying data array:\n", 104 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n", 105 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n", 106 | "\n", 107 | "\tInformation about its modes:\n", 108 | "#0: Mode(name='mode-0', index=None)\n", 109 | "#1: Mode(name='mode-1', index=None)\n", 110 | "\n", 111 | "Properties described by modes: ['mode-0', 'mode-1']\n", 112 | "Associated normal shape: (2, 12)\n", 113 | "\n", 114 | "\t\tApplied transformations:\n", 115 | "\tTransformation #0:\n", 116 | "Reshaping type: Init\n", 117 | "New mode order: ([0], [1])\n", 118 | "\n", 119 | "\n", 120 | "===================================================================================\n", 121 | "\n", 122 | "\t\t3-D array as an unfolded tensor\n", 123 | "\tUnderlying data array:\n", 124 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n", 125 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n", 126 | "\n", 127 | "\tInformation about its modes:\n", 128 | "#0: Mode(name='mode-0', index=None)\n", 129 | "#1: Mode(name='mode-1', index=None)\n", 130 | "#2: Mode(name='mode-2', index=None)\n", 131 | "\n", 132 | "Properties described by modes: ['mode-0', 'mode-1_mode-2']\n", 133 | "Associated normal shape: (2, 3, 4)\n", 134 | "\n", 135 | "\t\tApplied transformations:\n", 136 | "\tTransformation #0:\n", 137 | "Reshaping type: Init\n", 138 | "New mode order: ([0], [1], [2])\n", 139 | "\n", 140 | "\tTransformation #1:\n", 141 | "Reshaping type: T\n", 142 | "New mode order: ([0], [1, 2])\n", 143 | "\n", 144 | "\n", 145 | "===================================================================================\n", 146 | "\n", 147 | "\t\t4-D array as an unfolded tensor\n", 148 | "\tUnderlying data array:\n", 149 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n", 150 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n", 151 | "\n", 
152 | "\tInformation about its modes:\n", 153 | "#0: Mode(name='mode-0', index=None)\n", 154 | "#1: Mode(name='mode-1', index=None)\n", 155 | "#2: Mode(name='mode-2', index=None)\n", 156 | "#3: Mode(name='mode-3', index=None)\n", 157 | "\n", 158 | "Properties described by modes: ['mode-0', 'mode-1_mode-2_mode-3']\n", 159 | "Associated normal shape: (2, 3, 2, 2)\n", 160 | "\n", 161 | "\t\tApplied transformations:\n", 162 | "\tTransformation #0:\n", 163 | "Reshaping type: Init\n", 164 | "New mode order: ([0], [1], [2], [3])\n", 165 | "\n", 166 | "\tTransformation #1:\n", 167 | "Reshaping type: T\n", 168 | "New mode order: ([0], [1, 2, 3])\n", 169 | "\n" 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "I, J, K, L = 2, 3, 2, 2\n", 175 | "\n", 176 | "data = np.arange(I*J*K*L).reshape(I, (J*K*L))\n", 177 | "\n", 178 | "\n", 179 | "custom_state_1 = dict(mode_order=([0], [1, 2]),\n", 180 | " normal_shape=(I, J, K*L),\n", 181 | " rtype=\"T\"\n", 182 | " )\n", 183 | "custom_state_2 = dict(mode_order=([0], [1, 2, 3]),\n", 184 | " normal_shape=(I, J, K, L),\n", 185 | " rtype=\"T\"\n", 186 | " )\n", 187 | "\n", 188 | "tensor = Tensor(data)\n", 189 | "tensor_1 = Tensor(data, custom_state_1)\n", 190 | "tensor_2 = Tensor(data, custom_state_2)\n", 191 | "\n", 192 | "print(\"\\t\\t2-D array as a tensor\")\n", 193 | "print_tensor_state(tensor)\n", 194 | "\n", 195 | "print_sep_line()\n", 196 | "\n", 197 | "print(\"\\t\\t3-D array as an unfolded tensor\")\n", 198 | "print_tensor_state(tensor_1)\n", 199 | "\n", 200 | "print_sep_line()\n", 201 | "\n", 202 | "print(\"\\t\\t4-D array as an unfolded tensor\")\n", 203 | "print_tensor_state(tensor_2)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "Here we can see that the tensors with the same data values are actually in different states and have a different number of modes. 
These modes have default names but can be changed during object creation or by calling **set_mode_names()** - the designated method of **Tensor** class to change their names.\n", 211 | "\n", 212 | "Next, we can bring a tensor (for which we specified **custom state**) to the normal form by calling the **fold()** method." 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 3, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "name": "stdout", 222 | "output_type": "stream", 223 | "text": [ 224 | "\tUnderlying data array:\n", 225 | "[[[ 0 1 2 3]\n", 226 | " [ 4 5 6 7]\n", 227 | " [ 8 9 10 11]]\n", 228 | "\n", 229 | " [[12 13 14 15]\n", 230 | " [16 17 18 19]\n", 231 | " [20 21 22 23]]]\n", 232 | "\n", 233 | "\tInformation about its modes:\n", 234 | "#0: Mode(name='mode-0', index=None)\n", 235 | "#1: Mode(name='mode-1', index=None)\n", 236 | "#2: Mode(name='mode-2', index=None)\n", 237 | "\n", 238 | "Properties described by modes: ['mode-0', 'mode-1', 'mode-2']\n", 239 | "Associated normal shape: (2, 3, 4)\n", 240 | "\n", 241 | "\t\tApplied transformations:\n", 242 | "\tTransformation #0:\n", 243 | "Reshaping type: Init\n", 244 | "New mode order: ([0], [1], [2])\n", 245 | "\n", 246 | "\n", 247 | "===================================================================================\n", 248 | "\n", 249 | "\tUnderlying data array:\n", 250 | "[[[[ 0 1]\n", 251 | " [ 2 3]]\n", 252 | "\n", 253 | " [[ 4 5]\n", 254 | " [ 6 7]]\n", 255 | "\n", 256 | " [[ 8 9]\n", 257 | " [10 11]]]\n", 258 | "\n", 259 | "\n", 260 | " [[[12 13]\n", 261 | " [14 15]]\n", 262 | "\n", 263 | " [[16 17]\n", 264 | " [18 19]]\n", 265 | "\n", 266 | " [[20 21]\n", 267 | " [22 23]]]]\n", 268 | "\n", 269 | "\tInformation about its modes:\n", 270 | "#0: Mode(name='mode-0', index=None)\n", 271 | "#1: Mode(name='mode-1', index=None)\n", 272 | "#2: Mode(name='mode-2', index=None)\n", 273 | "#3: Mode(name='mode-3', index=None)\n", 274 | "\n", 275 | "Properties described by modes: ['mode-0', 
'mode-1', 'mode-2', 'mode-3']\n", 276 | "Associated normal shape: (2, 3, 2, 2)\n", 277 | "\n", 278 | "\t\tApplied transformations:\n", 279 | "\tTransformation #0:\n", 280 | "Reshaping type: Init\n", 281 | "New mode order: ([0], [1], [2], [3])\n", 282 | "\n" 283 | ] 284 | } 285 | ], 286 | "source": [ 287 | "tensor_1.fold()\n", 288 | "tensor_2.fold()\n", 289 | "\n", 290 | "print_tensor_state(tensor_1)\n", 291 | "\n", 292 | "print_sep_line()\n", 293 | "\n", 294 | "print_tensor_state(tensor_2)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "**Note:** at the moment, only one transformation can be applied to a tensor. This will be generalised in the future. " 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "## Tensor modes: integration with pandas library\n", 309 | "\n", 310 | "**Hottbox** is equipped with tools to convert multi-index pandas dataframe to tensors and vice versa. You can keep all meta information, only mode names or drop all of it." 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "### Multi-index dataframe to Tensor" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 4, 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "data": { 327 | "text/html": [ 328 | "
\n", 329 | "\n", 342 | "\n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | "
Population
YearMonthDay
2005JanMon0
Wed1
FebMon2
Wed3
2010JanMon4
Wed5
FebMon6
Wed7
\n", 398 | "
" 399 | ], 400 | "text/plain": [ 401 | " Population\n", 402 | "Year Month Day \n", 403 | "2005 Jan Mon 0\n", 404 | " Wed 1\n", 405 | " Feb Mon 2\n", 406 | " Wed 3\n", 407 | "2010 Jan Mon 4\n", 408 | " Wed 5\n", 409 | " Feb Mon 6\n", 410 | " Wed 7" 411 | ] 412 | }, 413 | "execution_count": 4, 414 | "metadata": {}, 415 | "output_type": "execute_result" 416 | } 417 | ], 418 | "source": [ 419 | "data = {'Year': [2005, 2005, 2005, 2005, 2010, 2010, 2010, 2010],\n", 420 | " 'Month': ['Jan', 'Jan', 'Feb', 'Feb', 'Jan', 'Jan', 'Feb', 'Feb'],\n", 421 | " 'Day': ['Mon', 'Wed', 'Mon', 'Wed', 'Mon', 'Wed', 'Mon', 'Wed'],\n", 422 | " 'Population': np.arange(8)\n", 423 | " }\n", 424 | "df = pd.DataFrame.from_dict(data)\n", 425 | "df.set_index([\"Year\", \"Month\", \"Day\"], inplace=True)\n", 426 | "df" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": 5, 432 | "metadata": {}, 433 | "outputs": [ 434 | { 435 | "name": "stdout", 436 | "output_type": "stream", 437 | "text": [ 438 | "\tUnderlying data array:\n", 439 | "[[[0 1]\n", 440 | " [2 3]]\n", 441 | "\n", 442 | " [[4 5]\n", 443 | " [6 7]]]\n", 444 | "\n", 445 | "\tInformation about its modes:\n", 446 | "#0: Mode(name='Year', index=[2005, 2010])\n", 447 | "#1: Mode(name='Month', index=['Jan', 'Feb'])\n", 448 | "#2: Mode(name='Day', index=['Mon', 'Wed'])\n", 449 | "\n", 450 | "Properties described by modes: ['Year', 'Month', 'Day']\n", 451 | "Associated normal shape: (2, 2, 2)\n" 452 | ] 453 | } 454 | ], 455 | "source": [ 456 | "tensor_1 = pd_to_tensor(df)\n", 457 | "print_tensor_state(tensor_1, transforms=False)" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": 6, 463 | "metadata": {}, 464 | "outputs": [ 465 | { 466 | "name": "stdout", 467 | "output_type": "stream", 468 | "text": [ 469 | "\tUnderlying data array:\n", 470 | "[[[0 1]\n", 471 | " [2 3]]\n", 472 | "\n", 473 | " [[4 5]\n", 474 | " [6 7]]]\n", 475 | "\n", 476 | "\tInformation about its modes:\n", 477 | "#0: 
Mode(name='Year', index=None)\n", 478 | "#1: Mode(name='Month', index=None)\n", 479 | "#2: Mode(name='Day', index=None)\n", 480 | "\n", 481 | "Properties described by modes: ['Year', 'Month', 'Day']\n", 482 | "Associated normal shape: (2, 2, 2)\n" 483 | ] 484 | } 485 | ], 486 | "source": [ 487 | "tensor_2 = pd_to_tensor(df, keep_index=False)\n", 488 | "print_tensor_state(tensor_2, transforms=False)" 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": {}, 494 | "source": [ 495 | "### Tensor to Multi-index dataframe\n", 496 | "\n", 497 | "When tensor is converted to multi-index dataframe, the information about its modes is extracted, which then is used for column name and index values of the resulting dataframe. Next we show, various ways of specifying names/indecies for modes of the tensor and how this affects the result of the conversion." 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": 7, 503 | "metadata": {}, 504 | "outputs": [ 505 | { 506 | "name": "stdout", 507 | "output_type": "stream", 508 | "text": [ 509 | "\n", 510 | "\tInformation about its modes:\n", 511 | "#0: Mode(name='mode-0', index=None)\n", 512 | "#1: Mode(name='mode-1', index=None)\n", 513 | "#2: Mode(name='mode-2', index=None)\n", 514 | "\n", 515 | "Properties described by modes: ['mode-0', 'mode-1', 'mode-2']\n", 516 | "Associated normal shape: (2, 2, 2)\n" 517 | ] 518 | }, 519 | { 520 | "data": { 521 | "text/html": [ 522 | "
\n", 523 | "\n", 536 | "\n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | "
Values
mode-0mode-1mode-2
0000
11
102
13
1004
15
106
17
\n", 592 | "
" 593 | ], 594 | "text/plain": [ 595 | " Values\n", 596 | "mode-0 mode-1 mode-2 \n", 597 | "0 0 0 0\n", 598 | " 1 1\n", 599 | " 1 0 2\n", 600 | " 1 3\n", 601 | "1 0 0 4\n", 602 | " 1 5\n", 603 | " 1 0 6\n", 604 | " 1 7" 605 | ] 606 | }, 607 | "execution_count": 7, 608 | "metadata": {}, 609 | "output_type": "execute_result" 610 | } 611 | ], 612 | "source": [ 613 | "# Default meta information\n", 614 | "data = np.arange(8).reshape(2, 2, 2)\n", 615 | "tensor = Tensor(data)\n", 616 | "df = tensor_to_pd(tensor)\n", 617 | "\n", 618 | "print_tensor_state(tensor, data=False, transforms=False)\n", 619 | "df" 620 | ] 621 | }, 622 | { 623 | "cell_type": "code", 624 | "execution_count": 8, 625 | "metadata": {}, 626 | "outputs": [ 627 | { 628 | "name": "stdout", 629 | "output_type": "stream", 630 | "text": [ 631 | "\n", 632 | "\tInformation about its modes:\n", 633 | "#0: Mode(name='Year', index=None)\n", 634 | "#1: Mode(name='Month', index=None)\n", 635 | "#2: Mode(name='Day', index=None)\n", 636 | "\n", 637 | "Properties described by modes: ['Year', 'Month', 'Day']\n", 638 | "Associated normal shape: (2, 2, 2)\n" 639 | ] 640 | }, 641 | { 642 | "data": { 643 | "text/html": [ 644 | "
\n", 645 | "\n", 658 | "\n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | "
Values
YearMonthDay
0000
11
102
13
1004
15
106
17
\n", 714 | "
" 715 | ], 716 | "text/plain": [ 717 | " Values\n", 718 | "Year Month Day \n", 719 | "0 0 0 0\n", 720 | " 1 1\n", 721 | " 1 0 2\n", 722 | " 1 3\n", 723 | "1 0 0 4\n", 724 | " 1 5\n", 725 | " 1 0 6\n", 726 | " 1 7" 727 | ] 728 | }, 729 | "execution_count": 8, 730 | "metadata": {}, 731 | "output_type": "execute_result" 732 | } 733 | ], 734 | "source": [ 735 | "# Custom mode names\n", 736 | "# Can also be passed as a list of names during creation of the tensor\n", 737 | "data = np.arange(8).reshape(2, 2, 2)\n", 738 | "new_mode_names = {0: \"Year\",\n", 739 | " 1: \"Month\",\n", 740 | " 2: \"Day\"\n", 741 | " }\n", 742 | "tensor = Tensor(data)\n", 743 | "tensor.set_mode_names(new_mode_names)\n", 744 | "df = tensor_to_pd(tensor)\n", 745 | "\n", 746 | "print_tensor_state(tensor, data=False, transforms=False)\n", 747 | "df" 748 | ] 749 | }, 750 | { 751 | "cell_type": "code", 752 | "execution_count": 9, 753 | "metadata": {}, 754 | "outputs": [ 755 | { 756 | "name": "stdout", 757 | "output_type": "stream", 758 | "text": [ 759 | "\n", 760 | "\tInformation about its modes:\n", 761 | "#0: Mode(name='mode-0', index=[2005, 2010])\n", 762 | "#1: Mode(name='mode-1', index=['Jan', 'Feb'])\n", 763 | "#2: Mode(name='mode-2', index=['Mon', 'Wed'])\n", 764 | "\n", 765 | "Properties described by modes: ['mode-0', 'mode-1', 'mode-2']\n", 766 | "Associated normal shape: (2, 2, 2)\n" 767 | ] 768 | }, 769 | { 770 | "data": { 771 | "text/html": [ 772 | "
\n", 773 | "\n", 786 | "\n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | "
Values
mode-0mode-1mode-2
2005JanMon0
Wed1
FebMon2
Wed3
2010JanMon4
Wed5
FebMon6
Wed7
\n", 842 | "
" 843 | ], 844 | "text/plain": [ 845 | " Values\n", 846 | "mode-0 mode-1 mode-2 \n", 847 | "2005 Jan Mon 0\n", 848 | " Wed 1\n", 849 | " Feb Mon 2\n", 850 | " Wed 3\n", 851 | "2010 Jan Mon 4\n", 852 | " Wed 5\n", 853 | " Feb Mon 6\n", 854 | " Wed 7" 855 | ] 856 | }, 857 | "execution_count": 9, 858 | "metadata": {}, 859 | "output_type": "execute_result" 860 | } 861 | ], 862 | "source": [ 863 | "# Custom mode index\n", 864 | "data = np.arange(8).reshape(2, 2, 2)\n", 865 | "tensor = Tensor(data)\n", 866 | "new_mode_index = {0: [2005, 2010],\n", 867 | " 1: [\"Jan\", \"Feb\"],\n", 868 | " 2: [\"Mon\", \"Wed\"],\n", 869 | " }\n", 870 | "tensor.set_mode_index(new_mode_index)\n", 871 | "df = tensor_to_pd(tensor)\n", 872 | "\n", 873 | "print_tensor_state(tensor, data=False, transforms=False)\n", 874 | "df" 875 | ] 876 | }, 877 | { 878 | "cell_type": "code", 879 | "execution_count": 10, 880 | "metadata": {}, 881 | "outputs": [ 882 | { 883 | "name": "stdout", 884 | "output_type": "stream", 885 | "text": [ 886 | "\n", 887 | "\tInformation about its modes:\n", 888 | "#0: Mode(name='Year', index=[2005, 2010])\n", 889 | "#1: Mode(name='Month', index=['Jan', 'Feb'])\n", 890 | "#2: Mode(name='Day', index=['Mon', 'Wed'])\n", 891 | "\n", 892 | "Properties described by modes: ['Year', 'Month', 'Day']\n", 893 | "Associated normal shape: (2, 2, 2)\n" 894 | ] 895 | }, 896 | { 897 | "data": { 898 | "text/html": [ 899 | "
\n", 900 | "\n", 913 | "\n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | "
Population
YearMonthDay
2005JanMon0
Wed1
FebMon2
Wed3
2010JanMon4
Wed5
FebMon6
Wed7
\n", 969 | "
" 970 | ], 971 | "text/plain": [ 972 | " Population\n", 973 | "Year Month Day \n", 974 | "2005 Jan Mon 0\n", 975 | " Wed 1\n", 976 | " Feb Mon 2\n", 977 | " Wed 3\n", 978 | "2010 Jan Mon 4\n", 979 | " Wed 5\n", 980 | " Feb Mon 6\n", 981 | " Wed 7" 982 | ] 983 | }, 984 | "execution_count": 10, 985 | "metadata": {}, 986 | "output_type": "execute_result" 987 | } 988 | ], 989 | "source": [ 990 | "# Custom mode names, mode index and column name for dataframe\n", 991 | "data = np.arange(8).reshape(2, 2, 2)\n", 992 | "new_mode_index = {0: [2005, 2010],\n", 993 | " 1: [\"Jan\", \"Feb\"],\n", 994 | " 2: [\"Mon\", \"Wed\"],\n", 995 | " }\n", 996 | "tensor = Tensor(data, mode_names=[\"Year\", \"Month\", \"Day\"])\n", 997 | "tensor.set_mode_index(new_mode_index)\n", 998 | "df = tensor_to_pd(tensor, col_name=\"Population\")\n", 999 | "\n", 1000 | "print_tensor_state(tensor, data=False, transforms=False)\n", 1001 | "df" 1002 | ] 1003 | } 1004 | ], 1005 | "metadata": { 1006 | "kernelspec": { 1007 | "display_name": "hottbox-tutorials", 1008 | "language": "python", 1009 | "name": "hottbox-tutorials" 1010 | }, 1011 | "language_info": { 1012 | "codemirror_mode": { 1013 | "name": "ipython", 1014 | "version": 3 1015 | }, 1016 | "file_extension": ".py", 1017 | "mimetype": "text/x-python", 1018 | "name": "python", 1019 | "nbconvert_exporter": "python", 1020 | "pygments_lexer": "ipython3", 1021 | "version": "3.6.6" 1022 | } 1023 | }, 1024 | "nbformat": 4, 1025 | "nbformat_minor": 2 1026 | } 1027 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | HOTTBOX tutorials 2 | ================= 3 | |Binder|_ 4 | 5 | .. |Binder| image:: https://mybinder.org/badge.svg 6 | .. _Binder: https://mybinder.org/v2/gh/hottbox/hottbox-tutorials/master 7 | 8 | This repository contains a series of tutorials on how to use `hottbox `_. 
9 | 10 | 11 | Local Installation 12 | ================== 13 | 14 | In order to get started you need to clone this repository and install 15 | packages specified in ``requirements.txt``:: 16 | 17 | git clone https://github.com/hottbox/hottbox-tutorials 18 | 19 | cd hottbox-tutorials 20 | 21 | pip install -r requirements.txt 22 | 23 | 24 | If you are on Unix and have anaconda installed, you can execute ``bootstrap_venv.sh``. 25 | This script will prepare a new virtual environment for these tutorials.:: 26 | 27 | git clone https://github.com/hottbox/hottbox-tutorials 28 | 29 | source bootstrap_venv.sh 30 | 31 | 32 | Table of contents: 33 | ================== 34 | .. |ti1| image:: https://mybinder.org/badge.svg 35 | .. _ti1: https://mybinder.org/v2/gh/hottbox/hottbox-tutorials/master?filepath=1_N-dimensional_arrays_and_Tensor_class.ipynb 36 | .. _Tutorial1: https://github.com/hottbox/hottbox-tutorials/blob/master/1_N-dimensional_arrays_and_Tensor_class.ipynb 37 | 38 | .. |ti2| image:: https://mybinder.org/badge.svg 39 | .. _ti2: https://mybinder.org/v2/gh/hottbox/hottbox-tutorials/master?filepath=2_Efficient_representations_of_tensors.ipynb 40 | .. _Tutorial2: https://github.com/hottbox/hottbox-tutorials/blob/master/2_Efficient_representations_of_tensors.ipynb 41 | 42 | 43 | .. |ti3| image:: https://mybinder.org/badge.svg 44 | .. _ti3: https://mybinder.org/v2/gh/hottbox/hottbox-tutorials/master?filepath=3_Fundamental_tensor_decompositions.ipynb 45 | .. _Tutorial3: https://github.com/hottbox/hottbox-tutorials/blob/master/3_Fundamental_tensor_decompositions.ipynb 46 | 47 | .. |ti4| image:: https://mybinder.org/badge.svg 48 | .. _ti4: https://mybinder.org/v2/gh/hottbox/hottbox-tutorials/master?filepath=4_Ecosystem_of_Tensor_class.ipynb 49 | .. _Tutorial4: https://github.com/hottbox/hottbox-tutorials/blob/master/4_Ecosystem_of_Tensor_class.ipynb 50 | 51 | 52 | .. |ti5| image:: https://mybinder.org/badge.svg 53 | .. 
_ti5: https://mybinder.org/v2/gh/hottbox/hottbox-tutorials/master?filepath=5_Tensor_meta_information_and_pandas_integration.ipynb 54 | .. _Tutorial5: https://github.com/hottbox/hottbox-tutorials/blob/master/5_Tensor_meta_information_and_pandas_integration.ipynb 55 | 56 | 57 | +--------------------------------------------------------------------------------------+-------------------------------+--------------------------------------+ 58 | | Focus of the tutorial | Static notebook on github.com | Interactive notebook on mybinder.org | 59 | +======================================================================================+===============================+======================================+ 60 | | 1. N-dimensional arrays and its functionality: Tensor | `Tutorial1`_ | |ti1|_ | 61 | +--------------------------------------------------------------------------------------+-------------------------------+--------------------------------------+ 62 | | 2. Efficient representation of N-dimensional arrays: TensorCPD, TensorTKD, TensorTT | `Tutorial2`_ | |ti2|_ | 63 | +--------------------------------------------------------------------------------------+-------------------------------+--------------------------------------+ 64 | | 3. Fundamental tensor decompositions: CPD, HOSVD, HOOI, TTSVD | `Tutorial3`_ | |ti3|_ | 65 | +--------------------------------------------------------------------------------------+-------------------------------+--------------------------------------+ 66 | | 4. Ecosystem of Tensor class and transformations | `Tutorial4`_ | |ti4|_ | 67 | +--------------------------------------------------------------------------------------+-------------------------------+--------------------------------------+ 68 | | 5. 
Tensor meta information and pandas integration | `Tutorial5`_ | |ti5|_ | 69 | +--------------------------------------------------------------------------------------+-------------------------------+--------------------------------------+ 70 | 71 | 72 | Data used in these tutorials 73 | ============================ 74 | 75 | All data for these tutorials can be found under ``data/`` directory. 76 | 77 | 78 | 79 | Short description of datasets 80 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 81 | 82 | - **ETH80** dataset 83 | 84 | This dataset consists of 3,280 images of natural objects from 8 categories (apple, car, cow, cup, dog, horse, pera, tomato), each containing 10 objects with 41 views per object. More info about this dataset can be found on `here `_. 85 | 86 | 87 | 88 | Short description of files with data 89 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 90 | 91 | 1. ``data/ETH80/basic_066-063.npy`` 92 | 93 | Contains only one RGB image of one object from each category, which makes it a total of 8 samples. The view point identifier for all of them is ``066-063``. These images are 128 by 128 pixes and are stored in the unfolded form. Thus, when this file is read by ``numpy`` it outputs array with 8 rows and 128\*128\*3 = 49152 columns. 94 | -------------------------------------------------------------------------------- /bootstrap_venv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | RED="\033[0;31m" 4 | GREEN="\033[0;32m" 5 | CYAN="\033[0;36m" 6 | BROWN="\033[0;33m" 7 | WHITE="\033[0;0m" 8 | 9 | VENV_NAME="hottbox-tutorials" 10 | proceed_installation=1 11 | 12 | welcome_message(){ 13 | printf "\n====================================================================\n" 14 | 15 | 16 | printf "\n\tWelcome to a series of tutorials on HOTTBOX. \n\n" 17 | printf "This script will prepare a virtual environment for these tutorials. 
\n" 18 | printf "Here is what is going to happen during this process: \n\n" 19 | printf "1) Use anaconda to create a new venv: ${GREEN}${VENV_NAME}${WHITE} \n\n" 20 | printf "2) Install required packages: \n" 21 | cat requirements.txt 22 | printf "\n\n" 23 | printf "3) Install ipykernel: ${GREEN}${VENV_NAME}${WHITE}\n" 24 | 25 | printf "\n====================================================================\n\n" 26 | 27 | } 28 | 29 | 30 | ################################################################## 31 | ###-------------------- Main --------------------### 32 | ################################################################## 33 | 34 | 35 | welcome_message 36 | 37 | 38 | printf "Do you want to proceed? (y/n) " 39 | answer=$( while ! head -c 1 | grep -i '[ny]' ;do true ;done ) 40 | if echo "$answer" | grep -iq "^y" ;then 41 | echo -e "\nFingers crossed and start $GREEN :-/ $WHITE" 42 | else 43 | echo -e "\nQuitting $RED :-( $WHITE\n" 44 | proceed_installation=0 45 | fi 46 | 47 | ###-------- Check if conda installation exists 48 | if [[ ($proceed_installation == 1) ]]; then 49 | printf "\nChecking to see if ${GREEN}anaconda${WHITE} is installed: " 50 | if ! [ -x "$(command -v conda)" ]; then 51 | echo -e "${RED}not installed${WHITE}.\n" 52 | echo -e "You need have ${RED}anaconda${WHITE} to proceed with this script." 53 | echo -e "Abort installation, nothing has been configured.\n" 54 | proceed_installation=0 55 | else 56 | echo -e "${GREEN}yes, it is${WHITE}." 
57 | fi 58 | fi 59 | 60 | ###-------- Environment creation 61 | if [[ ($proceed_installation == 1) ]]; then 62 | conda create --name ${VENV_NAME} python=3.6 63 | source activate ${VENV_NAME} 64 | VENV_HOME="$(which python)" 65 | 66 | pip install --upgrade pip 67 | pip install -r requirements.txt 68 | python -m ipykernel install --user --name ${VENV_NAME} --display-name ${VENV_NAME} 69 | 70 | printf "\n" 71 | printf "====================================================================\n" 72 | printf "============ ============\n" 73 | printf "============ Working environment is ready ============\n" 74 | printf "============ ============\n" 75 | printf "====================================================================\n" 76 | printf "\n" 77 | printf "1) Python interpreter for '$VENV_NAME' is located in: \n\n\t" 78 | printf "${VENV_HOME}\n\n" 79 | 80 | printf "2) The associate ipython kernel (used in the tutorials by default) is located in: \n\n\t" 81 | jupyter kernelspec list | grep "$VENV_NAME" 82 | 83 | printf "\n" 84 | printf "3) You can also select this kernel (${VENV_NAME}), if you want to play around in your own notebook.\n\n" 85 | 86 | source deactivate 87 | fi 88 | -------------------------------------------------------------------------------- /data/ETH80/basic_066-063.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/data/ETH80/basic_066-063.npy -------------------------------------------------------------------------------- /images/C_Fortran_ordering.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/C_Fortran_ordering.png -------------------------------------------------------------------------------- /images/TensorCPD.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/TensorCPD.png -------------------------------------------------------------------------------- /images/TensorTKD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/TensorTKD.png -------------------------------------------------------------------------------- /images/TensorTT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/TensorTT.png -------------------------------------------------------------------------------- /images/cpd_as_rank_one.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/cpd_as_rank_one.png -------------------------------------------------------------------------------- /images/data-modes-state.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/data-modes-state.png -------------------------------------------------------------------------------- /images/different-forms-of-data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/different-forms-of-data.png -------------------------------------------------------------------------------- /images/different-tensors.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/different-tensors.png -------------------------------------------------------------------------------- /images/folding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/folding.png -------------------------------------------------------------------------------- /images/mode_n_product.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/mode_n_product.png -------------------------------------------------------------------------------- /images/outerproduct_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/outerproduct_3.png -------------------------------------------------------------------------------- /images/storage_complexity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/storage_complexity.png -------------------------------------------------------------------------------- /images/tensor_substructures.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/tensor_substructures.png -------------------------------------------------------------------------------- /images/tensors.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/tensors.png -------------------------------------------------------------------------------- /images/unfolding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/unfolding.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.14.2 2 | scipy==1.0.1 3 | pandas==0.22.0 4 | hottbox==0.1.3 5 | ipykernel --------------------------------------------------------------------------------