├── .gitignore
├── 1_N-dimensional_arrays_and_Tensor_class.ipynb
├── 2_Efficient_representations_of_tensors.ipynb
├── 3_Fundamental_tensor_decompositions.ipynb
├── 4_Ecosystem_of_Tensor_class.ipynb
├── 5_Tensor_meta_information_and_pandas_integration.ipynb
├── README.rst
├── bootstrap_venv.sh
├── data
└── ETH80
│ └── basic_066-063.npy
├── images
├── C_Fortran_ordering.png
├── TensorCPD.png
├── TensorTKD.png
├── TensorTT.png
├── cpd_as_rank_one.png
├── data-modes-state.png
├── different-forms-of-data.png
├── different-tensors.png
├── folding.png
├── mode_n_product.png
├── outerproduct_3.png
├── storage_complexity.png
├── tensor_substructures.png
├── tensors.png
└── unfolding.png
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | test_[0-9]*
3 | Untitled*.ipynb
4 |
5 |
6 |
7 | ########################
8 | ### TEMPLATE FOR PYTHON
9 | ########################
10 | # Byte-compiled / optimized / DLL files
11 | __pycache__/
12 | *.py[cod]
13 | *$py.class
14 |
15 | # C extensions
16 | *.so
17 |
18 | # Distribution / packaging
19 | .Python
20 | build/
21 | develop-eggs/
22 | dist/
23 | downloads/
24 | eggs/
25 | .eggs/
26 | lib/
27 | lib64/
28 | parts/
29 | sdist/
30 | var/
31 | wheels/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 | MANIFEST
36 |
37 | # PyInstaller
38 | # Usually these files are written by a python script from a template
39 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
40 | *.manifest
41 | *.spec
42 |
43 | # Installer logs
44 | pip-log.txt
45 | pip-delete-this-directory.txt
46 |
47 | # Unit test / coverage reports
48 | .pytest_cache/
49 | htmlcov/
50 | .tox/
51 | .coverage
52 | .coverage.*
53 | .cache
54 | nosetests.xml
55 | coverage.xml
56 | *.cover
57 | .hypothesis/
58 |
59 | # Translations
60 | *.mo
61 | *.pot
62 |
63 | # Django stuff:
64 | *.log
65 | .static_storage/
66 | .media/
67 | local_settings.py
68 |
69 | # Flask stuff:
70 | instance/
71 | .webassets-cache
72 |
73 | # Scrapy stuff:
74 | .scrapy
75 |
76 | # Sphinx documentation
77 | docs/_build/
78 |
79 | # PyBuilder
80 | target/
81 |
82 | # Jupyter Notebook
83 | .ipynb_checkpoints
84 |
85 | # pyenv
86 | .python-version
87 |
88 | # celery beat schedule file
89 | celerybeat-schedule
90 |
91 | # SageMath parsed files
92 | *.sage.py
93 |
94 | # Environments
95 | .env
96 | .venv
97 | env/
98 | venv/
99 | ENV/
100 | env.bak/
101 | venv.bak/
102 |
103 | # Spyder project settings
104 | .spyderproject
105 | .spyproject
106 |
107 | # Rope project settings
108 | .ropeproject
109 |
110 | # mkdocs documentation
111 | /site
112 |
113 | # mypy
114 | .mypy_cache/
115 |
116 |
117 |
118 | ########################
119 | ### TEMPLATE TEX
120 | ########################
121 | ## Core latex/pdflatex auxiliary files:
122 | *.aux
123 | *.lof
124 | *.log
125 | *.lot
126 | *.fls
127 | *.out
128 | *.toc
129 | *.fmt
130 | *.fot
131 | *.cb
132 | *.cb2
133 |
134 | ## Intermediate documents:
135 | *.dvi
136 | *.xdv
137 | *-converted-to.*
138 | # these rules might exclude image files for figures etc.
139 | # *.ps
140 | # *.eps
141 | # *.pdf
142 |
143 | ## Generated if empty string is given at "Please type another file name for output:"
144 | .pdf
145 |
146 | ## Bibliography auxiliary files (bibtex/biblatex/biber):
147 | *.bbl
148 | *.bcf
149 | *.blg
150 | *-blx.aux
151 | *-blx.bib
152 | *.run.xml
153 |
154 | ## Build tool auxiliary files:
155 | *.fdb_latexmk
156 | *.synctex
157 | *.synctex(busy)
158 | *.synctex.gz
159 | *.synctex.gz(busy)
160 | *.pdfsync
161 |
162 | ## Auxiliary and intermediate files from other packages:
163 | # algorithms
164 | *.alg
165 | *.loa
166 |
167 | # achemso
168 | acs-*.bib
169 |
170 | # amsthm
171 | *.thm
172 |
173 | # beamer
174 | *.nav
175 | *.pre
176 | *.snm
177 | *.vrb
178 |
179 | # changes
180 | *.soc
181 |
182 | # cprotect
183 | *.cpt
184 |
185 | # elsarticle (documentclass of Elsevier journals)
186 | *.spl
187 |
188 | # endnotes
189 | *.ent
190 |
191 | # fixme
192 | *.lox
193 |
194 | # feynmf/feynmp
195 | *.mf
196 | *.mp
197 | *.t[1-9]
198 | *.t[1-9][0-9]
199 | *.tfm
200 |
201 | #(r)(e)ledmac/(r)(e)ledpar
202 | *.end
203 | *.?end
204 | *.[1-9]
205 | *.[1-9][0-9]
206 | *.[1-9][0-9][0-9]
207 | *.[1-9]R
208 | *.[1-9][0-9]R
209 | *.[1-9][0-9][0-9]R
210 | *.eledsec[1-9]
211 | *.eledsec[1-9]R
212 | *.eledsec[1-9][0-9]
213 | *.eledsec[1-9][0-9]R
214 | *.eledsec[1-9][0-9][0-9]
215 | *.eledsec[1-9][0-9][0-9]R
216 |
217 | # glossaries
218 | *.acn
219 | *.acr
220 | *.glg
221 | *.glo
222 | *.gls
223 | *.glsdefs
224 |
225 | # gnuplottex
226 | *-gnuplottex-*
227 |
228 | # gregoriotex
229 | *.gaux
230 | *.gtex
231 |
232 | # hyperref
233 | *.brf
234 |
235 | # knitr
236 | *-concordance.tex
237 | # TODO Comment the next line if you want to keep your tikz graphics files
238 | *.tikz
239 | *-tikzDictionary
240 |
241 | # listings
242 | *.lol
243 |
244 | # makeidx
245 | *.idx
246 | *.ilg
247 | *.ind
248 | *.ist
249 |
250 | # minitoc
251 | *.maf
252 | *.mlf
253 | *.mlt
254 | *.mtc[0-9]*
255 | *.slf[0-9]*
256 | *.slt[0-9]*
257 | *.stc[0-9]*
258 |
259 | # minted
260 | _minted*
261 | *.pyg
262 |
263 | # morewrites
264 | *.mw
265 |
266 | # nomencl
267 | *.nlo
268 |
269 | # pax
270 | *.pax
271 |
272 | # pdfpcnotes
273 | *.pdfpc
274 |
275 | # sagetex
276 | *.sagetex.sage
277 | *.sagetex.py
278 | *.sagetex.scmd
279 |
280 | # scrwfile
281 | *.wrt
282 |
283 | # sympy
284 | *.sout
285 | *.sympy
286 | sympy-plots-for-*.tex/
287 |
288 | # pdfcomment
289 | *.upa
290 | *.upb
291 |
292 | # pythontex
293 | *.pytxcode
294 | pythontex-files-*/
295 |
296 | # thmtools
297 | *.loe
298 |
299 | # TikZ & PGF
300 | *.dpth
301 | *.md5
302 | *.auxlock
303 |
304 | # todonotes
305 | *.tdo
306 |
307 | # easy-todo
308 | *.lod
309 |
310 | # xindy
311 | *.xdy
312 |
313 | # xypic precompiled matrices
314 | *.xyc
315 |
316 | # endfloat
317 | *.ttt
318 | *.fff
319 |
320 | # Latexian
321 | TSWLatexianTemp*
322 |
323 | ## Editors:
324 | # WinEdt
325 | *.bak
326 | *.sav
327 |
328 | # Texpad
329 | .texpadtmp
330 |
331 | # Kile
332 | *.backup
333 |
334 | # KBibTeX
335 | *~[0-9]*
336 |
337 | # auto folder when using emacs and auctex
338 | /auto/*
339 |
340 | # expex forward references with \gathertags
341 | *-tags.tex
342 |
343 |
344 |
345 | ########################
346 | ### TEMPLATE MATLAB
347 | ########################
348 | ##---------------------------------------------------
349 | ## Remove autosaves generated by the Matlab editor
350 | ## We have git for backups!
351 | ##---------------------------------------------------
352 |
353 | # Windows default autosave extension
354 | *.asv
355 |
356 | # OSX / *nix default autosave extension
357 | *.m~
358 |
359 | # Compiled MEX binaries (all platforms)
360 | *.mex*
361 |
362 | # Simulink Code Generation
363 | slprj/
364 |
365 | # Session info
366 | octave-workspace
367 |
368 | # Simulink autosave extension
369 | *.autosave
370 |
371 |
372 |
373 | ########################
374 | ### TEMPLATE JETBRAINS
375 | ########################
376 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
377 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
378 |
379 | # User-specific stuff:
380 | .idea/**/workspace.xml
381 | .idea/**/tasks.xml
382 | .idea/dictionaries
383 |
384 | # Sensitive or high-churn files:
385 | .idea/**/dataSources/
386 | .idea/**/dataSources.ids
387 | .idea/**/dataSources.xml
388 | .idea/**/dataSources.local.xml
389 | .idea/**/sqlDataSources.xml
390 | .idea/**/dynamic.xml
391 | .idea/**/uiDesigner.xml
392 |
393 | # Gradle:
394 | .idea/**/gradle.xml
395 | .idea/**/libraries
396 |
397 | # CMake
398 | cmake-build-debug/
399 |
400 | # Mongo Explorer plugin:
401 | .idea/**/mongoSettings.xml
402 |
403 | ## File-based project format:
404 | *.iws
405 |
406 | ## Plugin-specific files:
407 |
408 | # IntelliJ
409 | out/
410 |
411 | # mpeltonen/sbt-idea plugin
412 | .idea_modules/
413 |
414 | # JIRA plugin
415 | atlassian-ide-plugin.xml
416 |
417 | # Cursive Clojure plugin
418 | .idea/replstate.xml
419 |
420 | # Crashlytics plugin (for Android Studio and IntelliJ)
421 | com_crashlytics_export_strings.xml
422 | crashlytics.properties
423 | crashlytics-build.properties
424 | fabric.properties
425 |
426 |
427 |
428 | ########################
429 | ### TEMPLATE MS AND LIBRE OFFICE
430 | ########################
431 | *.tmp
432 |
433 | # Word temporary
434 | ~$*.doc*
435 |
436 | # Excel temporary
437 | ~$*.xls*
438 |
439 | # Excel Backup File
440 | *.xlk
441 |
442 | # PowerPoint temporary
443 | ~$*.ppt*
444 |
445 | # Visio autosave temporary files
446 | *.~vsd*
447 |
448 |
449 | # LibreOffice locks
450 | .~lock.*#
451 |
452 |
453 |
454 | ########################
455 | ### TEMPLATE DROPBOX
456 | ########################
457 | # Dropbox settings and caches
458 | .dropbox
459 | .dropbox.attr
460 | .dropbox.cache
461 |
462 |
463 |
464 | ########################
465 | ### TEMPLATE LINUX
466 | ########################
467 | *~
468 |
469 | # temporary files which can be created if a process still has a handle open of a deleted file
470 | .fuse_hidden*
471 |
472 | # KDE directory preferences
473 | .directory
474 |
475 | # Linux trash folder which might appear on any partition or disk
476 | .Trash-*
477 |
478 | # .nfs files are created when an open file is removed but is still being accessed
479 | .nfs*
480 |
481 |
482 |
483 | ########################
484 | ### TEMPLATE MAC OS
485 | ########################
486 | # General
487 | .DS_Store
488 | .AppleDouble
489 | .LSOverride
490 |
491 | # Icon must end with two \r
492 | Icon
493 |
494 |
495 | # Thumbnails
496 | ._*
497 |
498 | # Files that might appear in the root of a volume
499 | .DocumentRevisions-V100
500 | .fseventsd
501 | .Spotlight-V100
502 | .TemporaryItems
503 | .Trashes
504 | .VolumeIcon.icns
505 | .com.apple.timemachine.donotpresent
506 |
507 | # Directories potentially created on remote AFP share
508 | .AppleDB
509 | .AppleDesktop
510 | Network Trash Folder
511 | Temporary Items
512 | .apdisk
513 |
514 |
515 |
516 | ########################
517 | ### TEMPLATE FOR WINDOWS FILES
518 | ########################
519 | # Windows thumbnail cache files
520 | Thumbs.db
521 | ehthumbs.db
522 | ehthumbs_vista.db
523 |
524 | # Dump file
525 | *.stackdump
526 |
527 | # Folder config file
528 | [Dd]esktop.ini
529 |
530 | # Recycle Bin used on file shares
531 | $RECYCLE.BIN/
532 |
533 | # Windows Installer files
534 | *.cab
535 | *.msi
536 | *.msm
537 | *.msp
538 |
539 | # Windows shortcuts
540 | *.lnk
541 |
--------------------------------------------------------------------------------
/1_N-dimensional_arrays_and_Tensor_class.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Multi-dimensional arrays and Tensor class\n",
8 | "### Last modification (05.06.2018).\n",
9 | "\n",
10 | "\n",
 11 | "In this tutorial we will show the core data structures of multidimensional arrays within tensor algebra and illustrate how they are integrated into [hottbox](https://github.com/hottbox/hottbox). For more details visit our [documentation page](https://hottbox.github.io/stable/api/hottbox.core.html#module-hottbox.core).\n",
12 | "\n",
13 | "**Requirements:** ``hottbox==0.1.3``\n",
14 | "\n",
15 | "**Authors:** \n",
16 | "Ilya Kisil (ilyakisil@gmail.com); \n",
17 | "Giuseppe G. Calvi (ggc115@ic.ac.uk)"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 1,
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "import numpy as np\n",
27 | "from hottbox.core import Tensor"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "\n",
35 | "\n",
 36 | "Tensor is a multi-dimensional array of data where each dimension is conventionally referred to as **mode**. Its order is defined by the number of its modes which is equivalent to the number of indices required to identify a particular entry of a multi-dimensional array. For example, an element of a third order tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I \\times J \\times K}$ can be written in general form as:\n",
37 | "\n",
38 | "$$ x_{ijk} = \\mathbf{\\underline{X}}[i, j, k]$$\n",
39 | "\n",
40 | "\n",
41 | "## Tensor class in hottbox\n",
42 | "In order to create tensor using **`hottbox`**, you simply need to pass numpy ndarray to the constructor of the **`Tensor`** class. This will allow you to use top level API for the most common properties and operations on the tensor itself that correspond to the conventional definitions. \n",
43 | "\n",
 44 | "**Note:** In order to be consistent with python indexing, counting of modes starts from zero."
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 2,
50 | "metadata": {},
51 | "outputs": [
52 | {
53 | "name": "stdout",
54 | "output_type": "stream",
55 | "text": [
56 | "This tensor is of order 3 and consists of 24 elements.\n",
57 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n"
58 | ]
59 | },
60 | {
61 | "data": {
62 | "text/plain": [
63 | "array([[[ 0, 1, 2, 3],\n",
64 | " [ 4, 5, 6, 7],\n",
65 | " [ 8, 9, 10, 11]],\n",
66 | "\n",
67 | " [[12, 13, 14, 15],\n",
68 | " [16, 17, 18, 19],\n",
69 | " [20, 21, 22, 23]]])"
70 | ]
71 | },
72 | "execution_count": 2,
73 | "metadata": {},
74 | "output_type": "execute_result"
75 | }
76 | ],
77 | "source": [
78 | "array_3d = np.arange(24).reshape((2, 3, 4))\n",
79 | "tensor = Tensor(array_3d)\n",
80 | "print(tensor)\n",
81 | "tensor.data"
82 | ]
83 | },
84 | {
85 | "cell_type": "markdown",
86 | "metadata": {},
87 | "source": [
88 | "As mentioned previously, the conventional names of the tensor characteristics (e.g. order, shape, size) are preserved for the objects of **`Tensor`** class."
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 3,
94 | "metadata": {},
95 | "outputs": [
96 | {
97 | "name": "stdout",
98 | "output_type": "stream",
99 | "text": [
100 | "This tensor is of order 3.\n",
101 | "The sizes of its modes are (2, 3, 4) respectively.\n",
102 | "It consists of 24 elemetns.\n",
103 | "Its Frobenious norm = 65.76\n"
104 | ]
105 | }
106 | ],
107 | "source": [
108 | "print('This tensor is of order {}.'.format(tensor.order))\n",
109 | "print('The sizes of its modes are {} respectively.'.format(tensor.shape))\n",
110 | "print('It consists of {} elemetns.'.format(tensor.size))\n",
111 | "print('Its Frobenious norm = {:.2f}'.format(tensor.frob_norm))"
112 | ]
113 | },
114 | {
115 | "cell_type": "markdown",
116 | "metadata": {},
117 | "source": [
118 | "# Fundamental operations with the objects of Tensor class\n",
119 | "\n",
120 | "Next, let's have a look at the fundamental operation with a tensor and how to apply them to the object of class **`Tensor`**. We shall start from defining the main substructures of a tensor. \n",
121 | "For ease of visualisation and compact notation, we consider a third order tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I \\times J \\times K}$.\n",
122 | "\n",
123 | "\n",
124 | "\n",
125 | "1. A **fiber** is a vector obtained by fixing all but one of the indices, e.g. $\\mathbf{\\underline{X}}[i,:,k]$ is the mode-2 fiber (usually referred to as row fiber). \n",
126 | "\n",
127 | "- Fixing all but two of the indices yields a matrix called a **slice** of a tensor, e.g. $\\mathbf{\\underline{X}}[:,:,k]$ is the mode-[1,2] slice (usually referred to as frontal slice).\n",
128 | "\n",
129 | "**Note:** The same principles and definitions can be applied to a tensor of arbitrarily large order. On top of that, one can obtain a **subtensor** by fixing at least three indices and letting the others vary.\n",
130 | "\n",
131 | "## Unfolding a tensor\n",
132 | "\n",
133 | "Conventionally, unfolding is considered to be a process of element mapping from a tensor to a matrix. In other words, it arranges the mode-$n$ fibers of a tensor to be the columns of the matrix and is denoted as:\n",
134 | "\n",
135 | "$$\\mathbf{\\underline{A}} \\xrightarrow{n} \\mathbf{A}_{(n)}$$\n",
136 | "\n",
137 | "Thus, this operation requires specifying a mode along which a tensor will be unfolded. For a third order tensor, a visual representation of such an operation is as follows:\n",
138 | "\n",
139 | "\n",
140 | "\n",
141 | "**Note:** it can be extended to a more general case, when one converts a tensor of order $N$ into a tensor of order $M$ where $N > M$. In this case, one would need to specify a set of modes along which a tensor will be unfolded. \n",
142 | "\n",
143 | "In **`hottbox`** this functionality is available through the corresponding methods of the **`Tensor`** class:\n",
144 | "\n",
145 | "```python\n",
146 | "tensor.unfold(mode=0)\n",
147 | "```\n",
148 | "\n",
149 | "By default, it changes the data array of a tensor. If you want to get unfolded tensor as a new object then use the following:\n",
150 | "\n",
151 | "```python\n",
152 | "tensor_unfolded = tensor.unfold(mode=0, inplace=False)\n",
153 | "```"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 4,
159 | "metadata": {},
160 | "outputs": [
161 | {
162 | "data": {
163 | "text/plain": [
164 | "array([[[ 0, 1, 2, 3],\n",
165 | " [ 4, 5, 6, 7],\n",
166 | " [ 8, 9, 10, 11]],\n",
167 | "\n",
168 | " [[12, 13, 14, 15],\n",
169 | " [16, 17, 18, 19],\n",
170 | " [20, 21, 22, 23]]])"
171 | ]
172 | },
173 | "execution_count": 4,
174 | "metadata": {},
175 | "output_type": "execute_result"
176 | }
177 | ],
178 | "source": [
179 | "array_3d = np.arange(24).reshape((2, 3, 4))\n",
180 | "tensor = Tensor(array_3d)\n",
181 | "tensor.data"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": 5,
187 | "metadata": {},
188 | "outputs": [
189 | {
190 | "data": {
191 | "text/plain": [
192 | "array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],\n",
193 | " [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]])"
194 | ]
195 | },
196 | "execution_count": 5,
197 | "metadata": {},
198 | "output_type": "execute_result"
199 | }
200 | ],
201 | "source": [
202 | "tensor_unfolded = tensor.unfold(mode=0, inplace=False)\n",
203 | "tensor_unfolded.data"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": 6,
209 | "metadata": {},
210 | "outputs": [
211 | {
212 | "data": {
213 | "text/plain": [
214 | "array([[[ 0, 1, 2, 3],\n",
215 | " [ 4, 5, 6, 7],\n",
216 | " [ 8, 9, 10, 11]],\n",
217 | "\n",
218 | " [[12, 13, 14, 15],\n",
219 | " [16, 17, 18, 19],\n",
220 | " [20, 21, 22, 23]]])"
221 | ]
222 | },
223 | "execution_count": 6,
224 | "metadata": {},
225 | "output_type": "execute_result"
226 | }
227 | ],
228 | "source": [
229 | "tensor.data"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 7,
235 | "metadata": {},
236 | "outputs": [
237 | {
238 | "data": {
239 | "text/plain": [
240 | "array([[ 0, 1, 2, 3, 12, 13, 14, 15],\n",
241 | " [ 4, 5, 6, 7, 16, 17, 18, 19],\n",
242 | " [ 8, 9, 10, 11, 20, 21, 22, 23]])"
243 | ]
244 | },
245 | "execution_count": 7,
246 | "metadata": {},
247 | "output_type": "execute_result"
248 | }
249 | ],
250 | "source": [
251 | "tensor.unfold(mode=1)\n",
252 | "tensor.data"
253 | ]
254 | },
255 | {
256 | "cell_type": "markdown",
257 | "metadata": {},
258 | "source": [
259 | "## Folding of a tensor\n",
260 | "\n",
261 | "Folding is most commonly referred to as a process of element mapping from a matrix or a vector to a tensor. However, it can be extended to a more general case, when one converts a tensor of order $N$ into a tensor of order $M$ where $N < M$.\n",
262 | "\n",
263 | "\n",
264 | "\n",
265 | "In **`hottbox`** this functionality is available through the corresponding methods of the **`Tensor`** class:\n",
266 | "\n",
267 | "```python\n",
268 | "tensor_unfolded.fold()\n",
269 | "```\n",
270 | "\n",
271 | "By default, it changes the data array of a tensor. If you want to get folded tensor as a new object then use the following:\n",
272 | "\n",
273 | "```python\n",
274 | "tensor_folded = tensor_unfolded.fold(inplace=False)\n",
275 | "```\n",
276 | "\n",
277 | "In **`hottbox`** this operation merely reverts the unfolding operation. Thus, there is no need to pass any parameters (all relevant information is extracted behind the scenes) and can be used only for a tensor in an unfolded state.\n",
278 | "\n",
279 | "**Note:** Canonical folding and unfolding will be implemented in future releases of **`hottbox`**."
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": 8,
285 | "metadata": {},
286 | "outputs": [
287 | {
288 | "data": {
289 | "text/plain": [
290 | "array([[[ 0, 1, 2, 3],\n",
291 | " [ 4, 5, 6, 7],\n",
292 | " [ 8, 9, 10, 11]],\n",
293 | "\n",
294 | " [[12, 13, 14, 15],\n",
295 | " [16, 17, 18, 19],\n",
296 | " [20, 21, 22, 23]]])"
297 | ]
298 | },
299 | "execution_count": 8,
300 | "metadata": {},
301 | "output_type": "execute_result"
302 | }
303 | ],
304 | "source": [
305 | "array_3d = np.arange(24).reshape((2, 3, 4))\n",
306 | "tensor = Tensor(array_3d)\n",
307 | "tensor.data"
308 | ]
309 | },
310 | {
311 | "cell_type": "code",
312 | "execution_count": 9,
313 | "metadata": {},
314 | "outputs": [
315 | {
316 | "data": {
317 | "text/plain": [
318 | "array([[ 0, 1, 2, 3, 12, 13, 14, 15],\n",
319 | " [ 4, 5, 6, 7, 16, 17, 18, 19],\n",
320 | " [ 8, 9, 10, 11, 20, 21, 22, 23]])"
321 | ]
322 | },
323 | "execution_count": 9,
324 | "metadata": {},
325 | "output_type": "execute_result"
326 | }
327 | ],
328 | "source": [
329 | "tensor.unfold(mode=1)\n",
330 | "tensor.data"
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 10,
336 | "metadata": {},
337 | "outputs": [
338 | {
339 | "data": {
340 | "text/plain": [
341 | "array([[[ 0, 1, 2, 3],\n",
342 | " [ 4, 5, 6, 7],\n",
343 | " [ 8, 9, 10, 11]],\n",
344 | "\n",
345 | " [[12, 13, 14, 15],\n",
346 | " [16, 17, 18, 19],\n",
347 | " [20, 21, 22, 23]]])"
348 | ]
349 | },
350 | "execution_count": 10,
351 | "metadata": {},
352 | "output_type": "execute_result"
353 | }
354 | ],
355 | "source": [
356 | "tensor.fold()\n",
357 | "tensor.data"
358 | ]
359 | },
360 | {
361 | "cell_type": "code",
362 | "execution_count": 11,
363 | "metadata": {},
364 | "outputs": [
365 | {
366 | "data": {
367 | "text/plain": [
368 | "array([[[ 0, 1, 2, 3],\n",
369 | " [ 4, 5, 6, 7],\n",
370 | " [ 8, 9, 10, 11]],\n",
371 | "\n",
372 | " [[12, 13, 14, 15],\n",
373 | " [16, 17, 18, 19],\n",
374 | " [20, 21, 22, 23]]])"
375 | ]
376 | },
377 | "execution_count": 11,
378 | "metadata": {},
379 | "output_type": "execute_result"
380 | }
381 | ],
382 | "source": [
383 | "tensor_unfolded = tensor.unfold(mode=1, inplace=False)\n",
384 | "tensor_folded = tensor_unfolded.fold(inplace=False)\n",
385 | "tensor_folded.data"
386 | ]
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": 12,
391 | "metadata": {},
392 | "outputs": [
393 | {
394 | "data": {
395 | "text/plain": [
396 | "array([[ 0, 1, 2, 3, 12, 13, 14, 15],\n",
397 | " [ 4, 5, 6, 7, 16, 17, 18, 19],\n",
398 | " [ 8, 9, 10, 11, 20, 21, 22, 23]])"
399 | ]
400 | },
401 | "execution_count": 12,
402 | "metadata": {},
403 | "output_type": "execute_result"
404 | }
405 | ],
406 | "source": [
407 | "tensor_unfolded.data"
408 | ]
409 | },
410 | {
411 | "cell_type": "markdown",
412 | "metadata": {},
413 | "source": [
414 | "## Mode-n product\n",
415 | "\n",
416 | "The mode-$n$ product is the multiplication of a tensor by a matrix along the $n^{th}$ mode of a tensor. This essentially means that each mode-$n$ fiber should be multiplied by this matrix. Mathematically, this is expressed as:\n",
417 | "\n",
418 | "$$\\mathbf{\\underline{X}} \\times_n \\mathbf{A} = \\mathbf{\\underline{Y}} \\quad \\Leftrightarrow \\quad \\mathbf{Y}_{(n)} = \\mathbf{A} \\mathbf{X}_{(n)} $$\n",
419 | "\n",
420 | "\n",
421 | "\n",
422 | "Important properties of the mode-$n$ product:\n",
423 | "\n",
424 | "1. For distinct modes in a series of multiplications, the order of the multiplication is irrelevant: \n",
425 | "\n",
426 | " $$\\mathbf{\\underline{X}} \\times_n \\mathbf{A} \\times_m \\mathbf{B} = \\mathbf{\\underline{X}} \\times_m \\mathbf{B} \\times_n \\mathbf{A} \\quad (m \\neq n)$$\n",
427 | "\n",
428 | "- However, it does not hold if the modes are the same :\n",
429 | "\n",
430 | " $$\\mathbf{\\underline{X}} \\times_n \\mathbf{A} \\times_n \\mathbf{B} = \\mathbf{\\underline{X}} \\times_n (\\mathbf{B}\\mathbf{A})$$\n",
431 | "\n",
432 | "In **`hottbox`**, mode-$n$ product is available through the corresponding method of the **`Tensor`** class:\n",
433 | "\n",
434 | "```python\n",
435 | "tensor.mode_n_product(matrix, mode=n)\n",
436 | "```\n",
437 | "\n",
438 | "By default, it changes the data array of a tensor. If you want to get a resulting tensor as a new object use the following:\n",
439 | "\n",
440 | "```python\n",
441 | "tensor.mode_n_product(matrix, mode=n, inplace=False)\n",
442 | "```\n",
443 | "\n",
444 | "Starting from **`hottbox v0.1.3`**, you can perform mode-n product with a **`matrix`** represented either as a **`numpy array`** or as an object of **`Tensor`** class.\n",
445 | "\n",
446 | "In the following example, we will consider the sequence of mode-$n$ products:\n",
447 | "\n",
448 | "$$\\mathbf{\\underline{Y}} = \\mathbf{\\underline{X}} \\times_2 \\mathbf{A} \\times_3 \\mathbf{B}$$\n",
449 | "$$\\mathbf{\\underline{Z}} = \\mathbf{\\underline{X}} \\times_3 \\mathbf{B} \\times_2 \\mathbf{A}$$\n",
450 | "\n",
451 | "Where $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{2 \\times 3 \\times 4}, \\mathbf{A} \\in \\mathbb{R}^{5 \\times 3}$ and $\\mathbf{B} \\in \\mathbb{R}^{6 \\times 4}$. Thus, the resulting tensors $\\mathbf{\\underline{Y}}, \\mathbf{\\underline{Z}}$ will be equal and of shape (2,5,6), i.e. $\\mathbf{\\underline{Y}} \\in \\mathbb{R}^{2 \\times 5 \\times 6}$\n",
452 | "\n",
453 | "In order to perform a sequence of mode-$n$ products, methods can be chained. "
454 | ]
455 | },
456 | {
457 | "cell_type": "code",
458 | "execution_count": 13,
459 | "metadata": {},
460 | "outputs": [
461 | {
462 | "name": "stdout",
463 | "output_type": "stream",
464 | "text": [
465 | "The initial shape of tensor X is (2, 3, 4)\n",
466 | "The shape of tensor Y is (2, 5, 6)\n",
467 | "The shape of tensor Z is (2, 5, 6)\n"
468 | ]
469 | }
470 | ],
471 | "source": [
472 | "I, J, K = 2, 3, 4\n",
473 | "J_new, K_new = 5, 6\n",
474 | "\n",
475 | "array_3d = np.arange(I * J * K).reshape(I, J ,K)\n",
476 | "X = Tensor(array_3d)\n",
477 | "A = np.arange(J_new * J).reshape(J_new, J)\n",
478 | "B = np.arange(K_new * K).reshape(K_new, K)\n",
479 | "\n",
480 | "Y = X.mode_n_product(A, mode=1, inplace=False).mode_n_product(B, mode=2, inplace=False)\n",
481 | "\n",
482 | "# Perform mode-n product in reversed order\n",
483 | "Z = X.mode_n_product(B, mode=2, inplace=False).mode_n_product(A, mode=1, inplace=False)\n",
484 | "\n",
485 | "print('The initial shape of tensor X is {}'.format(X.shape))\n",
486 | "print('The shape of tensor Y is {}'.format(Y.shape))\n",
487 | "print('The shape of tensor Z is {}'.format(Z.shape))"
488 | ]
489 | },
490 | {
491 | "cell_type": "markdown",
492 | "metadata": {},
493 | "source": [
494 | "Next, we will change a tensor data itself by applying the same mode-$n$ products to it."
495 | ]
496 | },
497 | {
498 | "cell_type": "code",
499 | "execution_count": 14,
500 | "metadata": {},
501 | "outputs": [
502 | {
503 | "name": "stdout",
504 | "output_type": "stream",
505 | "text": [
506 | "The shape of tensor X is (2, 5, 6)\n"
507 | ]
508 | }
509 | ],
510 | "source": [
511 | "X.mode_n_product(A, mode=1).mode_n_product(B, mode=2)\n",
512 | "print('The shape of tensor X is {}'.format(X.shape))"
513 | ]
514 | },
515 | {
516 | "cell_type": "markdown",
517 | "metadata": {},
518 | "source": [
519 | "Here, even though **`X`**, **`Y`** and **`Z`** are different objects, their data values will remain the same since the same operations were applied to them. We can verify that by:\n",
520 | "1. Subtraction of their data arrays which should result in an array filled with zeros\n",
521 | "- Using numpy assertion utility which should not raise an **`AssertionError`**.\n",
522 | "\n",
523 | "We will use the second option."
524 | ]
525 | },
526 | {
527 | "cell_type": "code",
528 | "execution_count": 15,
529 | "metadata": {},
530 | "outputs": [
531 | {
532 | "name": "stdout",
533 | "output_type": "stream",
534 | "text": [
535 | "The underlying data arrays are equal for all of them.\n"
536 | ]
537 | }
538 | ],
539 | "source": [
540 | "np.testing.assert_array_equal(Y.data, Z.data)\n",
541 | "np.testing.assert_array_equal(X.data, Y.data)\n",
542 | "np.testing.assert_array_equal(X.data, Z.data)\n",
543 | "print('The underlying data arrays are equal for all of them.')"
544 | ]
545 | },
546 | {
547 | "cell_type": "markdown",
548 | "metadata": {},
549 | "source": [
550 | "# Additional notes on API of Tensor class\n",
551 | "\n",
552 | "1. When an object of the **`Tensor`** class is created, the numpy array with data values is stored in the **`_data`** placeholder with the corresponding property **`data`** for accessing it. If you want to modify these values, then call the corresponding transformation methods available for the **`Tensor`** class."
553 | ]
554 | },
555 | {
556 | "cell_type": "markdown",
557 | "metadata": {},
558 | "source": [
559 | "# Further reading list\n",
560 | "- Tamara G. Kolda and Brett W. Bader, \"Tensor decompositions and applications.\" SIAM REVIEW, 51(3):455–500, 2009."
561 | ]
562 | }
563 | ],
564 | "metadata": {
565 | "kernelspec": {
566 | "display_name": "hottbox-tutorials",
567 | "language": "python",
568 | "name": "hottbox-tutorials"
569 | },
570 | "language_info": {
571 | "codemirror_mode": {
572 | "name": "ipython",
573 | "version": 3
574 | },
575 | "file_extension": ".py",
576 | "mimetype": "text/x-python",
577 | "name": "python",
578 | "nbconvert_exporter": "python",
579 | "pygments_lexer": "ipython3",
580 | "version": "3.6.6"
581 | }
582 | },
583 | "nbformat": 4,
584 | "nbformat_minor": 2
585 | }
586 |
--------------------------------------------------------------------------------
/2_Efficient_representations_of_tensors.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Efficient representation of multidimensional arrays.\n",
8 | "### Last modification (05.06.2018)\n",
9 | "\n",
10 | "\n",
11 | "\n",
12 | "\n",
 13 | "In this tutorial we provide a theoretical background on efficient representation of multidimensional arrays and show how these data structures are integrated into [hottbox](https://github.com/hottbox/hottbox) through **TensorCPD**, **TensorTKD** and **TensorTT** classes.\n",
14 | "\n",
15 | "More details on **TensorCPD**, **TensorTKD** and **TensorTT** classes can be found on our [documentation page](https://hottbox.github.io/stable/api/hottbox.core.html#module-hottbox.core).\n",
16 | "\n",
17 | "**Note:** this tutorial assumes that you are familiar with the basics of tensor algebra and the corresponding conventional notation. If you are new to this area, the required background is covered in our [introductory notebook](https://github.com/hottbox/hottbox-tutorials/blob/master/1_N-dimensional_arrays_and_Tensor_class.ipynb).\n",
18 | "\n",
19 | "**Requirements:** ``hottbox==0.1.3``\n",
20 | "\n",
21 | "**Authors:** \n",
22 | "Ilya Kisil (ilyakisil@gmail.com); \n",
23 | "Giuseppe G. Calvi (ggc115@ic.ac.uk)"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 1,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "import numpy as np\n",
33 | "from hottbox.core import Tensor, TensorCPD, TensorTKD, TensorTT"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "# Outer product, rank-1 tensor and definitions of rank of a multi-dimensional array.\n",
41 | "\n",
42 | "\n",
43 | "The central operator in tensor analysis is the outer product (sometimes referred to as the tensor product). \n",
44 | "Consider tensors $\\mathbf{\\underline{A}} \\in \\mathbb{R}^{I_1 \\times \\cdots \\times I_N}$ and $\\mathbf{\\underline{B}} \\in \\mathbb{R}^{J_1 \\times \\cdots \\times J_M}$, then their outer product yields a tensor of higher order than both of them:\n",
45 | "\n",
46 | "$$\n",
47 | "\\begin{equation}\n",
48 | "\\begin{aligned}\n",
49 | " \\mathbf{\\underline{A}} \\circ \\mathbf{\\underline{B}} &= \\mathbf{\\underline{C}} \\in \\mathbb{R}^{I_1 \\times \\cdots \\times I_N \\times J_1 \\times \\cdots \\times J_M} \\\\\n",
50 | " a_{i_1,\\dots,i_N}b_{j_1,\\dots,j_M} &= c_{i_1,\\dots,i_N,j_1,\\dots,j_M} \n",
51 | "\\end{aligned} \n",
52 | "\\end{equation}\n",
53 | "$$\n",
54 | "\n",
55 | "Most of the time we deal with the outer product of vectors, which significantly simplifies the general form expressed above and establishes one of the most fundamental definitions. A tensor of order $N$ is said to be of **rank-1** if it can be represented as an outer product of $N$ vectors. The figure below illustrates an example of a rank-1 tensor $\\mathbf{\\underline{X}}$ and provides intuition of how the outer product operation is computed:\n",
56 | "\n",
57 | "\n",
58 | "\n",
59 | "There are several forms of the rank of N-dimensional arrays, each of which is associated with a representation of a tensor in a particular form:\n",
60 | "\n",
61 | "1. Kruskal rank $\\rightarrow$ canonical polyadic form.\n",
62 | "\n",
63 | "- Multi-linear rank $\\rightarrow$ tucker form.\n",
64 | "\n",
65 | "- TT rank $\\rightarrow$ tensor train form.\n",
66 | "\n",
67 | "Each of these representations has a corresponding class: **``TensorCPD``**, **``TensorTKD``**, **``TensorTT``**. All of them come with an almost identical API except for object creation and, as a result, the names for some attributes. But before we can proceed, it is crucial to get acquainted with the following definitions."
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "# Canonical Polyadic representation (CP), Kruskal rank and TensorCPD class\n",
75 | "\n",
76 | "\n",
77 | "\n",
78 | "## Kruskal rank\n",
79 | "This figure illustrates a tensor $\\mathbf{\\underline{X}}$ of rank $R$. The **rank** of a tensor $\\mathbf{\\underline{X}}$ is defined as the smallest number of rank-one tensors that produce $\\mathbf{\\underline{X}}$ as their linear combination. This definition of a tensor rank is also known as the **Kruskal rank**.\n",
80 | "\n",
81 | "## CP representation\n",
82 | "A third order tensor of rank $R$ can be expressed as follows:\n",
83 | "\n",
84 | "$$\\mathbf{\\underline{X}} = \\sum_{r=1}^R \\mathbf{\\underline{X}}_r = \\sum_{r=1}^R \\lambda_{r} \\cdot \\mathbf{a}_r \\circ \\mathbf{b}_r \\circ \\mathbf{c}_r$$\n",
85 | "\n",
86 | "The vectors $\\mathbf{a}_r, \\mathbf{b}_r$ and $\\mathbf{c}_r$ are oftentimes combined into corresponding **factor matrices**:\n",
87 | "\n",
88 | "$$\n",
89 | "\\mathbf{A} = \\Big[ \\mathbf{a}_1 \\cdots \\mathbf{a}_R \\Big] \\quad\n",
90 | "\\mathbf{B} = \\Big[ \\mathbf{b}_1 \\cdots \\mathbf{b}_R \\Big] \\quad\n",
91 | "\\mathbf{C} = \\Big[ \\mathbf{c}_1 \\cdots \\mathbf{c}_R \\Big] \\quad\n",
92 | "$$\n",
93 | "\n",
94 | "Thus, if we employ the mode-$n$ product, the canonical polyadic representation takes form:\n",
95 | "\n",
96 | "$$\n",
97 | "\\mathbf{\\underline{X}} = \\mathbf{\\underline{\\Lambda}} \\times_1 \\mathbf{A} \\times_2 \\mathbf{B} \\times_3 \\mathbf{C} = \\Big[\\mathbf{\\underline{\\Lambda}}; \\mathbf{A}, \\mathbf{B}, \\mathbf{C} \\Big]\n",
98 | "$$\n",
99 | "\n",
100 | "where the elements on the super-diagonal of $\\mathbf{\\underline{\\Lambda}}$ are occupied by the values $\\lambda_r$ and all other equal to zero. This is the **canonical polyadic (CP)** representation of the original tensor\n",
101 | "and can be visualised as shown on figure below:\n",
102 | "\n",
103 | "\n",
104 | "\n",
105 | "\n",
106 | "## TensorCPD class in hottbox\n",
107 | "\n",
108 | "In **`hottbox`**, this form is available through the **``TensorCPD``** class. In order to create such object, you need to pass a list of factor matrices (2d numpy arrays) and a vector of values (as 1d numpy array) for the main diagonal:\n",
109 | "\n",
110 | "```python\n",
111 | "tensor_cpd = TensorCPD(fmat=[A, B, C], core_values=values)\n",
112 | "```\n",
113 | "\n",
114 | "**Note:** all factor matrices should have the same number of columns, which should be equal to the length of ``values``"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 2,
120 | "metadata": {},
121 | "outputs": [
122 | {
123 | "name": "stdout",
124 | "output_type": "stream",
125 | "text": [
126 | "Kruskal representation of a tensor with rank=(2,).\n",
127 | "Factor matrices represent properties: ['mode-0', 'mode-1', 'mode-2']\n",
128 | "With corresponding latent components described by (3, 4, 5) features respectively.\n"
129 | ]
130 | }
131 | ],
132 | "source": [
133 | "I, J, K = 3, 4, 5 # define shape of the tensor in full form\n",
134 | "R = 2                  # define Kruskal rank of a tensor in CP form \n",
135 | "\n",
136 | "A = np.arange(I * R).reshape(I, R)\n",
137 | "B = np.arange(J * R).reshape(J, R)\n",
138 | "C = np.arange(K * R).reshape(K, R)\n",
139 | "values = np.arange(R)\n",
140 | "\n",
141 | "tensor_cpd = TensorCPD(fmat=[A, B, C], core_values=values)\n",
142 | "print(tensor_cpd)"
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {},
148 | "source": [
149 | "The list of factor matrices **[A, B, C]** is stored in the **`_fmat`** placeholder which can (should) be accessed through the corresponding property **`fmat`**. The values for the super-diagonal are stored in the **`_core_values`** placeholder. But there is no direct access to them, because they are used for the creation of the core tensor:\n",
150 | "\n",
151 | "```python\n",
152 | "tensor_cpd.core\n",
153 | "```\n",
154 | "\n",
155 | "This returns an object of the **``Tensor``** class with the **``_core_values``** placed on its super-diagonal."
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": 3,
161 | "metadata": {},
162 | "outputs": [
163 | {
164 | "name": "stdout",
165 | "output_type": "stream",
166 | "text": [
167 | "\tFactor matrices\n",
168 | "Mode-0 factor matrix is of shape (3, 2)\n",
169 | "Mode-1 factor matrix is of shape (4, 2)\n",
170 | "Mode-2 factor matrix is of shape (5, 2)\n",
171 | "\n",
172 | "\tCore tensor\n",
173 | "This tensor is of order 3 and consists of 8 elements.\n",
174 | "Sizes and names of its modes are (2, 2, 2) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n"
175 | ]
176 | },
177 | {
178 | "data": {
179 | "text/plain": [
180 | "array([[[0., 0.],\n",
181 | " [0., 0.]],\n",
182 | "\n",
183 | " [[0., 0.],\n",
184 | " [0., 1.]]])"
185 | ]
186 | },
187 | "execution_count": 3,
188 | "metadata": {},
189 | "output_type": "execute_result"
190 | }
191 | ],
192 | "source": [
193 | "print('\\tFactor matrices')\n",
194 | "for mode, fmat in enumerate(tensor_cpd.fmat):\n",
195 | " print('Mode-{} factor matrix is of shape {}'.format(mode, fmat.shape))\n",
196 | " \n",
197 | "print('\\n\\tCore tensor')\n",
198 | "print(tensor_cpd.core)\n",
199 | "tensor_cpd.core.data"
200 | ]
201 | },
202 | {
203 | "cell_type": "markdown",
204 | "metadata": {},
205 | "source": [
206 | "In order to convert **``TensorCPD``** into the full representation, simply call: \n",
207 | "\n",
208 | "```python\n",
209 | "tensor_cpd.reconstruct()\n",
210 | "```\n",
211 | "\n",
212 | "This returns an object of the **``Tensor``** class with an N-dimensional array calculated as described above and assigned to the **``_data``** attribute."
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": 4,
218 | "metadata": {},
219 | "outputs": [
220 | {
221 | "name": "stdout",
222 | "output_type": "stream",
223 | "text": [
224 | "This tensor is of order 3 and consists of 60 elements.\n",
225 | "Sizes and names of its modes are (3, 4, 5) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n"
226 | ]
227 | },
228 | {
229 | "data": {
230 | "text/plain": [
231 | "array([[[ 1., 3., 5., 7., 9.],\n",
232 | " [ 3., 9., 15., 21., 27.],\n",
233 | " [ 5., 15., 25., 35., 45.],\n",
234 | " [ 7., 21., 35., 49., 63.]],\n",
235 | "\n",
236 | " [[ 3., 9., 15., 21., 27.],\n",
237 | " [ 9., 27., 45., 63., 81.],\n",
238 | " [ 15., 45., 75., 105., 135.],\n",
239 | " [ 21., 63., 105., 147., 189.]],\n",
240 | "\n",
241 | " [[ 5., 15., 25., 35., 45.],\n",
242 | " [ 15., 45., 75., 105., 135.],\n",
243 | " [ 25., 75., 125., 175., 225.],\n",
244 | " [ 35., 105., 175., 245., 315.]]])"
245 | ]
246 | },
247 | "execution_count": 4,
248 | "metadata": {},
249 | "output_type": "execute_result"
250 | }
251 | ],
252 | "source": [
253 | "tensor_full = tensor_cpd.reconstruct()\n",
254 | "print(tensor_full)\n",
255 | "tensor_full.data"
256 | ]
257 | },
258 | {
259 | "cell_type": "markdown",
260 | "metadata": {},
261 | "source": [
262 | "# Tucker representation, Multi-linear rank and TensorTKD class\n",
263 | "\n",
264 | "## Multi-linear rank\n",
265 | "\n",
266 | "The **multi-linear rank** of a tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I_1 \\times \\cdots \\times I_N}$ is the $N$-tuple $(R_1, \\dots, R_N)$ where each $R_n$ is the rank of the subspace spanned by mode-$n$ fibers, i.e. $R_n = \\text{rank} \\big( \\mathbf{X}_{(n)} \\big)$. For a tensor of order $N$ the values $R_1, R_2, \\dots , R_N$ are not necessarily the same, whereas, for matrices (tensors of order 2) the equality $R_1 = R_2$ always holds, where $R_1$ and $R_2$ are the matrix column rank and row rank respectively.\n",
267 | "\n",
268 | "\n",
269 | "## Tucker representation\n",
270 | "\n",
271 | "\n",
272 | "For a tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I \\times J \\times K}$ illustrated above, the **tucker form** represents it as a dense core tensor $\\mathbf{\\underline{G}}$ with multi-linear rank ($Q, R, P$) and a set of factor matrices $\\mathbf{A} \\in \\mathbb{R}^{I \\times Q}, \\mathbf{B} \\in \\mathbb{R}^{J \\times R}$ and $\\mathbf{C} \\in \\mathbb{R}^{K \\times P}$.\n",
273 | "\n",
274 | "The tucker form of a tensor is closely related to the CP form and can be expressed through a \n",
275 | "sequence of mode-$n$ products in a similar way.\n",
276 | "\n",
277 | "$$\n",
278 | "\\mathbf{\\underline{X}} = \\mathbf{\\underline{G}} \\times_1 \\mathbf{A} \\times_2 \\mathbf{B} \\times_3 \\mathbf{C} = \\Big[\\mathbf{\\underline{G}}; \\mathbf{A}, \\mathbf{B}, \\mathbf{C} \\Big]\n",
279 | "$$\n",
280 | "\n",
281 | "## TensorTKD class in hottbox\n",
282 | "\n",
283 | "In **`hottbox`**, this form is available through the **``TensorTKD``** class. In order to create such object, you need to pass a list of $N$ factor matrices (2d numpy arrays) and values for the core tensor (as n-dimensional numpy array):\n",
284 | "\n",
285 | "```python\n",
286 | "tensor_tkd = TensorTKD(fmat=[A, B, C], core_values=values)\n",
287 | "```\n",
288 | "\n",
289 | "**Note:** the number of columns in each of the factor matrices should be the same as the corresponding size of the numpy array with the values for the core tensor"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": 5,
295 | "metadata": {},
296 | "outputs": [
297 | {
298 | "name": "stdout",
299 | "output_type": "stream",
300 | "text": [
301 | "Tucker representation of a tensor with multi-linear rank=(2, 3, 4).\n",
302 | "Factor matrices represent properties: ['mode-0', 'mode-1', 'mode-2']\n",
303 | "With corresponding latent components described by (5, 6, 7) features respectively.\n"
304 | ]
305 | }
306 | ],
307 | "source": [
308 | "I, J, K = 5, 6, 7 # define shape of the tensor in full form\n",
309 | "Q, R, P = 2, 3, 4 # define multi-linear rank of the tensor in Tucker form\n",
310 | "\n",
311 | "A = np.arange(I * Q).reshape(I, Q)\n",
312 | "B = np.arange(J * R).reshape(J, R)\n",
313 | "C = np.arange(K * P).reshape(K, P)\n",
314 | "values = np.arange(Q * R * P).reshape(Q, R, P)\n",
315 | "\n",
316 | "tensor_tkd = TensorTKD(fmat=[A, B, C], core_values=values)\n",
317 | "print(tensor_tkd)"
318 | ]
319 | },
320 | {
321 | "cell_type": "markdown",
322 | "metadata": {},
323 | "source": [
324 | "By analogy with the **`TensorCPD`**, the list of factor matrices **[A, B, C]** is stored in the **`_fmat`** placeholder which can (should) be accessed through the corresponding property **`fmat`**. Similarly, the values of the core tensor are stored in the **`_core_values`** placeholder and they cannot (should not) be accessed directly, because they are used to create a core tensor as an object of the **`Tensor`** class, when the corresponding property is called:\n",
325 | "\n",
326 | "```python\n",
327 | "tensor_tkd.core\n",
328 | "```\n",
329 | "\n",
330 | "**Note:** the core values occupy all data values of a core tensor, as opposed to **`TensorCPD`** class where they are placed on the main diagonal."
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 6,
336 | "metadata": {},
337 | "outputs": [
338 | {
339 | "name": "stdout",
340 | "output_type": "stream",
341 | "text": [
342 | "\tFactor matrices\n",
343 | "Mode-0 factor matrix is of shape (5, 2)\n",
344 | "Mode-1 factor matrix is of shape (6, 3)\n",
345 | "Mode-2 factor matrix is of shape (7, 4)\n",
346 | "\n",
347 | "\tCore tensor\n",
348 | "This tensor is of order 3 and consists of 24 elements.\n",
349 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n"
350 | ]
351 | },
352 | {
353 | "data": {
354 | "text/plain": [
355 | "array([[[ 0, 1, 2, 3],\n",
356 | " [ 4, 5, 6, 7],\n",
357 | " [ 8, 9, 10, 11]],\n",
358 | "\n",
359 | " [[12, 13, 14, 15],\n",
360 | " [16, 17, 18, 19],\n",
361 | " [20, 21, 22, 23]]])"
362 | ]
363 | },
364 | "execution_count": 6,
365 | "metadata": {},
366 | "output_type": "execute_result"
367 | }
368 | ],
369 | "source": [
370 | "print('\\tFactor matrices')\n",
371 | "for mode, fmat in enumerate(tensor_tkd.fmat):\n",
372 | " print('Mode-{} factor matrix is of shape {}'.format(mode, fmat.shape))\n",
373 | " \n",
374 | "print('\\n\\tCore tensor')\n",
375 | "print(tensor_tkd.core)\n",
376 | "tensor_tkd.core.data"
377 | ]
378 | },
379 | {
380 | "cell_type": "markdown",
381 | "metadata": {},
382 | "source": [
383 | "In order to convert **``TensorTKD``** into the full representation, simply call: \n",
384 | "\n",
385 | "```python\n",
386 | "tensor_tkd.reconstruct()\n",
387 | "```\n",
388 | "\n",
389 | "This returns an object of the **``Tensor``** class with an N-dimensional array calculated as \n",
390 | "described above and assigned to the **``_data``** attribute."
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "execution_count": 7,
396 | "metadata": {},
397 | "outputs": [
398 | {
399 | "name": "stdout",
400 | "output_type": "stream",
401 | "text": [
402 | "This tensor is of order 3 and consists of 210 elements.\n",
403 | "Sizes and names of its modes are (5, 6, 7) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n"
404 | ]
405 | },
406 | {
407 | "data": {
408 | "text/plain": [
409 | "array([[[ 378, 1346, 2314, 3282, 4250, 5218, 6186],\n",
410 | " [ 1368, 4856, 8344, 11832, 15320, 18808, 22296],\n",
411 | " [ 2358, 8366, 14374, 20382, 26390, 32398, 38406],\n",
412 | " [ 3348, 11876, 20404, 28932, 37460, 45988, 54516],\n",
413 | " [ 4338, 15386, 26434, 37482, 48530, 59578, 70626],\n",
414 | " [ 5328, 18896, 32464, 46032, 59600, 73168, 86736]],\n",
415 | "\n",
416 | " [[ 1458, 5146, 8834, 12522, 16210, 19898, 23586],\n",
417 | " [ 5112, 17944, 30776, 43608, 56440, 69272, 82104],\n",
418 | " [ 8766, 30742, 52718, 74694, 96670, 118646, 140622],\n",
419 | " [ 12420, 43540, 74660, 105780, 136900, 168020, 199140],\n",
420 | " [ 16074, 56338, 96602, 136866, 177130, 217394, 257658],\n",
421 | " [ 19728, 69136, 118544, 167952, 217360, 266768, 316176]],\n",
422 | "\n",
423 | " [[ 2538, 8946, 15354, 21762, 28170, 34578, 40986],\n",
424 | " [ 8856, 31032, 53208, 75384, 97560, 119736, 141912],\n",
425 | " [ 15174, 53118, 91062, 129006, 166950, 204894, 242838],\n",
426 | " [ 21492, 75204, 128916, 182628, 236340, 290052, 343764],\n",
427 | " [ 27810, 97290, 166770, 236250, 305730, 375210, 444690],\n",
428 | " [ 34128, 119376, 204624, 289872, 375120, 460368, 545616]],\n",
429 | "\n",
430 | " [[ 3618, 12746, 21874, 31002, 40130, 49258, 58386],\n",
431 | " [ 12600, 44120, 75640, 107160, 138680, 170200, 201720],\n",
432 | " [ 21582, 75494, 129406, 183318, 237230, 291142, 345054],\n",
433 | " [ 30564, 106868, 183172, 259476, 335780, 412084, 488388],\n",
434 | " [ 39546, 138242, 236938, 335634, 434330, 533026, 631722],\n",
435 | " [ 48528, 169616, 290704, 411792, 532880, 653968, 775056]],\n",
436 | "\n",
437 | " [[ 4698, 16546, 28394, 40242, 52090, 63938, 75786],\n",
438 | " [ 16344, 57208, 98072, 138936, 179800, 220664, 261528],\n",
439 | " [ 27990, 97870, 167750, 237630, 307510, 377390, 447270],\n",
440 | " [ 39636, 138532, 237428, 336324, 435220, 534116, 633012],\n",
441 | " [ 51282, 179194, 307106, 435018, 562930, 690842, 818754],\n",
442 | " [ 62928, 219856, 376784, 533712, 690640, 847568, 1004496]]])"
443 | ]
444 | },
445 | "execution_count": 7,
446 | "metadata": {},
447 | "output_type": "execute_result"
448 | }
449 | ],
450 | "source": [
451 | "tensor_full = tensor_tkd.reconstruct()\n",
452 | "print(tensor_full)\n",
453 | "tensor_full.data"
454 | ]
455 | },
456 | {
457 | "cell_type": "markdown",
458 | "metadata": {},
459 | "source": [
460 | "# Tensor Train representation, TT-rank and TensorTT class\n",
461 | "\n",
462 | "## Tensor Train representation\n",
463 | "\n",
464 | ""
465 | ]
466 | },
467 | {
468 | "cell_type": "markdown",
469 | "metadata": {},
470 | "source": [
471 | "**Tensor trains (TTs)** are the simplest kinds of tensor networks, i.e. a decomposition of a high-order tensor into a set of sparsely interconnected lower-order tensors and factor matrices. Mathematically, an $N$-th order tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I_1 \\times I_2 \\times \\cdots \\times I_N}$ can be expressed as a TT as\n",
472 | "\n",
473 | "$$\n",
474 | "\\mathbf{\\underline{X}} = \\mathbf{A} \\times^1_2 \\mathbf{\\underline{G}}^{(1)} \\times^1_3 \\mathbf{\\underline{G}}^{(2)} \\times^1_3 \\cdots \\times^1_3 \\mathbf{\\underline{G}}^{(N-1)} \\times^1_3 \\mathbf{B} = \\Big[ \\mathbf{A}, \\mathbf{\\underline{G}}^{(1)}, \\mathbf{\\underline{G}}^{(2)}, \\cdots, \\mathbf{\\underline{G}}^{(N-1)}, \\mathbf{B} \\Big]\n",
475 | "$$\n",
476 | "\n",
477 | "Each element of a TT is generally referred to as **TT-core**, and $\\mathbf{A} \\in \\mathbb{R}^{I_1 \\times R_1}$, $\\mathbf{B} \\in \\mathbb{R}^{R_{N-1}\\times I_N}$, $\\mathbf{\\underline{G}}^{(n)} \\in \\mathbb{R}^{R_n \\times I_{n+1} \\times R_{n+1}}$ and the tuple $(R_1, R_2, \\dots, R_{N-1})$ is called the **TT-rank**.\n"
478 | ]
479 | },
480 | {
481 | "cell_type": "markdown",
482 | "metadata": {},
483 | "source": [
484 | "## TensorTT class in hottbox\n",
485 | "\n",
486 | "In **`hottbox`**, this form is available through the **``TensorTT``** class. In order to create such object, you need to pass a list of values (as numpy arrays) for \n",
487 | "cores:\n",
488 | "\n",
489 | "```python\n",
490 | "tensor_tt = TensorTT(core_values=values)\n",
491 | "```"
492 | ]
493 | },
494 | {
495 | "cell_type": "code",
496 | "execution_count": 8,
497 | "metadata": {},
498 | "outputs": [
499 | {
500 | "name": "stdout",
501 | "output_type": "stream",
502 | "text": [
503 | "Tensor train representation of a tensor with tt-rank=(2, 3).\n",
504 | "Shape of this representation in the full format is (4, 5, 6).\n",
505 | "Physical modes of its cores represent properties: ['mode-0', 'mode-1', 'mode-2']\n"
506 | ]
507 | }
508 | ],
509 | "source": [
510 | "I, J, K = 4, 5, 6 # define shape of the tensor in full form\n",
511 | "R1, R2 = 2, 3 # define tt rank of the tensor in Tensor train form\n",
512 | "\n",
513 | "values_1 = np.arange(I * R1).reshape(I, R1)\n",
514 | "values_2 = np.arange(R1 * J * R2).reshape(R1, J, R2)\n",
515 | "values_3 = np.arange(R2 * K).reshape(R2, K)\n",
516 | "\n",
517 | "tensor_tt = TensorTT(core_values=[values_1, values_2, values_3])\n",
518 | "print(tensor_tt)"
519 | ]
520 | },
521 | {
522 | "cell_type": "markdown",
523 | "metadata": {},
524 | "source": [
525 | "The list of values for these core tensors is stored in **`_core_values`** placeholder. They should not be accessed directly, because they are used\n",
526 | "for creation of **`Tensor`** class objects, each of which represents a particular tt-core. The list of all cores can be accessed as \n",
527 | "\n",
528 | "```python\n",
529 | "tensor_tt.cores\n",
530 | "```\n",
531 | "\n",
532 | "**Note:** All components of the Tensor Train representation are conventionally considered to be a core; therefore, even matrices are objects of the **`Tensor`** class."
533 | ]
534 | },
535 | {
536 | "cell_type": "code",
537 | "execution_count": 9,
538 | "metadata": {},
539 | "outputs": [
540 | {
541 | "name": "stdout",
542 | "output_type": "stream",
543 | "text": [
544 | "\n",
545 | "\tCore tensor #0 of TT representation\n",
546 | "This tensor is of order 2 and consists of 8 elements.\n",
547 | "Sizes and names of its modes are (4, 2) and ['mode-0', 'mode-1'] respectively.\n",
548 | "[[0 1]\n",
549 | " [2 3]\n",
550 | " [4 5]\n",
551 | " [6 7]]\n",
552 | "\n",
553 | "\tCore tensor #1 of TT representation\n",
554 | "This tensor is of order 3 and consists of 30 elements.\n",
555 | "Sizes and names of its modes are (2, 5, 3) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n",
556 | "[[[ 0 1 2]\n",
557 | " [ 3 4 5]\n",
558 | " [ 6 7 8]\n",
559 | " [ 9 10 11]\n",
560 | " [12 13 14]]\n",
561 | "\n",
562 | " [[15 16 17]\n",
563 | " [18 19 20]\n",
564 | " [21 22 23]\n",
565 | " [24 25 26]\n",
566 | " [27 28 29]]]\n",
567 | "\n",
568 | "\tCore tensor #2 of TT representation\n",
569 | "This tensor is of order 2 and consists of 18 elements.\n",
570 | "Sizes and names of its modes are (3, 6) and ['mode-0', 'mode-1'] respectively.\n",
571 | "[[ 0 1 2 3 4 5]\n",
572 | " [ 6 7 8 9 10 11]\n",
573 | " [12 13 14 15 16 17]]\n"
574 | ]
575 | }
576 | ],
577 | "source": [
578 | "for i, tt_core in enumerate(tensor_tt.cores): \n",
579 | " print('\\n\\tCore tensor #{} of TT representation'.format(i)) \n",
580 | " print(tt_core) \n",
581 | " print(tt_core.data)"
582 | ]
583 | },
584 | {
585 | "cell_type": "markdown",
586 | "metadata": {},
587 | "source": [
588 | "If you want to access a specific tt-core of the TT representation, then it is more efficient to use a corresponding method which takes the positional number of the desired core as an input parameter\n",
589 | "\n",
590 | "```python\n",
591 | "tensor_tt.core(i=0)\n",
592 | "```\n",
593 | "\n",
594 | "**Note:** this parameter should not exceed the order of TT representation"
595 | ]
596 | },
597 | {
598 | "cell_type": "code",
599 | "execution_count": 10,
600 | "metadata": {},
601 | "outputs": [
602 | {
603 | "name": "stdout",
604 | "output_type": "stream",
605 | "text": [
606 | "\n",
607 | "\tCore tensor #0 of TT representation\n",
608 | "This tensor is of order 2 and consists of 8 elements.\n",
609 | "Sizes and names of its modes are (4, 2) and ['mode-0', 'mode-1'] respectively.\n",
610 | "[[0 1]\n",
611 | " [2 3]\n",
612 | " [4 5]\n",
613 | " [6 7]]\n",
614 | "\n",
615 | "\tCore tensor #1 of TT representation\n",
616 | "This tensor is of order 3 and consists of 30 elements.\n",
617 | "Sizes and names of its modes are (2, 5, 3) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n",
618 | "[[[ 0 1 2]\n",
619 | " [ 3 4 5]\n",
620 | " [ 6 7 8]\n",
621 | " [ 9 10 11]\n",
622 | " [12 13 14]]\n",
623 | "\n",
624 | " [[15 16 17]\n",
625 | " [18 19 20]\n",
626 | " [21 22 23]\n",
627 | " [24 25 26]\n",
628 | " [27 28 29]]]\n",
629 | "\n",
630 | "\tCore tensor #2 of TT representation\n",
631 | "This tensor is of order 2 and consists of 18 elements.\n",
632 | "Sizes and names of its modes are (3, 6) and ['mode-0', 'mode-1'] respectively.\n",
633 | "[[ 0 1 2 3 4 5]\n",
634 | " [ 6 7 8 9 10 11]\n",
635 | " [12 13 14 15 16 17]]\n"
636 | ]
637 | }
638 | ],
639 | "source": [
640 | "for i in range(tensor_tt.order):\n",
641 | " tt_core = tensor_tt.core(i)\n",
642 | " print('\\n\\tCore tensor #{} of TT representation'.format(i)) \n",
643 | " print(tt_core) \n",
644 | " print(tt_core.data)"
645 | ]
646 | },
647 | {
648 | "cell_type": "markdown",
649 | "metadata": {},
650 | "source": [
651 | "In order to convert **``TensorTT``** into the full representation, simply call: \n",
652 | "\n",
653 | "```python\n",
654 | "tensor_tt.reconstruct()\n",
655 | "```\n",
656 | "\n",
657 | "This returns an object of the **``Tensor``** class with an N-dimensional array calculated as described above and assigned to the **``_data``** attribute."
658 | ]
659 | },
660 | {
661 | "cell_type": "code",
662 | "execution_count": 11,
663 | "metadata": {},
664 | "outputs": [
665 | {
666 | "name": "stdout",
667 | "output_type": "stream",
668 | "text": [
669 | "This tensor is of order 3 and consists of 120 elements.\n",
670 | "Sizes and names of its modes are (4, 5, 6) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n"
671 | ]
672 | },
673 | {
674 | "data": {
675 | "text/plain": [
676 | "array([[[ 300, 348, 396, 444, 492, 540],\n",
677 | " [ 354, 411, 468, 525, 582, 639],\n",
678 | " [ 408, 474, 540, 606, 672, 738],\n",
679 | " [ 462, 537, 612, 687, 762, 837],\n",
680 | " [ 516, 600, 684, 768, 852, 936]],\n",
681 | "\n",
682 | " [[ 960, 1110, 1260, 1410, 1560, 1710],\n",
683 | " [1230, 1425, 1620, 1815, 2010, 2205],\n",
684 | " [1500, 1740, 1980, 2220, 2460, 2700],\n",
685 | " [1770, 2055, 2340, 2625, 2910, 3195],\n",
686 | " [2040, 2370, 2700, 3030, 3360, 3690]],\n",
687 | "\n",
688 | " [[1620, 1872, 2124, 2376, 2628, 2880],\n",
689 | " [2106, 2439, 2772, 3105, 3438, 3771],\n",
690 | " [2592, 3006, 3420, 3834, 4248, 4662],\n",
691 | " [3078, 3573, 4068, 4563, 5058, 5553],\n",
692 | " [3564, 4140, 4716, 5292, 5868, 6444]],\n",
693 | "\n",
694 | " [[2280, 2634, 2988, 3342, 3696, 4050],\n",
695 | " [2982, 3453, 3924, 4395, 4866, 5337],\n",
696 | " [3684, 4272, 4860, 5448, 6036, 6624],\n",
697 | " [4386, 5091, 5796, 6501, 7206, 7911],\n",
698 | " [5088, 5910, 6732, 7554, 8376, 9198]]])"
699 | ]
700 | },
701 | "execution_count": 11,
702 | "metadata": {},
703 | "output_type": "execute_result"
704 | }
705 | ],
706 | "source": [
707 | "tensor_full = tensor_tt.reconstruct()\n",
708 | "print(tensor_full)\n",
709 | "tensor_full.data"
710 | ]
711 | },
712 | {
713 | "cell_type": "markdown",
714 | "metadata": {},
715 | "source": [
716 | "# Further reading list\n",
717 | "- Tamara G. Kolda and Brett W. Bader, \"Tensor decompositions and applications.\" SIAM REVIEW, 51(3):455–500, 2009.\n",
718 | "\n",
719 | "- Ivan V. Oseledets, \"Tensor-train decomposition.\" SIAM Journal on Scientific Computing 33.5 (2011): 2295-2317."
720 | ]
721 | }
722 | ],
723 | "metadata": {
724 | "kernelspec": {
725 | "display_name": "hottbox-tutorials",
726 | "language": "python",
727 | "name": "hottbox-tutorials"
728 | },
729 | "language_info": {
730 | "codemirror_mode": {
731 | "name": "ipython",
732 | "version": 3
733 | },
734 | "file_extension": ".py",
735 | "mimetype": "text/x-python",
736 | "name": "python",
737 | "nbconvert_exporter": "python",
738 | "pygments_lexer": "ipython3",
739 | "version": "3.6.6"
740 | }
741 | },
742 | "nbformat": 4,
743 | "nbformat_minor": 2
744 | }
745 |
--------------------------------------------------------------------------------
/3_Fundamental_tensor_decompositions.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Fundamental tensor decompositions.\n",
8 | "### Last modification (05.06.2018)\n",
9 | "\n",
10 | "In this tutorial we provide a theoretical background on the fundamental tensor decompositions of multidimensional arrays and show how these algorithms can be used with [hottbox](https://github.com/hottbox/hottbox) through **CPD**, **HOSVD**, **HOOI** and **TTSVD** classes.\n",
11 | "\n",
12 | "More details on **CPD**, **HOSVD**, **HOOI** and **TTSVD** classes can be found on the [documentation page](https://hottbox.github.io/stable/api/hottbox.algorithms.decomposition).\n",
13 | "\n",
14 | "**Note:** this tutorial assumes that you are familiar with the basics of tensor algebra, tensor representations in different forms and the corresponding conventional notation. If you are new to these topics, check out our previous tutorials: [tutorial_1](https://github.com/hottbox/hottbox-tutorials/blob/master/1_N-dimensional_arrays_and_Tensor_class.ipynb) and [tutorial_2](https://github.com/hottbox/hottbox-tutorials/blob/master/2_Efficient_representations_of_tensors.ipynb).\n",
15 | "\n",
16 | "**Requirements:** ``hottbox==0.1.3``\n",
17 | "\n",
18 | "**Authors:** \n",
19 | "Ilya Kisil (ilyakisil@gmail.com); \n",
20 | "Giuseppe G. Calvi (ggc115@ic.ac.uk)"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 1,
26 | "metadata": {},
27 | "outputs": [],
28 | "source": [
29 | "import numpy as np\n",
30 | "from hottbox.core import Tensor, residual_tensor\n",
31 | "from hottbox.algorithms.decomposition import TTSVD, HOSVD, HOOI, CPD\n",
32 | "from hottbox.metrics import residual_rel_error"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 | "# Tensor decompositions and their API\n",
40 | "\n",
41 | "In [previous tutorial](https://github.com/hottbox/hottbox-tutorials/blob/master/2_Efficient_representations_of_tensors.ipynb), we have introduced various efficient representations of the multi-dimensional arrays (tensors) and how they can be created using the **`hottbox`** API. Here we show how these representations can be obtained for a given tensor.\n",
42 | "\n",
43 | "For these purposes, the following algorithms have been implemented in **``hottbox>=0.1.2``**:\n",
44 | "\n",
45 | "- CPD: produces instance of **TensorCPD** class\n",
46 | "- HOSVD: produces instance of **TensorTKD** class\n",
47 | "- HOOI: produces instance of **TensorTKD** class\n",
48 | "- TTSVD: produces instance of **TensorTT** class\n",
49 | "\n",
50 | "By analogy with the computation algorithms in **`sklearn`**, you first need to create an instance of this algorithm. Then you use its method **`decompose`** in order to obtain an efficient representation of the original tensor. See [tutorial_2](https://github.com/hottbox/hottbox-tutorials/blob/master/2_Efficient_representations_of_tensors.ipynb) for more information on various efficient representations of multi-dimensional arrays. For simplicity and ease of visualisation, the following material is provided for the tensors of order $3$, but can be easily generalised to a case of $N$-th order.\n",
51 | "\n",
52 | "In all computational examples below we will decompose the same 3-D array with randomly generated values, while all algorithms will be initialised with default parameters."
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 2,
58 | "metadata": {},
59 | "outputs": [
60 | {
61 | "name": "stdout",
62 | "output_type": "stream",
63 | "text": [
64 | "This tensor is of order 3 and consists of 210 elements.\n",
65 | "Sizes and names of its modes are (5, 6, 7) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n"
66 | ]
67 | }
68 | ],
69 | "source": [
70 | "np.random.seed(0)\n",
71 | "I, J, K = 5, 6, 7\n",
72 | "\n",
73 | "array_3d = np.random.rand(I * J * K).reshape((I, J, K)).astype(np.float)\n",
74 | "\n",
75 | "tensor = Tensor(array_3d)\n",
76 | "print(tensor)"
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | "# Canonical Polyadic Decomposition (CPD)\n",
84 | "\n",
85 | "## Theoretical background\n",
86 | "\n",
87 | "The **Canonical Polyadic Decomposition (CPD)** (also referred to as PARAFAC or CANDECOMP) is an algorithm that factorizes a $3$-rd order tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I \\times J \\times K}$ into a linear combination of terms $\\mathbf{\\underline{X}}_r = \\mathbf{a}_r \\circ \\mathbf{b}_r \\circ \\mathbf{c}_r$, which are rank-$1$ tensors. In other words, the tensor $\\mathbf{\\underline{X}}$ is decomposed as\n",
88 | "\n",
89 | "$$\n",
90 | "\\begin{equation}\n",
91 | "\\begin{aligned}\n",
92 | "\\mathbf{\\underline{X}} & \\simeq \\sum_{r=1}^{R} \\lambda_r \\mathbf{a}_r \\circ \\mathbf{b}_r \\circ \\mathbf{c}_r\\\\\n",
93 | "& = \\mathbf{\\underline{\\Lambda}} \\times_1 \\mathbf{A} \\times_2 \\mathbf{B} \\times_3 \\mathbf{C}\\\\\n",
94 | "& = \\Big[ \\mathbf{\\underline{\\Lambda}} ; \\mathbf{A}, \\mathbf{B}, \\mathbf{C} \\Big]\n",
95 | "\\end{aligned}\n",
96 | "\\end{equation}\n",
97 | "$$\n",
98 | "\n",
99 | "where \n",
100 | "\n",
101 | "- $\\mathbf{\\underline{\\Lambda}}$ is a $3$-rd order core tensor having $\\lambda_r$ as entries in positions $\\mathbf{\\underline{\\Lambda}}[i, j, k]$, where $i = j = k$, and zeroes elsewhere\n",
102 | "\n",
103 | "- $\\mathbf{A}, \\mathbf{B}, \\mathbf{C}$ are factor matrices obtained as the concatenation of the corresponding factor vectors, i.e. $ \\mathbf{A} = \\Big[ \\mathbf{a}_1 \\mathbf{a}_2 \\cdots \\mathbf{a}_R \\Big] $ \n",
104 | "\n",
105 | "Assuming the Kruskal rank is fixed, there are many algorithms to compute a CPD. The most popular approach is via the alternating least squares (ALS) method. The goal is to find such a CP representation $[ \\mathbf{\\underline{\\Lambda}} ; \\mathbf{A}, \\mathbf{B}, \\mathbf{C} ]$ which provides the best approximation of the original tensor $\\mathbf{\\underline{X}}$:\n",
106 | "\n",
107 | "$$\n",
108 | "\\text{min} \\| \\mathbf{\\underline{X}} - [ \\mathbf{\\underline{\\Lambda}} ; \\mathbf{A}, \\mathbf{B}, \\mathbf{C} ] \\|^2_F\n",
109 | "$$\n",
110 | "\n",
111 | "The alternating least squares approach fixes $\\mathbf{B}$ and $\\mathbf{C}$ to solve for $\\mathbf{A}$, then fixes $\\mathbf{A}$ and $\\mathbf{C}$ to solve for $\\mathbf{B}$, then fixes $\\mathbf{A}$ and $\\mathbf{B}$ to solve for $\\mathbf{C}$, and continues to repeat the\n",
112 | "entire procedure until some convergence criterion is satisfied.\n"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "## CPD class in hottbox\n",
120 | "\n",
121 | "In **`hottbox`**, the CPD-ALS algorithm is implemented by the **`CPD`** class. Regardless of the parameters used to initialise this algorithm, it outputs an instance of the **`TensorCPD`** class after each call of the **`decompose`** method. This method takes an object of the **`Tensor`** class and the desired value of the Kruskal rank passed as a tuple of length 1. \n",
122 | "\n",
123 | "**Note:** the Kruskal rank is passed as a tuple so to keep the same format with other algorithms for tensor decompositions."
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": 3,
129 | "metadata": {},
130 | "outputs": [
131 | {
132 | "data": {
133 | "text/plain": [
134 | "CPD(epsilon=0.01, init='svd', max_iter=50, random_state=None, tol=0.0001,\n",
135 | " verbose=False)"
136 | ]
137 | },
138 | "execution_count": 3,
139 | "metadata": {},
140 | "output_type": "execute_result"
141 | }
142 | ],
143 | "source": [
144 | "alg = CPD()\n",
145 | "alg"
146 | ]
147 | },
148 | {
149 | "cell_type": "code",
150 | "execution_count": 4,
151 | "metadata": {},
152 | "outputs": [
153 | {
154 | "name": "stdout",
155 | "output_type": "stream",
156 | "text": [
157 | "\tOutput of the CPD algorithm:\n",
158 | "Kruskal representation of a tensor with rank=(5,).\n",
159 | "Factor matrices represent properties: ['mode-0', 'mode-1', 'mode-2']\n",
160 | "With corresponding latent components described by (5, 6, 7) features respectively.\n",
161 | "\n",
162 | "\tFactor matrices\n",
163 | "Mode-0 factor matrix is of shape (5, 5)\n",
164 | "Mode-1 factor matrix is of shape (6, 5)\n",
165 | "Mode-2 factor matrix is of shape (7, 5)\n",
166 | "\n",
167 | "\tCore tensor\n",
168 | "This tensor is of order 3 and consists of 125 elements.\n",
169 | "Sizes and names of its modes are (5, 5, 5) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n"
170 | ]
171 | }
172 | ],
173 | "source": [
174 | "kruskal_rank = (5,)\n",
175 | "\n",
176 | "tensor_cpd = alg.decompose(tensor, rank=kruskal_rank)\n",
177 | "print(\"\\tOutput of the {} algorithm:\".format(alg.name))\n",
178 | "print(tensor_cpd)\n",
179 | "\n",
180 | "print('\\n\\tFactor matrices')\n",
181 | "for mode, fmat in enumerate(tensor_cpd.fmat):\n",
182 | " print('Mode-{} factor matrix is of shape {}'.format(mode, fmat.shape))\n",
183 | " \n",
184 | "print('\\n\\tCore tensor')\n",
185 | "print(tensor_cpd.core)"
186 | ]
187 | },
188 | {
189 | "cell_type": "markdown",
190 | "metadata": {},
191 | "source": [
192 | "As we can see, the produced object of the **`TensorCPD`** class also contains general information about the underlying tensor, such as its shape, order etc, which can be accessed through the corresponding properties"
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": 5,
198 | "metadata": {},
199 | "outputs": [
200 | {
201 | "name": "stdout",
202 | "output_type": "stream",
203 | "text": [
204 | "The shape of the underlying tensor is (5, 6, 7)\n",
205 | "The order of the underlying tensor is 3\n"
206 | ]
207 | }
208 | ],
209 | "source": [
210 | "full_shape = tensor_cpd.ft_shape\n",
211 | "order = tensor_cpd.order\n",
212 | "print('The shape of the underlying tensor is {}'.format(full_shape))\n",
213 | "print('The order of the underlying tensor is {}'.format(order))"
214 | ]
215 | },
216 | {
217 | "cell_type": "markdown",
218 | "metadata": {},
219 | "source": [
220 | "# Tucker Decomposition\n",
221 | "\n",
222 | "\n",
223 | "\n",
224 | "**Tucker Decomposition** represents a given tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I \\times J \\times K}$ in the form of a dense core tensor $\\mathbf{\\underline{G}}$ with multi-linear rank $(Q, R, P)$ and a set of\n",
225 | "factor matrices $\\mathbf{A} \\in \\mathbb{R}^{I \\times Q}, \\mathbf{B} \\in \\mathbb{R}^{J \\times R}$ and $\\mathbf{C} \\in\n",
226 | "\\mathbb{R}^{K \\times P}$ as illustrated above. In other words, the tensor $\\mathbf{\\underline{X}}$ can be represented in Tucker form as\n",
227 | "\n",
228 | "$$\n",
229 | "\\begin{equation}\n",
230 | "\\begin{aligned}\n",
231 | "\\mathbf{\\underline{X}} & \\simeq \\sum_{q=1}^{Q} \\sum_{r=1}^{R} \\sum_{p=1}^{P} g_{qrp} \\mathbf{a}_q \\circ \\mathbf{b}_r \\circ \\mathbf{c}_p\\\\\n",
232 | "& = \\mathbf{\\underline{G}} \\times_1 \\mathbf{A} \\times_2 \\mathbf{B} \\times_3 \\mathbf{C}\\\\\n",
233 | "& = \\Big[ \\mathbf{\\underline{G}} ; \\mathbf{A}, \\mathbf{B}, \\mathbf{C} \\Big]\n",
234 | "\\end{aligned}\n",
235 | "\\end{equation}\n",
236 | "$$\n",
237 | "\n",
238 | "In practice, there exist several algorithms to represent a given tensor in the Tucker format. The two most widely used ones are Higher Order Singular Value Decomposition (HOSVD), and Higher Order Orthogonal Iteration (HOOI), which are implemented through the **`HOSVD`** and **`HOOI`** classes respectively."
239 | ]
240 | },
241 | {
242 | "cell_type": "markdown",
243 | "metadata": {},
244 | "source": [
245 | "## Higher Order Singular Value Decomposition (HOSVD)\n",
246 | "\n",
247 | "Consider a $3$-rd order tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I \\times J \\times K}$, decomposed in the Tucker format as\n",
248 | "\n",
249 | "$$\n",
250 | "\\mathbf{\\underline{X}} = \\mathbf{\\underline{G}} \\times_1 \\mathbf{A} \\times_2 \\mathbf{B} \\times_3 \\mathbf{C}\n",
251 | "$$\n",
252 | "\n",
253 | "The HOSVD is a special case of the Tucker decomposition, in which all the factor matrices are constrained to be orthogonal. They are computed as truncated version of the left singular matrices of all possible mode-$n$ unfoldings of tensor $\\mathbf{\\underline{X}}$:\n",
254 | "\n",
255 | "$$\n",
256 | "\\begin{aligned}\n",
257 | "\\mathbf{X}_{(1)} &= \\mathbf{U}_1 \\mathbf{\\Sigma}_1 \\mathbf{V}_1^T \\quad \\rightarrow \\quad \\mathbf{A} = \\mathbf{U}_1[1:R_1]\\\\\n",
258 | "\\mathbf{X}_{(2)} &= \\mathbf{U}_2 \\mathbf{\\Sigma}_2 \\mathbf{V}_2^T \\quad \\rightarrow \\quad \\mathbf{B} = \\mathbf{U}_2[1:R_2] \\\\\n",
259 | "\\mathbf{X}_{(3)} &= \\mathbf{U}_3 \\mathbf{\\Sigma}_3 \\mathbf{V}_3^T \\quad \\rightarrow \\quad \\mathbf{C} = \\mathbf{U}_3[1:R_3] \\\\\n",
260 | "\\end{aligned}\n",
261 | "$$\n",
262 | "\n",
263 | "For a general order-$N$ tensor, the $N$-tuple $(R_1, \\ldots, R_N)$ is called the **multi-linear rank** and provides flexibility in compression and approximation of the original tensor. For our order-$3$ tensor, the multi-linear rank is therefore $(R_1, R_2, R_3)$. After the factor matrices are obtained, the core tensor $\\mathbf{\\underline{G}}$ is computed as\n",
264 | "$$\n",
265 | "\\mathbf{\\underline{G}} = \\mathbf{\\underline{X}} \\times_1 \\mathbf{A}^T \\times_2 \\mathbf{B}^T \\times_3 \\mathbf{C}^T \n",
266 | "$$"
267 | ]
268 | },
269 | {
270 | "cell_type": "markdown",
271 | "metadata": {},
272 | "source": [
273 | "## HOSVD class in hottbox\n",
274 | "\n",
275 | "In **`hottbox`**, the HOSVD algorithm is implemented by the **`HOSVD`** class. Regardless of the parameters used to initialise this algorithm, it outputs an instance of the **`TensorTKD`** class after each call of the **`decompose`** method. This method takes an object of the **`Tensor`** class and the desired values of the multi-linear rank passed as a tuple. "
276 | ]
277 | },
278 | {
279 | "cell_type": "code",
280 | "execution_count": 6,
281 | "metadata": {},
282 | "outputs": [
283 | {
284 | "data": {
285 | "text/plain": [
286 | "HOSVD(process=(), verbose=False)"
287 | ]
288 | },
289 | "execution_count": 6,
290 | "metadata": {},
291 | "output_type": "execute_result"
292 | }
293 | ],
294 | "source": [
295 | "alg = HOSVD()\n",
296 | "alg"
297 | ]
298 | },
299 | {
300 | "cell_type": "code",
301 | "execution_count": 7,
302 | "metadata": {},
303 | "outputs": [
304 | {
305 | "name": "stdout",
306 | "output_type": "stream",
307 | "text": [
308 | "\tOutput of the HOSVD algorithm:\n",
309 | "Tucker representation of a tensor with multi-linear rank=(4, 5, 6).\n",
310 | "Factor matrices represent properties: ['mode-0', 'mode-1', 'mode-2']\n",
311 | "With corresponding latent components described by (5, 6, 7) features respectively.\n",
312 | "\n",
313 | "\tFactor matrices\n",
314 | "Mode-0 factor matrix is of shape (5, 4)\n",
315 | "Mode-1 factor matrix is of shape (6, 5)\n",
316 | "Mode-2 factor matrix is of shape (7, 6)\n",
317 | "\n",
318 | "\tCore tensor\n",
319 | "This tensor is of order 3 and consists of 120 elements.\n",
320 | "Sizes and names of its modes are (4, 5, 6) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n"
321 | ]
322 | }
323 | ],
324 | "source": [
325 | "ml_rank = (4, 5, 6)\n",
326 | "tensor_tkd_hosvd = alg.decompose(tensor, ml_rank)\n",
327 | "print(\"\\tOutput of the {} algorithm:\".format(alg.name))\n",
328 | "print(tensor_tkd_hosvd)\n",
329 | "\n",
330 | "print('\\n\\tFactor matrices')\n",
331 | "for mode, fmat in enumerate(tensor_tkd_hosvd.fmat):\n",
332 | " print('Mode-{} factor matrix is of shape {}'.format(mode, fmat.shape))\n",
333 | " \n",
334 | "print('\\n\\tCore tensor')\n",
335 | "print(tensor_tkd_hosvd.core)"
336 | ]
337 | },
338 | {
339 | "cell_type": "markdown",
340 | "metadata": {},
341 | "source": [
342 | "As we can see, the produced object of the **`TensorTKD`** class also contains general information about the underlying tensor, such as its shape, order etc, which can be accessed through the corresponding properties"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": 8,
348 | "metadata": {},
349 | "outputs": [
350 | {
351 | "name": "stdout",
352 | "output_type": "stream",
353 | "text": [
354 | "The shape of the underlying tensor is (5, 6, 7)\n",
355 | "The order of the underlying tensor is 3\n"
356 | ]
357 | }
358 | ],
359 | "source": [
360 | "full_shape = tensor_tkd_hosvd.ft_shape\n",
361 | "order = tensor_tkd_hosvd.order\n",
362 | "print('The shape of the underlying tensor is {}'.format(full_shape))\n",
363 | "print('The order of the underlying tensor is {}'.format(order))"
364 | ]
365 | },
366 | {
367 | "cell_type": "markdown",
368 | "metadata": {},
369 | "source": [
370 | "## Higher Order Orthogonal Iteration (HOOI)"
371 | ]
372 | },
373 | {
374 | "cell_type": "markdown",
375 | "metadata": {},
376 | "source": [
377 | "The HOOI algorithm is another special case of the Tucker decomposition. Like HOSVD, it decomposes a tensor into a dense core tensor and orthogonal factor matrices. The difference between the two lies in the fact that in HOOI the factor matrices are optimized iteratively using an Alternating Least Squares (ALS) approach. (In practice HOSVD is usually used within HOOI to initialize the factor matrices). In other words, the Tucker representation $[ \\mathbf{\\underline{G}};\\mathbf{A}^{(1)}, \\mathbf{A}^{(2)}, \\cdots,\\mathbf{A}^{(N)} ]$ of the given tensor $\\mathbf{\\underline{X}}$ is obtained through the HOOI as follows\n",
378 | "\n",
379 | "$$\n",
380 | "\\begin{aligned}\n",
381 | "&\\mathbf{\\underline{Y}} = \\mathbf{\\underline{X}} \\times_1 \\mathbf{A}^{(1)T} \\times_2 \\cdots \\times_{n-1} \\mathbf{A}^{(n-1)T} \\times_{n+1} \\mathbf{A}^{(n+1)T} \\times_{n+2} \\cdots \\times_N \\mathbf{A}^{(N)T} \\\\\n",
382 | "&\\mathbf{A}^{(n)} \\leftarrow R_n \\text{ leftmost singular vectors of } \\mathbf{Y}_{(n)}\n",
383 | "\\end{aligned}\n",
384 | "$$\n",
385 | "\n",
386 | "The above is repeated until convergence, then the core tensor $\\mathbf{\\underline{G}} \\in \\mathbb{R}^{R_1 \\times R_2 \\times \\cdots \\times R_N}$ is computed as\n",
387 | "\n",
388 | "$$\n",
389 | "\\mathbf{\\underline{G}} = \\mathbf{\\underline{X}} \\times_1 \\mathbf{A}^{(1)T} \\times_2 \\mathbf{A}^{(2)T} \\times_3 \\cdots \\times_N \\mathbf{A}^{(N)T}\n",
390 | "$$"
391 | ]
392 | },
393 | {
394 | "cell_type": "markdown",
395 | "metadata": {},
396 | "source": [
397 | "## HOOI class in hottbox\n",
398 | "\n",
399 | "In **`hottbox`**, the HOOI algorithm is implemented by the **`HOOI`** class. Regardless of the parameters used to initialise this algorithm, it outputs an instance of the **`TensorTKD`** class after each call of the **`decompose`** method. This method takes an object of the **`Tensor`** class and the desired values of the multi-linear rank passed as a tuple. "
400 | ]
401 | },
402 | {
403 | "cell_type": "code",
404 | "execution_count": 9,
405 | "metadata": {},
406 | "outputs": [
407 | {
408 | "data": {
409 | "text/plain": [
410 | "HOOI(epsilon=0.01, init='hosvd', max_iter=50, process=(),\n",
411 | " random_state=None, tol=0.0001, verbose=False)"
412 | ]
413 | },
414 | "execution_count": 9,
415 | "metadata": {},
416 | "output_type": "execute_result"
417 | }
418 | ],
419 | "source": [
420 | "alg = HOOI()\n",
421 | "alg"
422 | ]
423 | },
424 | {
425 | "cell_type": "code",
426 | "execution_count": 10,
427 | "metadata": {},
428 | "outputs": [
429 | {
430 | "name": "stdout",
431 | "output_type": "stream",
432 | "text": [
433 | "\tOutput of the HOOI algorithm:\n",
434 | "Tucker representation of a tensor with multi-linear rank=(4, 5, 6).\n",
435 | "Factor matrices represent properties: ['mode-0', 'mode-1', 'mode-2']\n",
436 | "With corresponding latent components described by (5, 6, 7) features respectively.\n",
437 | "\n",
438 | "\tFactor matrices\n",
439 | "Mode-0 factor matrix is of shape (5, 4)\n",
440 | "Mode-1 factor matrix is of shape (6, 5)\n",
441 | "Mode-2 factor matrix is of shape (7, 6)\n",
442 | "\n",
443 | "\tCore tensor\n",
444 | "This tensor is of order 3 and consists of 120 elements.\n",
445 | "Sizes and names of its modes are (4, 5, 6) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n"
446 | ]
447 | }
448 | ],
449 | "source": [
450 | "ml_rank = (4, 5, 6)\n",
451 | "tensor_tkd_hooi = alg.decompose(tensor, ml_rank)\n",
452 | "print(\"\\tOutput of the {} algorithm:\".format(alg.name))\n",
453 | "print(tensor_tkd_hooi)\n",
454 | "\n",
455 | "print('\\n\\tFactor matrices')\n",
456 | "for mode, fmat in enumerate(tensor_tkd_hooi.fmat):\n",
457 | " print('Mode-{} factor matrix is of shape {}'.format(mode, fmat.shape))\n",
458 | " \n",
459 | "print('\\n\\tCore tensor')\n",
460 | "print(tensor_tkd_hooi.core)"
461 | ]
462 | },
463 | {
464 | "cell_type": "markdown",
465 | "metadata": {},
466 | "source": [
467 | "As we can see, the produced object of the **`TensorTKD`** class also contains general information about the underlying tensor, such as its shape, order etc, which can be accessed through the corresponding properties"
468 | ]
469 | },
470 | {
471 | "cell_type": "code",
472 | "execution_count": 11,
473 | "metadata": {},
474 | "outputs": [
475 | {
476 | "name": "stdout",
477 | "output_type": "stream",
478 | "text": [
479 | "The shape of the underlying tensor is (5, 6, 7)\n",
480 | "The order of the underlying tensor is 3\n"
481 | ]
482 | }
483 | ],
484 | "source": [
485 | "full_shape = tensor_tkd_hooi.ft_shape\n",
486 | "order = tensor_tkd_hooi.order\n",
487 | "print('The shape of the underlying tensor is {}'.format(full_shape))\n",
488 | "print('The order of the underlying tensor is {}'.format(order))"
489 | ]
490 | },
491 | {
492 | "cell_type": "markdown",
493 | "metadata": {},
494 | "source": [
495 | "# Tensor Train Decomposition via SVD\n",
496 | "\n",
497 | ""
498 | ]
499 | },
500 | {
501 | "cell_type": "markdown",
502 | "metadata": {},
503 | "source": [
504 | "## Theoretical background\n",
505 | "\n",
506 | "**Tensor train decomposition** represents a given tensor as a set of sparsely interconnected lower-order tensors and factor matrices. Mathematically speaking, the obtained TT representation of an $N$-th order tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I_1 \\times I_2 \\times \\cdots \\times I_N}$ can be expressed as\n",
507 | "\n",
508 | "$$\n",
509 | "\\begin{aligned}\n",
510 | "\\mathbf{\\underline{X}}\n",
511 | "&= \\Big[ \\mathbf{A}, \\mathbf{\\underline{G}}^{(1)}, \\mathbf{\\underline{G}}^{(2)}, \\cdots, \\mathbf{\\underline{G}}^{(N-1)}, \\mathbf{B} \\Big]\\\\\n",
512 | "&= \\mathbf{A} \\times^1_2 \\mathbf{\\underline{G}}^{(1)} \\times^1_3 \\mathbf{\\underline{G}}^{(2)} \\times^1_3 \\cdots \\times^1_3 \\mathbf{\\underline{G}}^{(N-1)} \\times^1_3 \\mathbf{B} \n",
513 | "\\end{aligned}\n",
514 | "$$\n",
515 | "\n",
516 | "Each element of a TT is generally referred to as a **tt-core** with the sizes of its dimensions: $\\mathbf{A} \\in \\mathbb{R}^{I_1 \\times R_1}$, $\\mathbf{B} \\in \\mathbb{R}^{R_{N-1}\\times I_N}$, $\\mathbf{\\underline{G}}^{(n)} \\in \\mathbb{R}^{R_n \\times I_{n+1} \\times R_{n+1}}$\n",
517 | "\n",
518 | "\n",
519 | "The TTSVD algorithm involves iteratively performing a series of foldings and unfoldings on an original tensor $\\mathbf{\\underline{X}} \\in \\mathbb{R}^{I_1 \\times I_2 \\times \\cdots \\times I_N}$ in conjunction with SVD. At every iteration a core $\\mathbf{\\underline{G}}^{(n)} \\in \\mathbb{R}^{R_n \\times I_{n+1} \\times R_{n+1}}$ is computed, where the TT-rank $(R_1, R_2, \\dots, R_N)$ has been specified a priori. "
520 | ]
521 | },
522 | {
523 | "cell_type": "markdown",
524 | "metadata": {},
525 | "source": [
526 | "## TTSVD class in hottbox\n",
527 | "\n",
528 | "In **`hottbox`**, the TTSVD algorithm is implemented by the **`TTSVD`** class. Regardless of the parameters used to initialise this algorithm, it outputs an instance of the **`TensorTT`** class after each call of the **`decompose`** method. This method takes an object of the **`Tensor`** class and the desired values of the tt-rank passed as a tuple. "
529 | ]
530 | },
531 | {
532 | "cell_type": "code",
533 | "execution_count": 12,
534 | "metadata": {},
535 | "outputs": [
536 | {
537 | "data": {
538 | "text/plain": [
539 | "TTSVD(verbose=False)"
540 | ]
541 | },
542 | "execution_count": 12,
543 | "metadata": {},
544 | "output_type": "execute_result"
545 | }
546 | ],
547 | "source": [
548 | "alg = TTSVD()\n",
549 | "alg"
550 | ]
551 | },
552 | {
553 | "cell_type": "code",
554 | "execution_count": 13,
555 | "metadata": {},
556 | "outputs": [
557 | {
558 | "name": "stdout",
559 | "output_type": "stream",
560 | "text": [
561 | "\tOutput of the TTSVD algorithm:\n",
562 | "Tensor train representation of a tensor with tt-rank=(2, 3).\n",
563 | "Shape of this representation in the full format is (5, 6, 7).\n",
564 | "Physical modes of its cores represent properties: ['mode-0', 'mode-1', 'mode-2']\n",
565 | "\n",
566 | "\tTT-Core #0\n",
567 | "This tensor is of order 2 and consists of 10 elements.\n",
568 | "Sizes and names of its modes are (5, 2) and ['mode-0', 'mode-1'] respectively.\n",
569 | "\n",
570 | "\tTT-Core #1\n",
571 | "This tensor is of order 3 and consists of 36 elements.\n",
572 | "Sizes and names of its modes are (2, 6, 3) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n",
573 | "\n",
574 | "\tTT-Core #2\n",
575 | "This tensor is of order 2 and consists of 21 elements.\n",
576 | "Sizes and names of its modes are (3, 7) and ['mode-0', 'mode-1'] respectively.\n"
577 | ]
578 | }
579 | ],
580 | "source": [
581 | "tt_rank = (2,3)\n",
582 | "\n",
583 | "tensor_tt = alg.decompose(tensor, tt_rank)\n",
584 | "print(\"\\tOutput of the {} algorithm:\".format(alg.name))\n",
585 | "print(tensor_tt)\n",
586 | "\n",
587 | "for i, core in enumerate(tensor_tt.cores):\n",
588 | " print('\\n\\tTT-Core #{}'.format(i))\n",
589 | " print(core)"
590 | ]
591 | },
592 | {
593 | "cell_type": "markdown",
594 | "metadata": {},
595 | "source": [
596 | "As we can see, the produced object of the **`TensorTT`** class also contains general information about the underlying tensor, such as its shape, order etc, which can be accessed through the corresponding properties"
597 | ]
598 | },
599 | {
600 | "cell_type": "code",
601 | "execution_count": 14,
602 | "metadata": {},
603 | "outputs": [
604 | {
605 | "name": "stdout",
606 | "output_type": "stream",
607 | "text": [
608 | "The shape of the underlying tensor is (5, 6, 7)\n",
609 | "The order of the underlying tensor is 3\n"
610 | ]
611 | }
612 | ],
613 | "source": [
614 | "full_shape = tensor_tt.ft_shape\n",
615 | "order = tensor_tt.order\n",
616 | "print('The shape of the underlying tensor is {}'.format(full_shape))\n",
617 | "print('The order of the underlying tensor is {}'.format(order))"
618 | ]
619 | },
620 | {
621 | "cell_type": "markdown",
622 | "metadata": {},
623 | "source": [
624 | "# Evaluating results of tensor decompositions\n",
625 | "\n",
626 | "For each result of the tensor decomposition we can compute a residual tensor and calculate relative error of approximation:\n",
627 | "```python\n",
628 | " tensor_res = residual_tensor(tensor, tensor_cpd)\n",
629 | " rel_error = tensor_res.frob_norm / tensor.frob_norm \n",
630 | "```\n",
631 | "Or can do it in one line:\n",
632 | "```python\n",
633 | " rel_error = residual_rel_error(tensor, tensor_cpd)\n",
634 | "```"
635 | ]
636 | },
637 | {
638 | "cell_type": "code",
639 | "execution_count": 15,
640 | "metadata": {},
641 | "outputs": [
642 | {
643 | "name": "stdout",
644 | "output_type": "stream",
645 | "text": [
646 | "\tResidual tensor\n",
647 | "This tensor is of order 3 and consists of 210 elements.\n",
648 | "Sizes and names of its modes are (5, 6, 7) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n"
649 | ]
650 | }
651 | ],
652 | "source": [
653 | "tensor_cpd_res = residual_tensor(tensor, tensor_cpd)\n",
654 | "print('\\tResidual tensor')\n",
655 | "print(tensor_cpd_res)"
656 | ]
657 | },
658 | {
659 | "cell_type": "code",
660 | "execution_count": 16,
661 | "metadata": {},
662 | "outputs": [
663 | {
664 | "name": "stdout",
665 | "output_type": "stream",
666 | "text": [
667 | "Relative error of CPD approximation = 0.31\n",
668 | "Relative error of CPD approximation = 0.31\n"
669 | ]
670 | }
671 | ],
672 | "source": [
673 | "rel_error = tensor_cpd_res.frob_norm / tensor.frob_norm \n",
674 | "print('Relative error of CPD approximation = {:.2f}'.format(rel_error))\n",
675 | "\n",
676 | "rel_error = residual_rel_error(tensor, tensor_cpd)\n",
677 | "print('Relative error of CPD approximation = {:.2f}'.format(rel_error))"
678 | ]
679 | },
680 | {
681 | "cell_type": "code",
682 | "execution_count": 17,
683 | "metadata": {},
684 | "outputs": [
685 | {
686 | "name": "stdout",
687 | "output_type": "stream",
688 | "text": [
689 | "Relative error of HOSVD approximation = 0.21\n"
690 | ]
691 | }
692 | ],
693 | "source": [
694 | "rel_error = residual_rel_error(tensor, tensor_tkd_hosvd)\n",
695 | "print('Relative error of HOSVD approximation = {:.2f}'.format(rel_error))"
696 | ]
697 | },
698 | {
699 | "cell_type": "code",
700 | "execution_count": 18,
701 | "metadata": {},
702 | "outputs": [
703 | {
704 | "name": "stdout",
705 | "output_type": "stream",
706 | "text": [
707 | "Relative error of HOOI approximation = 0.21\n"
708 | ]
709 | }
710 | ],
711 | "source": [
712 | "rel_error = residual_rel_error(tensor, tensor_tkd_hooi)\n",
713 | "print('Relative error of HOOI approximation = {:.2f}'.format(rel_error))"
714 | ]
715 | },
716 | {
717 | "cell_type": "code",
718 | "execution_count": 19,
719 | "metadata": {},
720 | "outputs": [
721 | {
722 | "name": "stdout",
723 | "output_type": "stream",
724 | "text": [
725 | "Relative error of TT approximation = 0.39\n"
726 | ]
727 | }
728 | ],
729 | "source": [
730 | "rel_error = residual_rel_error(tensor, tensor_tt)\n",
731 | "print('Relative error of TT approximation = {:.2f}'.format(rel_error))"
732 | ]
733 | },
734 | {
735 | "cell_type": "markdown",
736 | "metadata": {},
737 | "source": [
738 | "# Further reading list\n",
739 | "- Tamara G. Kolda and Brett W. Bader, \"Tensor decompositions and applications.\" SIAM REVIEW, 51(3):455–500, 2009.\n",
740 | "\n",
741 | "- Lieven De Lathauwer, Bart De Moor, and Joos Vandewalle, \"A multilinear singular value decomposition.\" SIAM journal on Matrix Analysis and Applications 21.4 (2000): 1253-1278.\n",
742 | "\n",
743 | "- Ivan V. Oseledets, \"Tensor-train decomposition.\" SIAM Journal on Scientific Computing 33.5 (2011): 2295-2317."
744 | ]
745 | }
746 | ],
747 | "metadata": {
748 | "kernelspec": {
749 | "display_name": "hottbox-tutorials",
750 | "language": "python",
751 | "name": "hottbox-tutorials"
752 | },
753 | "language_info": {
754 | "codemirror_mode": {
755 | "name": "ipython",
756 | "version": 3
757 | },
758 | "file_extension": ".py",
759 | "mimetype": "text/x-python",
760 | "name": "python",
761 | "nbconvert_exporter": "python",
762 | "pygments_lexer": "ipython3",
763 | "version": "3.6.6"
764 | }
765 | },
766 | "nbformat": 4,
767 | "nbformat_minor": 2
768 | }
769 |
--------------------------------------------------------------------------------
/4_Ecosystem_of_Tensor_class.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Ecosystem of a Tensor: N-dimensional arrays, their descriptions and meta information\n",
8 | "### Last modification (08.06.2018).\n",
9 | "\n",
10 | "\n",
11 | "**Note:** this tutorial assumes that you are familiar with the notion of N-dimensional arrays and their efficient representations. The related material can be found in our previous tutorials: [tutorial_1](https://github.com/hottbox/hottbox-tutorials/blob/master/1_N-dimensional_arrays_and_Tensor_class.ipynb) and [tutorial_2](https://github.com/hottbox/hottbox-tutorials/blob/master/2_Efficient_representations_of_tensors.ipynb).\n",
12 | "\n",
13 | "\n",
14 | "**Requirements:** ``hottbox==0.1.3``\n",
15 | "\n",
16 | "**Authors:** \n",
17 | "Ilya Kisil (ilyakisil@gmail.com); "
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 1,
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "import numpy as np\n",
27 | "from hottbox.core import Tensor"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 2,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "def show_meta_information(tensor, data=True, shapes=True, modes=True, state=True):\n",
37 | " \"\"\" Quick util for showing relevant information for this tutorial\n",
38 | " \n",
39 | " Parameters\n",
40 | " ----------\n",
41 | " tensor : Tensor\n",
42 | " data : bool\n",
43 | " If True, show data array\n",
44 | " shapes : bool\n",
45 | " If True, show current shape and normal shape\n",
46 | " modes : bool\n",
47 | " If True, show mode information\n",
48 | " state : bool \n",
49 | " If True, show state information\n",
50 | " \"\"\"\n",
51 | " print(tensor)\n",
52 | " \n",
53 | " if data:\n",
54 | " print(\"\\n\\tThe underlying data array is:\")\n",
55 | " print(tensor.data)\n",
56 | " \n",
57 | " if shapes:\n",
58 | " print(\"\\n\\tIs this tensor in normal state: {}\".format(tensor.in_normal_state))\n",
59 | " print(\"Current shape of the data array: {}\".format(tensor.shape))\n",
60 | " print(\"Normal shape of the data array: {}\".format(tensor.ft_shape))\n",
61 | " \n",
62 | " if modes:\n",
63 | " print(\"\\n\\tInformation about its modes:\")\n",
64 | " for i, tensor_mode in enumerate(tensor.modes):\n",
65 | " print(\"#{}: {}\".format(i, tensor_mode))\n",
66 | "\n",
67 | " if state:\n",
68 | " print(\"\\n\\tInformation about its current state:\") \n",
69 | " tensor.show_state()\n",
70 | " \n",
71 | "def print_sep_line():\n",
72 | " print(\"\\n===========================\"\n",
73 | " \"=============================\"\n",
74 | " \"===========================\\n\")"
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {},
80 | "source": [
81 | "Recall that the collected raw data in the form of an N-dimensional array represents different characteristics. Here are a couple of examples:\n",
82 | "\n",
83 | "\n",
84 | "\n",
85 | "N-dimensional arrays of data can be represented in various different forms. By applying numerical methods (algorithms for tensor decompositions) to the raw data we can obtain, for example, Kruskal or Tucker representation. At the same time, simple data rearrangement procedures (e.g. folding, unfolding) of the raw data also yields different representation.\n",
86 | "\n",
87 | ""
88 | ]
89 | },
90 | {
91 | "cell_type": "markdown",
92 | "metadata": {},
93 | "source": [
94 | "Each dimension of an N-dimensional array is associated with a certain property, **mode**, of the raw data. At the same time, this characteristic is described by certain features. The relation between these properties defines the **state** of this N-dimensional array. In other words, modes and state could be seen as the meta information about the tensor.\n",
95 | "\n",
96 | "**Mode** of the tensor is defined by name of the property it represents and features that describe this property.\n",
97 | "\n",
98 | "**State** of the tensor is defined by transformations applied to the data array. \n",
99 | "\n",
100 | "**Normal state** of the tensor is such state of the tensor when the underlying raw data array is in its original form. This means that it has not been folded, unfolded or rotated.\n",
101 | "\n",
102 | "Thus, the tensor is described by two different shapes: \n",
103 | "1. Shape of the data array in the current state of the tensor\n",
104 | "2. Normal shape (full shape) - shape of the data array in the normal state.\n",
105 | "\n",
106 | "Each transformation can be characterised by the mode order and type of reshaping. This information is enough in order to be able to revert applied transformation of the data array.\n",
107 | "\n",
108 | "Transformations such as folding or unfolding do not change the original properties of the underlying data array, but they change the relationship between these properties.\n",
109 | "\n",
110 | ""
111 | ]
112 | },
113 | {
114 | "cell_type": "markdown",
115 | "metadata": {},
116 | "source": [
117 | "By default, an object of **Tensor** class is created in **normal state** with generic mode names that describe properties of dimensions of data array."
118 | ]
119 | },
120 | {
121 | "cell_type": "code",
122 | "execution_count": 3,
123 | "metadata": {},
124 | "outputs": [
125 | {
126 | "name": "stdout",
127 | "output_type": "stream",
128 | "text": [
129 | "This tensor is of order 3 and consists of 24 elements.\n",
130 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n",
131 | "\n",
132 | "\tThe underlying data array is:\n",
133 | "[[[ 0 1 2 3]\n",
134 | " [ 4 5 6 7]\n",
135 | " [ 8 9 10 11]]\n",
136 | "\n",
137 | " [[12 13 14 15]\n",
138 | " [16 17 18 19]\n",
139 | " [20 21 22 23]]]\n",
140 | "\n",
141 | "\tIs this tensor in normal state: True\n",
142 | "Current shape of the data array: (2, 3, 4)\n",
143 | "Normal shape of the data array: (2, 3, 4)\n",
144 | "\n",
145 | "\tInformation about its modes:\n",
146 | "#0: Mode(name='mode-0', index=None)\n",
147 | "#1: Mode(name='mode-1', index=None)\n",
148 | "#2: Mode(name='mode-2', index=None)\n",
149 | "\n",
150 | "\tInformation about its current state:\n",
151 | "State(normal_shape=(2, 3, 4), rtype='Init', mode_order=([0], [1], [2]))\n"
152 | ]
153 | }
154 | ],
155 | "source": [
156 | "data_array = np.arange(24).reshape(2, 3, 4)\n",
157 | "\n",
158 | "tensor = Tensor(data_array)\n",
159 | "\n",
160 | "show_meta_information(tensor)"
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "## Meta information after applying data transformations\n",
168 | "\n",
169 | "Next, we will show changes in the meta information of the tensor when different transformations are applied to it. \n",
170 | "\n",
171 | "**Note:** at the moment, only one data transformation can be applied at a time. This will be generalised in future releases of **hottbox** and will be outlined in the [CHANGELOG](https://github.com/hottbox/hottbox/blob/master/CHANGELOG.md)."
172 | ]
173 | },
174 | {
175 | "cell_type": "markdown",
176 | "metadata": {},
177 | "source": [
178 | "### Unfolding of the data"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": 4,
184 | "metadata": {},
185 | "outputs": [
186 | {
187 | "name": "stdout",
188 | "output_type": "stream",
189 | "text": [
190 | "This tensor is of order 2 and consists of 24 elements.\n",
191 | "Sizes and names of its modes are (3, 8) and ['mode-1', 'mode-0_mode-2'] respectively.\n",
192 | "\n",
193 | "\tThe underlying data array is:\n",
194 | "[[ 0 1 2 3 12 13 14 15]\n",
195 | " [ 4 5 6 7 16 17 18 19]\n",
196 | " [ 8 9 10 11 20 21 22 23]]\n",
197 | "\n",
198 | "\tIs this tensor in normal state: False\n",
199 | "Current shape of the data array: (3, 8)\n",
200 | "Normal shape of the data array: (2, 3, 4)\n",
201 | "\n",
202 | "\tInformation about its modes:\n",
203 | "#0: Mode(name='mode-0', index=None)\n",
204 | "#1: Mode(name='mode-1', index=None)\n",
205 | "#2: Mode(name='mode-2', index=None)\n",
206 | "\n",
207 | "\tInformation about its current state:\n",
208 | "State(normal_shape=(2, 3, 4), rtype='T', mode_order=([1], [0, 2]))\n"
209 | ]
210 | }
211 | ],
212 | "source": [
213 | "tensor.unfold(mode=1)\n",
214 | "\n",
215 | "show_meta_information(tensor)"
216 | ]
217 | },
218 | {
219 | "cell_type": "markdown",
220 | "metadata": {},
221 | "source": [
222 | "### Folding of the data"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": 5,
228 | "metadata": {},
229 | "outputs": [
230 | {
231 | "name": "stdout",
232 | "output_type": "stream",
233 | "text": [
234 | "This tensor is of order 3 and consists of 24 elements.\n",
235 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n",
236 | "\n",
237 | "\tThe underlying data array is:\n",
238 | "[[[ 0 1 2 3]\n",
239 | " [ 4 5 6 7]\n",
240 | " [ 8 9 10 11]]\n",
241 | "\n",
242 | " [[12 13 14 15]\n",
243 | " [16 17 18 19]\n",
244 | " [20 21 22 23]]]\n",
245 | "\n",
246 | "\tIs this tensor in normal state: True\n",
247 | "Current shape of the data array: (2, 3, 4)\n",
248 | "Normal shape of the data array: (2, 3, 4)\n",
249 | "\n",
250 | "\tInformation about its modes:\n",
251 | "#0: Mode(name='mode-0', index=None)\n",
252 | "#1: Mode(name='mode-1', index=None)\n",
253 | "#2: Mode(name='mode-2', index=None)\n",
254 | "\n",
255 | "\tInformation about its current state:\n",
256 | "State(normal_shape=(2, 3, 4), rtype='Init', mode_order=([0], [1], [2]))\n"
257 | ]
258 | }
259 | ],
260 | "source": [
261 | "tensor.fold()\n",
262 | "\n",
263 | "show_meta_information(tensor)"
264 | ]
265 | },
266 | {
267 | "cell_type": "markdown",
268 | "metadata": {},
269 | "source": [
270 | "### Vectorisation of the data"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": 6,
276 | "metadata": {},
277 | "outputs": [
278 | {
279 | "name": "stdout",
280 | "output_type": "stream",
281 | "text": [
282 | "This tensor is of order 1 and consists of 24 elements.\n",
283 | "Sizes and names of its modes are (24,) and ['mode-0_mode-1_mode-2'] respectively.\n",
284 | "\n",
285 | "\tThe underlying data array is:\n",
286 | "[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]\n",
287 | "\n",
288 | "\tIs this tensor in normal state: False\n",
289 | "Current shape of the data array: (24,)\n",
290 | "Normal shape of the data array: (2, 3, 4)\n",
291 | "\n",
292 | "\tInformation about its modes:\n",
293 | "#0: Mode(name='mode-0', index=None)\n",
294 | "#1: Mode(name='mode-1', index=None)\n",
295 | "#2: Mode(name='mode-2', index=None)\n",
296 | "\n",
297 | "\tInformation about its current state:\n",
298 | "State(normal_shape=(2, 3, 4), rtype='T', mode_order=([0, 1, 2],))\n"
299 | ]
300 | }
301 | ],
302 | "source": [
303 | "tensor.vectorise()\n",
304 | "\n",
305 | "show_meta_information(tensor)"
306 | ]
307 | },
308 | {
309 | "cell_type": "markdown",
310 | "metadata": {},
311 | "source": [
312 | "As we can see, the applied transformations rearrange values of the underlying data array. They also change relations between mode names and modify the state of the tensor. However, the normal shape and the information about the original modes remain the same."
313 | ]
314 | },
315 | {
316 | "cell_type": "markdown",
317 | "metadata": {},
318 | "source": [
319 | "## Different reshaping conventions\n",
320 | "\n",
321 | "In computing, row-major order and column-major order are methods for storing multidimensional arrays in linear storage such as random access memory. For example, for the array\n",
322 | "$$\n",
323 | "\\mathbf{A} = \n",
324 | "\\begin{bmatrix}\n",
325 | " a_{11} & a_{12} & a_{13}\\\\ \n",
326 | " a_{21} & a_{22} & a_{23} \n",
327 | "\\end{bmatrix}\n",
328 | "$$\n",
329 | "the two possible ways are:\n",
330 | "\n",
331 | "\n",
332 | "\n",
333 | "Therefore, there are several conventions for reshaping (unfolding/folding/vectorising) data.\n",
334 | "Both of them are available in the **hottbox**. They produce arrays of the same shape, but with values being permuted. The state of the tensor memorises which convention has been applied and will use it for reverting the applied transformation."
335 | ]
336 | },
337 | {
338 | "cell_type": "markdown",
339 | "metadata": {},
340 | "source": [
341 | "### Row and column major unfolding"
342 | ]
343 | },
344 | {
345 | "cell_type": "code",
346 | "execution_count": 7,
347 | "metadata": {},
348 | "outputs": [
349 | {
350 | "name": "stdout",
351 | "output_type": "stream",
352 | "text": [
353 | "\tRow-major unfolding\n",
354 | "This tensor is of order 2 and consists of 24 elements.\n",
355 | "Sizes and names of its modes are (3, 8) and ['mode-1', 'mode-0_mode-2'] respectively.\n",
356 | "\n",
357 | "\tThe underlying data array is:\n",
358 | "[[ 0 1 2 3 12 13 14 15]\n",
359 | " [ 4 5 6 7 16 17 18 19]\n",
360 | " [ 8 9 10 11 20 21 22 23]]\n",
361 | "\n",
362 | "\tInformation about its current state:\n",
363 | "State(normal_shape=(2, 3, 4), rtype='T', mode_order=([1], [0, 2]))\n",
364 | "\n",
365 | "===================================================================================\n",
366 | "\n",
367 | "\tColumn-major unfolding\n",
368 | "This tensor is of order 2 and consists of 24 elements.\n",
369 | "Sizes and names of its modes are (3, 8) and ['mode-1', 'mode-0_mode-2'] respectively.\n",
370 | "\n",
371 | "\tThe underlying data array is:\n",
372 | "[[ 0 12 1 13 2 14 3 15]\n",
373 | " [ 4 16 5 17 6 18 7 19]\n",
374 | " [ 8 20 9 21 10 22 11 23]]\n",
375 | "\n",
376 | "\tInformation about its current state:\n",
377 | "State(normal_shape=(2, 3, 4), rtype='K', mode_order=([1], [0, 2]))\n"
378 | ]
379 | }
380 | ],
381 | "source": [
382 | "data_array = np.arange(24).reshape(2, 3, 4)\n",
383 | "\n",
384 | "tensor_1 = Tensor(data_array)\n",
385 | "tensor_2 = Tensor(data_array)\n",
386 | "\n",
387 | "tensor_1.unfold(mode=1, rtype=\"T\")\n",
388 | "tensor_2.unfold(mode=1, rtype=\"K\")\n",
389 | "\n",
390 | "print(\"\\tRow-major unfolding\")\n",
391 | "show_meta_information(tensor_1, shapes=False, modes=False)\n",
392 | "\n",
393 | "print_sep_line()\n",
394 | "\n",
395 | "print(\"\\tColumn-major unfolding\")\n",
396 | "show_meta_information(tensor_2, shapes=False, modes=False)"
397 | ]
398 | },
399 | {
400 | "cell_type": "markdown",
401 | "metadata": {},
402 | "source": [
403 | "### Row and column major folding"
404 | ]
405 | },
406 | {
407 | "cell_type": "code",
408 | "execution_count": 8,
409 | "metadata": {},
410 | "outputs": [
411 | {
412 | "name": "stdout",
413 | "output_type": "stream",
414 | "text": [
415 | "\tReverting Row-major unfolding\n",
416 | "This tensor is of order 3 and consists of 24 elements.\n",
417 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n",
418 | "\n",
419 | "\tThe underlying data array is:\n",
420 | "[[[ 0 1 2 3]\n",
421 | " [ 4 5 6 7]\n",
422 | " [ 8 9 10 11]]\n",
423 | "\n",
424 | " [[12 13 14 15]\n",
425 | " [16 17 18 19]\n",
426 | " [20 21 22 23]]]\n",
427 | "\n",
428 | "\tInformation about its current state:\n",
429 | "State(normal_shape=(2, 3, 4), rtype='Init', mode_order=([0], [1], [2]))\n",
430 | "\n",
431 | "===================================================================================\n",
432 | "\n",
433 | "\tReverting Column-major unfolding\n",
434 | "This tensor is of order 3 and consists of 24 elements.\n",
435 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n",
436 | "\n",
437 | "\tThe underlying data array is:\n",
438 | "[[[ 0 1 2 3]\n",
439 | " [ 4 5 6 7]\n",
440 | " [ 8 9 10 11]]\n",
441 | "\n",
442 | " [[12 13 14 15]\n",
443 | " [16 17 18 19]\n",
444 | " [20 21 22 23]]]\n",
445 | "\n",
446 | "\tInformation about its current state:\n",
447 | "State(normal_shape=(2, 3, 4), rtype='Init', mode_order=([0], [1], [2]))\n"
448 | ]
449 | }
450 | ],
451 | "source": [
452 | "tensor_1.fold()\n",
453 | "tensor_2.fold()\n",
454 | "print(\"\\tReverting Row-major unfolding\")\n",
455 | "show_meta_information(tensor_1, shapes=False, modes=False)\n",
456 | "\n",
457 | "print_sep_line()\n",
458 | "\n",
459 | "print(\"\\tReverting Column-major unfolding\")\n",
460 | "show_meta_information(tensor_2, shapes=False, modes=False)"
461 | ]
462 | },
463 | {
464 | "cell_type": "markdown",
465 | "metadata": {},
466 | "source": [
467 | "As we can see, the different approaches to reshaping the underlying data affect only the data array itself, whereas other properties remain the same. Similarly to the unfolding along different modes, the **state** of the tensor keeps track of this transformation as well. \n",
468 | "\n",
469 | "**Note:** the same type of unfolding and folding should be applied to the data array, in order not to mix up the values that describe different properties of the tensor. But don't worry about it, since this is handled automatically under the hood."
470 | ]
471 | },
472 | {
473 | "cell_type": "markdown",
474 | "metadata": {},
475 | "source": [
476 | "## Creating Tensor with custom meta information\n",
477 | "\n",
478 | "The **state** and list of **mode** are created at the initialisation of the **Tensor** object: \n",
479 | "1. **State** of the tensor is created. By default, this step assumes that data is passed in normal shape (was not folded or unfolded before).\n",
480 | "2. List of **modes** is created based on **state**. By default, it extracts from **state** the number of modes to be created and assigns default names to each of them.\n",
481 | "\n",
482 | "The **hottbox** provides flexibility for this procedure. The **Tensor** can be created with custom names for the modes and in a state that is not inferred (defined) from the provided data. \n",
483 | "\n",
484 | "If both customisations are passed to the **Tensor** constructor, then the list of mode names is dependent on the provided state. If only mode names are provided, then its length should be consistent with the number of dimensions of the data array.\n",
485 | "\n",
486 | "Defining a custom state is a little bit trickier, but there is nothing to be scared of, because **state** and **modes** are crucial parts of the **Tensor** ecosystem. Even though there is quite a bit of input validation involved, which will point you in the right direction in case something was not specified correctly, a custom state should be specified with caution.\n",
487 | "\n",
488 | "**Note:** The usefulness of the custom mode names is not fully exploited in **hottbox** at the moment, but we are working on that."
489 | ]
490 | },
491 | {
492 | "cell_type": "code",
493 | "execution_count": 9,
494 | "metadata": {},
495 | "outputs": [],
496 | "source": [
497 | "I, J, K = 2, 3, 4\n",
498 | "\n",
499 | "# Provided with a 3D array\n",
500 | "data_3d = np.arange(I*J*K).reshape(I, J, K)\n",
501 | "\n",
502 | "# Provided with a 3D array that had been unfolded\n",
503 | "data_2d = np.arange(I*J*K).reshape(I, (J*K))"
504 | ]
505 | },
506 | {
507 | "cell_type": "markdown",
508 | "metadata": {},
509 | "source": [
510 | "### Custom mode names"
511 | ]
512 | },
513 | {
514 | "cell_type": "code",
515 | "execution_count": 10,
516 | "metadata": {},
517 | "outputs": [
518 | {
519 | "name": "stdout",
520 | "output_type": "stream",
521 | "text": [
522 | "This tensor is of order 3 and consists of 24 elements.\n",
523 | "Sizes and names of its modes are (2, 3, 4) and ['Frequency', 'Time', 'Subject'] respectively.\n",
524 | "\n",
525 | "\tInformation about its modes:\n",
526 | "#0: Mode(name='Frequency', index=None)\n",
527 | "#1: Mode(name='Time', index=None)\n",
528 | "#2: Mode(name='Subject', index=None)\n"
529 | ]
530 | }
531 | ],
532 | "source": [
533 | "tensor_1 = Tensor(data_3d, mode_names=[\"Frequency\", \"Time\", \"Subject\"])\n",
534 | "\n",
535 | "show_meta_information(tensor_1, data=False, shapes=False, state=False)"
536 | ]
537 | },
538 | {
539 | "cell_type": "markdown",
540 | "metadata": {},
541 | "source": [
542 | "### Custom state: different mode order"
543 | ]
544 | },
545 | {
546 | "cell_type": "code",
547 | "execution_count": 11,
548 | "metadata": {},
549 | "outputs": [
550 | {
551 | "name": "stdout",
552 | "output_type": "stream",
553 | "text": [
554 | "This tensor is of order 2 and consists of 24 elements.\n",
555 | "Sizes and names of its modes are (2, 12) and ['mode-0', 'mode-1_mode-2'] respectively.\n",
556 | "\n",
557 | "\tThe underlying data array is:\n",
558 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n",
559 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n",
560 | "\n",
561 | "===================================================================================\n",
562 | "\n",
563 | "This tensor is of order 2 and consists of 24 elements.\n",
564 | "Sizes and names of its modes are (2, 12) and ['mode-1', 'mode-0_mode-2'] respectively.\n",
565 | "\n",
566 | "\tThe underlying data array is:\n",
567 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n",
568 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n"
569 | ]
570 | }
571 | ],
572 | "source": [
573 | "custom_state_1 = dict(mode_order=([0], [1, 2]),\n",
574 | " normal_shape=(2, 3, 4),\n",
575 | " rtype=\"T\"\n",
576 | " )\n",
577 | "custom_state_2 = dict(mode_order=([1], [0, 2]),\n",
578 | " normal_shape=(2, 3, 4),\n",
579 | " rtype=\"T\"\n",
580 | " )\n",
581 | "\n",
582 | "tensor_1 = Tensor(data_2d, custom_state=custom_state_1)\n",
583 | "tensor_2 = Tensor(data_2d, custom_state=custom_state_2)\n",
584 | "\n",
585 | "show_meta_information(tensor_1, modes=False, shapes=False, state=False)\n",
586 | "\n",
587 | "print_sep_line()\n",
588 | "\n",
589 | "show_meta_information(tensor_2, modes=False, shapes=False, state=False)"
590 | ]
591 | },
592 | {
593 | "cell_type": "code",
594 | "execution_count": 12,
595 | "metadata": {},
596 | "outputs": [
597 | {
598 | "name": "stdout",
599 | "output_type": "stream",
600 | "text": [
601 | "This tensor is of order 3 and consists of 24 elements.\n",
602 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n",
603 | "\n",
604 | "\tThe underlying data array is:\n",
605 | "[[[ 0 1 2 3]\n",
606 | " [ 4 5 6 7]\n",
607 | " [ 8 9 10 11]]\n",
608 | "\n",
609 | " [[12 13 14 15]\n",
610 | " [16 17 18 19]\n",
611 | " [20 21 22 23]]]\n",
612 | "\n",
613 | "===================================================================================\n",
614 | "\n",
615 | "This tensor is of order 3 and consists of 24 elements.\n",
616 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n",
617 | "\n",
618 | "\tThe underlying data array is:\n",
619 | "[[[ 0 1 2 3]\n",
620 | " [ 8 9 10 11]\n",
621 | " [16 17 18 19]]\n",
622 | "\n",
623 | " [[ 4 5 6 7]\n",
624 | " [12 13 14 15]\n",
625 | " [20 21 22 23]]]\n"
626 | ]
627 | }
628 | ],
629 | "source": [
630 | "tensor_1.fold()\n",
631 | "tensor_2.fold()\n",
632 | "\n",
633 | "show_meta_information(tensor_1, modes=False, shapes=False, state=False)\n",
634 | "\n",
635 | "print_sep_line()\n",
636 | "\n",
637 | "show_meta_information(tensor_2, modes=False, shapes=False, state=False)"
638 | ]
639 | },
640 | {
641 | "cell_type": "markdown",
642 | "metadata": {},
643 | "source": [
644 | "**Note:** this example is for illustration purposes only, since it does not follow true unfolding/folding expressions that is:\n",
645 | "\n",
646 | "```python\n",
647 | "unfolded_along = mode_order[0][0]\n",
648 | "data_2d.shape[0] != normal_shape[unfolded_along]\n",
649 | "```"
650 | ]
651 | },
652 | {
653 | "cell_type": "markdown",
654 | "metadata": {},
655 | "source": [
656 | "### Custom state: different reshaping type"
657 | ]
658 | },
659 | {
660 | "cell_type": "code",
661 | "execution_count": 13,
662 | "metadata": {},
663 | "outputs": [
664 | {
665 | "name": "stdout",
666 | "output_type": "stream",
667 | "text": [
668 | "This tensor is of order 2 and consists of 24 elements.\n",
669 | "Sizes and names of its modes are (2, 12) and ['mode-0', 'mode-1_mode-2'] respectively.\n",
670 | "\n",
671 | "\tThe underlying data array is:\n",
672 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n",
673 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n",
674 | "\n",
675 | "===================================================================================\n",
676 | "\n",
677 | "This tensor is of order 2 and consists of 24 elements.\n",
678 | "Sizes and names of its modes are (2, 12) and ['mode-0', 'mode-1_mode-2'] respectively.\n",
679 | "\n",
680 | "\tThe underlying data array is:\n",
681 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n",
682 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n"
683 | ]
684 | }
685 | ],
686 | "source": [
687 | "custom_state_1 = dict(mode_order=([0], [1, 2]),\n",
688 | " normal_shape=(2, 3, 4),\n",
689 | " rtype=\"T\"\n",
690 | " )\n",
691 | "custom_state_2 = dict(mode_order=([0], [1, 2]),\n",
692 | " normal_shape=(2, 3, 4),\n",
693 | " rtype=\"K\"\n",
694 | " )\n",
695 | "\n",
696 | "tensor_1 = Tensor(data_2d, custom_state=custom_state_1)\n",
697 | "tensor_2 = Tensor(data_2d, custom_state=custom_state_2)\n",
698 | "\n",
699 | "show_meta_information(tensor_1, modes=False, shapes=False, state=False)\n",
700 | "\n",
701 | "print_sep_line()\n",
702 | "\n",
703 | "show_meta_information(tensor_2, modes=False, shapes=False, state=False)"
704 | ]
705 | },
706 | {
707 | "cell_type": "code",
708 | "execution_count": 14,
709 | "metadata": {},
710 | "outputs": [
711 | {
712 | "name": "stdout",
713 | "output_type": "stream",
714 | "text": [
715 | "This tensor is of order 3 and consists of 24 elements.\n",
716 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n",
717 | "\n",
718 | "\tThe underlying data array is:\n",
719 | "[[[ 0 1 2 3]\n",
720 | " [ 4 5 6 7]\n",
721 | " [ 8 9 10 11]]\n",
722 | "\n",
723 | " [[12 13 14 15]\n",
724 | " [16 17 18 19]\n",
725 | " [20 21 22 23]]]\n",
726 | "\n",
727 | "===================================================================================\n",
728 | "\n",
729 | "This tensor is of order 3 and consists of 24 elements.\n",
730 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n",
731 | "\n",
732 | "\tThe underlying data array is:\n",
733 | "[[[ 0 3 6 9]\n",
734 | " [ 1 4 7 10]\n",
735 | " [ 2 5 8 11]]\n",
736 | "\n",
737 | " [[12 15 18 21]\n",
738 | " [13 16 19 22]\n",
739 | " [14 17 20 23]]]\n"
740 | ]
741 | }
742 | ],
743 | "source": [
744 | "tensor_1.fold()\n",
745 | "tensor_2.fold()\n",
746 | "\n",
747 | "show_meta_information(tensor_1, modes=False, shapes=False, state=False)\n",
748 | "\n",
749 | "print_sep_line()\n",
750 | "\n",
751 | "show_meta_information(tensor_2, modes=False, shapes=False, state=False)"
752 | ]
753 | },
754 | {
755 | "cell_type": "markdown",
756 | "metadata": {},
757 | "source": [
758 | "### Custom state: different normal shape"
759 | ]
760 | },
761 | {
762 | "cell_type": "code",
763 | "execution_count": 15,
764 | "metadata": {},
765 | "outputs": [
766 | {
767 | "name": "stdout",
768 | "output_type": "stream",
769 | "text": [
770 | "This tensor is of order 2 and consists of 24 elements.\n",
771 | "Sizes and names of its modes are (2, 12) and ['mode-0', 'mode-1_mode-2'] respectively.\n",
772 | "\n",
773 | "\tThe underlying data array is:\n",
774 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n",
775 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n",
776 | "\n",
777 | "===================================================================================\n",
778 | "\n",
779 | "This tensor is of order 2 and consists of 24 elements.\n",
780 | "Sizes and names of its modes are (2, 12) and ['mode-0', 'mode-1_mode-2'] respectively.\n",
781 | "\n",
782 | "\tThe underlying data array is:\n",
783 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n",
784 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n"
785 | ]
786 | }
787 | ],
788 | "source": [
789 | "custom_state_1 = dict(mode_order=([0], [1, 2]),\n",
790 | " normal_shape=(2, 3, 4),\n",
791 | " rtype=\"T\"\n",
792 | " )\n",
793 | "custom_state_2 = dict(mode_order=([0], [1, 2]),\n",
794 | " normal_shape=(2, 4, 3),\n",
795 | " rtype=\"T\"\n",
796 | " )\n",
797 | "\n",
798 | "tensor_1 = Tensor(data_2d, custom_state=custom_state_1)\n",
799 | "tensor_2 = Tensor(data_2d, custom_state=custom_state_2)\n",
800 | "\n",
801 | "show_meta_information(tensor_1, modes=False, shapes=False, state=False)\n",
802 | "\n",
803 | "print_sep_line()\n",
804 | "\n",
805 | "show_meta_information(tensor_2, modes=False, shapes=False, state=False)"
806 | ]
807 | },
808 | {
809 | "cell_type": "code",
810 | "execution_count": 16,
811 | "metadata": {},
812 | "outputs": [
813 | {
814 | "name": "stdout",
815 | "output_type": "stream",
816 | "text": [
817 | "This tensor is of order 3 and consists of 24 elements.\n",
818 | "Sizes and names of its modes are (2, 3, 4) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n",
819 | "\n",
820 | "\tThe underlying data array is:\n",
821 | "[[[ 0 1 2 3]\n",
822 | " [ 4 5 6 7]\n",
823 | " [ 8 9 10 11]]\n",
824 | "\n",
825 | " [[12 13 14 15]\n",
826 | " [16 17 18 19]\n",
827 | " [20 21 22 23]]]\n",
828 | "\n",
829 | "===================================================================================\n",
830 | "\n",
831 | "This tensor is of order 3 and consists of 24 elements.\n",
832 | "Sizes and names of its modes are (2, 4, 3) and ['mode-0', 'mode-1', 'mode-2'] respectively.\n",
833 | "\n",
834 | "\tThe underlying data array is:\n",
835 | "[[[ 0 1 2]\n",
836 | " [ 3 4 5]\n",
837 | " [ 6 7 8]\n",
838 | " [ 9 10 11]]\n",
839 | "\n",
840 | " [[12 13 14]\n",
841 | " [15 16 17]\n",
842 | " [18 19 20]\n",
843 | " [21 22 23]]]\n"
844 | ]
845 | }
846 | ],
847 | "source": [
848 | "tensor_1.fold()\n",
849 | "tensor_2.fold()\n",
850 | "\n",
851 | "show_meta_information(tensor_1, modes=False, shapes=False, state=False)\n",
852 | "\n",
853 | "print_sep_line()\n",
854 | "\n",
855 | "show_meta_information(tensor_2, modes=False, shapes=False, state=False)"
856 | ]
857 | },
858 | {
859 | "cell_type": "markdown",
860 | "metadata": {},
861 | "source": [
862 | "### Custom state and mode names"
863 | ]
864 | },
865 | {
866 | "cell_type": "code",
867 | "execution_count": 17,
868 | "metadata": {},
869 | "outputs": [
870 | {
871 | "name": "stdout",
872 | "output_type": "stream",
873 | "text": [
874 | "This tensor is of order 2 and consists of 24 elements.\n",
875 | "Sizes and names of its modes are (3, 8) and ['Time', 'Frequency_Subject'] respectively.\n",
876 | "\n",
877 | "\tThe underlying data array is:\n",
878 | "[[ 0 1 2 3 4 5 6 7]\n",
879 | " [ 8 9 10 11 12 13 14 15]\n",
880 | " [16 17 18 19 20 21 22 23]]\n",
881 | "\n",
882 | "\tInformation about its modes:\n",
883 | "#0: Mode(name='Frequency', index=None)\n",
884 | "#1: Mode(name='Time', index=None)\n",
885 | "#2: Mode(name='Subject', index=None)\n",
886 | "\n",
887 | "\tInformation about its current state:\n",
888 | "State(normal_shape=(3, 2, 4), rtype='T', mode_order=([1], [0, 2]))\n",
889 | "\n",
890 | "===================================================================================\n",
891 | "\n",
892 | "This tensor is of order 3 and consists of 24 elements.\n",
893 | "Sizes and names of its modes are (3, 2, 4) and ['Frequency', 'Time', 'Subject'] respectively.\n",
894 | "\n",
895 | "\tThe underlying data array is:\n",
896 | "[[[ 0 1 2 3]\n",
897 | " [12 13 14 15]]\n",
898 | "\n",
899 | " [[ 4 5 6 7]\n",
900 | " [16 17 18 19]]\n",
901 | "\n",
902 | " [[ 8 9 10 11]\n",
903 | " [20 21 22 23]]]\n",
904 | "\n",
905 | "\tInformation about its modes:\n",
906 | "#0: Mode(name='Frequency', index=None)\n",
907 | "#1: Mode(name='Time', index=None)\n",
908 | "#2: Mode(name='Subject', index=None)\n",
909 | "\n",
910 | "\tInformation about its current state:\n",
911 | "State(normal_shape=(3, 2, 4), rtype='Init', mode_order=([0], [1], [2]))\n"
912 | ]
913 | }
914 | ],
915 | "source": [
916 | "I, J, K = 2, 3, 4\n",
917 | "data_2d = np.arange(I*J*K).reshape(J, (I*K))\n",
918 | "\n",
919 | "custom_state = dict(mode_order=([1], [0, 2]),\n",
920 | " normal_shape=(3, 2, 4),\n",
921 | " rtype=\"T\"\n",
922 | " )\n",
923 | "tensor_1 = Tensor(data_2d, custom_state, mode_names=[\"Frequency\", \"Time\", \"Subject\"])\n",
924 | "show_meta_information(tensor_1, shapes=False)\n",
925 | "\n",
926 | "print_sep_line()\n",
927 | "\n",
928 | "tensor_1.fold()\n",
929 | "show_meta_information(tensor_1, shapes=False)"
930 | ]
931 | }
932 | ],
933 | "metadata": {
934 | "kernelspec": {
935 | "display_name": "hottbox-tutorials",
936 | "language": "python",
937 | "name": "hottbox-tutorials"
938 | },
939 | "language_info": {
940 | "codemirror_mode": {
941 | "name": "ipython",
942 | "version": 3
943 | },
944 | "file_extension": ".py",
945 | "mimetype": "text/x-python",
946 | "name": "python",
947 | "nbconvert_exporter": "python",
948 | "pygments_lexer": "ipython3",
949 | "version": "3.6.6"
950 | }
951 | },
952 | "nbformat": 4,
953 | "nbformat_minor": 2
954 | }
955 |
--------------------------------------------------------------------------------
/5_Tensor_meta_information_and_pandas_integration.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# State and Mode of the Tensor: Main part of meta information\n",
8 | "\n",
9 | "**Note:** this tutorial assumes that you are familiar with the notion of N-dimensional arrays and basic definitions. The related material can be found in our previous tutorials: [tutorial_1](https://github.com/hottbox/hottbox-tutorials/blob/master/1_N-dimensional_arrays_and_Tensor_class.ipynb) and [tutorial_4](https://github.com/hottbox/hottbox-tutorials/blob/master/4_Ecosystem_of_Tensor_class.ipynb).\n",
10 | "\n",
11 | "\n",
12 | "**Requirements:** ``hottbox==0.1.3``\n",
13 | "\n",
14 | "**Authors:** \n",
15 | "Ilya Kisil (ilyakisil@gmail.com); "
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "Meta information about the tensor is represented by the **State** and **Mode** classes.\n",
23 | "\n",
24 | "1. **State** keeps track of transformations applied to the underlying data array and can be seen as a link between the current form of the data array and the current interpretation of its original modes. \n",
25 | "2. **Mode** brings interpretability of the values for the underlying data array.\n",
26 | "\n",
27 | "Without the data array, both of them are standalone classes. But within an ecosystem of **Tensor** class they interact with each other and the data array itself.\n",
28 | "\n",
29 | "Any tensor created using **hottbox** is assigned a default state which depends on the data array. Each mode of the tensor will always have an associated name."
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 1,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "import numpy as np\n",
39 | "import pandas as pd\n",
40 | "from hottbox.core import Tensor\n",
41 | "from hottbox.pdtools import tensor_to_pd, pd_to_tensor\n",
42 | "\n",
43 | "\n",
44 | "def print_tensor_state(tensor, data=True, modes=True, transforms=True):\n",
45 | " \"\"\" Quick util for showing relevant information for this tutorial\n",
46 | " \n",
47 | " Parameters\n",
48 | " ----------\n",
49 | " tensor : Tensor \n",
50 | " data : bool\n",
51 | " If True, show data array \n",
52 | " modes : bool\n",
53 | " If True, show mode information\n",
54 | " \"\"\"\n",
55 | " state = tensor._state\n",
56 | " \n",
57 | " if data:\n",
58 | " print(\"\\tUnderlying data array:\")\n",
59 | " print(tensor.data) \n",
60 | " \n",
61 | " if modes:\n",
62 | " print(\"\\n\\tInformation about its modes:\")\n",
63 | " for i, tensor_mode in enumerate(tensor.modes):\n",
64 | " print(\"#{}: {}\".format(i, tensor_mode)) \n",
65 | " \n",
66 | " print(\"\\nProperties described by modes: {}\".format(tensor.mode_names))\n",
67 | " print(\"Associated normal shape: {}\".format(state.normal_shape)) \n",
68 | " \n",
69 | " if transforms:\n",
70 | " print(\"\\n\\t\\tApplied transformations:\")\n",
71 | " for i, transformation in enumerate(state.transformations):\n",
72 | " print(\"\\tTransformation #{}:\".format(i))\n",
73 | " print(\"Reshaping type: {}\".format(transformation[0]))\n",
74 | " print(\"New mode order: {}\\n\".format(transformation[1]))\n",
75 | "\n",
76 | "\n",
77 | "def print_sep_line():\n",
78 | " print(\"\\n===========================\"\n",
79 | " \"=============================\"\n",
80 | " \"===========================\\n\")"
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {},
86 | "source": [
87 | "## Tensor state: Default VS Custom\n",
88 | "\n",
89 | "The same data values can be characterised by different states. By specifying custom state we implicitly apply transformation to the state of the tensor during its creation.\n",
90 | "Each transformation is represented by the used reshaping type and the resulting order of the modes. The list of **modes** of the tensor is created at the tensor initialisation. It depends on the normal shape if a custom state is provided, otherwise it depends on the shape of the data array."
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 2,
96 | "metadata": {},
97 | "outputs": [
98 | {
99 | "name": "stdout",
100 | "output_type": "stream",
101 | "text": [
102 | "\t\t2-D array as a tensor\n",
103 | "\tUnderlying data array:\n",
104 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n",
105 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n",
106 | "\n",
107 | "\tInformation about its modes:\n",
108 | "#0: Mode(name='mode-0', index=None)\n",
109 | "#1: Mode(name='mode-1', index=None)\n",
110 | "\n",
111 | "Properties described by modes: ['mode-0', 'mode-1']\n",
112 | "Associated normal shape: (2, 12)\n",
113 | "\n",
114 | "\t\tApplied transformations:\n",
115 | "\tTransformation #0:\n",
116 | "Reshaping type: Init\n",
117 | "New mode order: ([0], [1])\n",
118 | "\n",
119 | "\n",
120 | "===================================================================================\n",
121 | "\n",
122 | "\t\t3-D array as an unfolded tensor\n",
123 | "\tUnderlying data array:\n",
124 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n",
125 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n",
126 | "\n",
127 | "\tInformation about its modes:\n",
128 | "#0: Mode(name='mode-0', index=None)\n",
129 | "#1: Mode(name='mode-1', index=None)\n",
130 | "#2: Mode(name='mode-2', index=None)\n",
131 | "\n",
132 | "Properties described by modes: ['mode-0', 'mode-1_mode-2']\n",
133 | "Associated normal shape: (2, 3, 4)\n",
134 | "\n",
135 | "\t\tApplied transformations:\n",
136 | "\tTransformation #0:\n",
137 | "Reshaping type: Init\n",
138 | "New mode order: ([0], [1], [2])\n",
139 | "\n",
140 | "\tTransformation #1:\n",
141 | "Reshaping type: T\n",
142 | "New mode order: ([0], [1, 2])\n",
143 | "\n",
144 | "\n",
145 | "===================================================================================\n",
146 | "\n",
147 | "\t\t4-D array as an unfolded tensor\n",
148 | "\tUnderlying data array:\n",
149 | "[[ 0 1 2 3 4 5 6 7 8 9 10 11]\n",
150 | " [12 13 14 15 16 17 18 19 20 21 22 23]]\n",
151 | "\n",
152 | "\tInformation about its modes:\n",
153 | "#0: Mode(name='mode-0', index=None)\n",
154 | "#1: Mode(name='mode-1', index=None)\n",
155 | "#2: Mode(name='mode-2', index=None)\n",
156 | "#3: Mode(name='mode-3', index=None)\n",
157 | "\n",
158 | "Properties described by modes: ['mode-0', 'mode-1_mode-2_mode-3']\n",
159 | "Associated normal shape: (2, 3, 2, 2)\n",
160 | "\n",
161 | "\t\tApplied transformations:\n",
162 | "\tTransformation #0:\n",
163 | "Reshaping type: Init\n",
164 | "New mode order: ([0], [1], [2], [3])\n",
165 | "\n",
166 | "\tTransformation #1:\n",
167 | "Reshaping type: T\n",
168 | "New mode order: ([0], [1, 2, 3])\n",
169 | "\n"
170 | ]
171 | }
172 | ],
173 | "source": [
174 | "I, J, K, L = 2, 3, 2, 2\n",
175 | "\n",
176 | "data = np.arange(I*J*K*L).reshape(I, (J*K*L))\n",
177 | "\n",
178 | "\n",
179 | "custom_state_1 = dict(mode_order=([0], [1, 2]),\n",
180 | " normal_shape=(I, J, K*L),\n",
181 | " rtype=\"T\"\n",
182 | " )\n",
183 | "custom_state_2 = dict(mode_order=([0], [1, 2, 3]),\n",
184 | " normal_shape=(I, J, K, L),\n",
185 | " rtype=\"T\"\n",
186 | " )\n",
187 | "\n",
188 | "tensor = Tensor(data)\n",
189 | "tensor_1 = Tensor(data, custom_state_1)\n",
190 | "tensor_2 = Tensor(data, custom_state_2)\n",
191 | "\n",
192 | "print(\"\\t\\t2-D array as a tensor\")\n",
193 | "print_tensor_state(tensor)\n",
194 | "\n",
195 | "print_sep_line()\n",
196 | "\n",
197 | "print(\"\\t\\t3-D array as an unfolded tensor\")\n",
198 | "print_tensor_state(tensor_1)\n",
199 | "\n",
200 | "print_sep_line()\n",
201 | "\n",
202 | "print(\"\\t\\t4-D array as an unfolded tensor\")\n",
203 | "print_tensor_state(tensor_2)"
204 | ]
205 | },
206 | {
207 | "cell_type": "markdown",
208 | "metadata": {},
209 | "source": [
210 | "Here we can see that the tensors with the same data values are actually in different states and have a different number of modes. These modes have default names but can be changed during object creation or by calling **set_mode_names()** - the designated method of the **Tensor** class to change their names.\n",
211 | "\n",
212 | "Next, we can bring a tensor (for which we specified a **custom state**) to the normal form by calling the **fold()** method."
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": 3,
218 | "metadata": {},
219 | "outputs": [
220 | {
221 | "name": "stdout",
222 | "output_type": "stream",
223 | "text": [
224 | "\tUnderlying data array:\n",
225 | "[[[ 0 1 2 3]\n",
226 | " [ 4 5 6 7]\n",
227 | " [ 8 9 10 11]]\n",
228 | "\n",
229 | " [[12 13 14 15]\n",
230 | " [16 17 18 19]\n",
231 | " [20 21 22 23]]]\n",
232 | "\n",
233 | "\tInformation about its modes:\n",
234 | "#0: Mode(name='mode-0', index=None)\n",
235 | "#1: Mode(name='mode-1', index=None)\n",
236 | "#2: Mode(name='mode-2', index=None)\n",
237 | "\n",
238 | "Properties described by modes: ['mode-0', 'mode-1', 'mode-2']\n",
239 | "Associated normal shape: (2, 3, 4)\n",
240 | "\n",
241 | "\t\tApplied transformations:\n",
242 | "\tTransformation #0:\n",
243 | "Reshaping type: Init\n",
244 | "New mode order: ([0], [1], [2])\n",
245 | "\n",
246 | "\n",
247 | "===================================================================================\n",
248 | "\n",
249 | "\tUnderlying data array:\n",
250 | "[[[[ 0 1]\n",
251 | " [ 2 3]]\n",
252 | "\n",
253 | " [[ 4 5]\n",
254 | " [ 6 7]]\n",
255 | "\n",
256 | " [[ 8 9]\n",
257 | " [10 11]]]\n",
258 | "\n",
259 | "\n",
260 | " [[[12 13]\n",
261 | " [14 15]]\n",
262 | "\n",
263 | " [[16 17]\n",
264 | " [18 19]]\n",
265 | "\n",
266 | " [[20 21]\n",
267 | " [22 23]]]]\n",
268 | "\n",
269 | "\tInformation about its modes:\n",
270 | "#0: Mode(name='mode-0', index=None)\n",
271 | "#1: Mode(name='mode-1', index=None)\n",
272 | "#2: Mode(name='mode-2', index=None)\n",
273 | "#3: Mode(name='mode-3', index=None)\n",
274 | "\n",
275 | "Properties described by modes: ['mode-0', 'mode-1', 'mode-2', 'mode-3']\n",
276 | "Associated normal shape: (2, 3, 2, 2)\n",
277 | "\n",
278 | "\t\tApplied transformations:\n",
279 | "\tTransformation #0:\n",
280 | "Reshaping type: Init\n",
281 | "New mode order: ([0], [1], [2], [3])\n",
282 | "\n"
283 | ]
284 | }
285 | ],
286 | "source": [
287 | "tensor_1.fold()\n",
288 | "tensor_2.fold()\n",
289 | "\n",
290 | "print_tensor_state(tensor_1)\n",
291 | "\n",
292 | "print_sep_line()\n",
293 | "\n",
294 | "print_tensor_state(tensor_2)"
295 | ]
296 | },
297 | {
298 | "cell_type": "markdown",
299 | "metadata": {},
300 | "source": [
301 | "**Note:** at the moment, only one transformation can be applied to a tensor. This will be generalised in the future. "
302 | ]
303 | },
304 | {
305 | "cell_type": "markdown",
306 | "metadata": {},
307 | "source": [
308 | "## Tensor modes: integration with pandas library\n",
309 | "\n",
310 | "**Hottbox** is equipped with tools to convert a multi-index pandas dataframe to a tensor and vice versa. You can keep all meta information, keep only the mode names, or drop all of it."
311 | ]
312 | },
313 | {
314 | "cell_type": "markdown",
315 | "metadata": {},
316 | "source": [
317 | "### Multi-index dataframe to Tensor"
318 | ]
319 | },
320 | {
321 | "cell_type": "code",
322 | "execution_count": 4,
323 | "metadata": {},
324 | "outputs": [
325 | {
326 | "data": {
327 | "text/html": [
328 | "
\n",
329 | "\n",
342 | "
\n",
343 | " \n",
344 | " \n",
345 | " | \n",
346 | " | \n",
347 | " | \n",
348 | " Population | \n",
349 | "
\n",
350 | " \n",
351 | " Year | \n",
352 | " Month | \n",
353 | " Day | \n",
354 | " | \n",
355 | "
\n",
356 | " \n",
357 | " \n",
358 | " \n",
359 | " 2005 | \n",
360 | " Jan | \n",
361 | " Mon | \n",
362 | " 0 | \n",
363 | "
\n",
364 | " \n",
365 | " Wed | \n",
366 | " 1 | \n",
367 | "
\n",
368 | " \n",
369 | " Feb | \n",
370 | " Mon | \n",
371 | " 2 | \n",
372 | "
\n",
373 | " \n",
374 | " Wed | \n",
375 | " 3 | \n",
376 | "
\n",
377 | " \n",
378 | " 2010 | \n",
379 | " Jan | \n",
380 | " Mon | \n",
381 | " 4 | \n",
382 | "
\n",
383 | " \n",
384 | " Wed | \n",
385 | " 5 | \n",
386 | "
\n",
387 | " \n",
388 | " Feb | \n",
389 | " Mon | \n",
390 | " 6 | \n",
391 | "
\n",
392 | " \n",
393 | " Wed | \n",
394 | " 7 | \n",
395 | "
\n",
396 | " \n",
397 | "
\n",
398 | "
"
399 | ],
400 | "text/plain": [
401 | " Population\n",
402 | "Year Month Day \n",
403 | "2005 Jan Mon 0\n",
404 | " Wed 1\n",
405 | " Feb Mon 2\n",
406 | " Wed 3\n",
407 | "2010 Jan Mon 4\n",
408 | " Wed 5\n",
409 | " Feb Mon 6\n",
410 | " Wed 7"
411 | ]
412 | },
413 | "execution_count": 4,
414 | "metadata": {},
415 | "output_type": "execute_result"
416 | }
417 | ],
418 | "source": [
419 | "data = {'Year': [2005, 2005, 2005, 2005, 2010, 2010, 2010, 2010],\n",
420 | " 'Month': ['Jan', 'Jan', 'Feb', 'Feb', 'Jan', 'Jan', 'Feb', 'Feb'],\n",
421 | " 'Day': ['Mon', 'Wed', 'Mon', 'Wed', 'Mon', 'Wed', 'Mon', 'Wed'],\n",
422 | " 'Population': np.arange(8)\n",
423 | " }\n",
424 | "df = pd.DataFrame.from_dict(data)\n",
425 | "df.set_index([\"Year\", \"Month\", \"Day\"], inplace=True)\n",
426 | "df"
427 | ]
428 | },
429 | {
430 | "cell_type": "code",
431 | "execution_count": 5,
432 | "metadata": {},
433 | "outputs": [
434 | {
435 | "name": "stdout",
436 | "output_type": "stream",
437 | "text": [
438 | "\tUnderlying data array:\n",
439 | "[[[0 1]\n",
440 | " [2 3]]\n",
441 | "\n",
442 | " [[4 5]\n",
443 | " [6 7]]]\n",
444 | "\n",
445 | "\tInformation about its modes:\n",
446 | "#0: Mode(name='Year', index=[2005, 2010])\n",
447 | "#1: Mode(name='Month', index=['Jan', 'Feb'])\n",
448 | "#2: Mode(name='Day', index=['Mon', 'Wed'])\n",
449 | "\n",
450 | "Properties described by modes: ['Year', 'Month', 'Day']\n",
451 | "Associated normal shape: (2, 2, 2)\n"
452 | ]
453 | }
454 | ],
455 | "source": [
456 | "tensor_1 = pd_to_tensor(df)\n",
457 | "print_tensor_state(tensor_1, transforms=False)"
458 | ]
459 | },
460 | {
461 | "cell_type": "code",
462 | "execution_count": 6,
463 | "metadata": {},
464 | "outputs": [
465 | {
466 | "name": "stdout",
467 | "output_type": "stream",
468 | "text": [
469 | "\tUnderlying data array:\n",
470 | "[[[0 1]\n",
471 | " [2 3]]\n",
472 | "\n",
473 | " [[4 5]\n",
474 | " [6 7]]]\n",
475 | "\n",
476 | "\tInformation about its modes:\n",
477 | "#0: Mode(name='Year', index=None)\n",
478 | "#1: Mode(name='Month', index=None)\n",
479 | "#2: Mode(name='Day', index=None)\n",
480 | "\n",
481 | "Properties described by modes: ['Year', 'Month', 'Day']\n",
482 | "Associated normal shape: (2, 2, 2)\n"
483 | ]
484 | }
485 | ],
486 | "source": [
487 | "tensor_2 = pd_to_tensor(df, keep_index=False)\n",
488 | "print_tensor_state(tensor_2, transforms=False)"
489 | ]
490 | },
491 | {
492 | "cell_type": "markdown",
493 | "metadata": {},
494 | "source": [
495 | "### Tensor to Multi-index dataframe\n",
496 | "\n",
497 | "When a tensor is converted to a multi-index dataframe, the information about its modes is extracted, which is then used for the column name and index values of the resulting dataframe. Next, we show various ways of specifying names/indices for modes of the tensor and how this affects the result of the conversion."
498 | ]
499 | },
500 | {
501 | "cell_type": "code",
502 | "execution_count": 7,
503 | "metadata": {},
504 | "outputs": [
505 | {
506 | "name": "stdout",
507 | "output_type": "stream",
508 | "text": [
509 | "\n",
510 | "\tInformation about its modes:\n",
511 | "#0: Mode(name='mode-0', index=None)\n",
512 | "#1: Mode(name='mode-1', index=None)\n",
513 | "#2: Mode(name='mode-2', index=None)\n",
514 | "\n",
515 | "Properties described by modes: ['mode-0', 'mode-1', 'mode-2']\n",
516 | "Associated normal shape: (2, 2, 2)\n"
517 | ]
518 | },
519 | {
520 | "data": {
521 | "text/html": [
522 | "\n",
523 | "\n",
536 | "
\n",
537 | " \n",
538 | " \n",
539 | " | \n",
540 | " | \n",
541 | " | \n",
542 | " Values | \n",
543 | "
\n",
544 | " \n",
545 | " mode-0 | \n",
546 | " mode-1 | \n",
547 | " mode-2 | \n",
548 | " | \n",
549 | "
\n",
550 | " \n",
551 | " \n",
552 | " \n",
553 | " 0 | \n",
554 | " 0 | \n",
555 | " 0 | \n",
556 | " 0 | \n",
557 | "
\n",
558 | " \n",
559 | " 1 | \n",
560 | " 1 | \n",
561 | "
\n",
562 | " \n",
563 | " 1 | \n",
564 | " 0 | \n",
565 | " 2 | \n",
566 | "
\n",
567 | " \n",
568 | " 1 | \n",
569 | " 3 | \n",
570 | "
\n",
571 | " \n",
572 | " 1 | \n",
573 | " 0 | \n",
574 | " 0 | \n",
575 | " 4 | \n",
576 | "
\n",
577 | " \n",
578 | " 1 | \n",
579 | " 5 | \n",
580 | "
\n",
581 | " \n",
582 | " 1 | \n",
583 | " 0 | \n",
584 | " 6 | \n",
585 | "
\n",
586 | " \n",
587 | " 1 | \n",
588 | " 7 | \n",
589 | "
\n",
590 | " \n",
591 | "
\n",
592 | "
"
593 | ],
594 | "text/plain": [
595 | " Values\n",
596 | "mode-0 mode-1 mode-2 \n",
597 | "0 0 0 0\n",
598 | " 1 1\n",
599 | " 1 0 2\n",
600 | " 1 3\n",
601 | "1 0 0 4\n",
602 | " 1 5\n",
603 | " 1 0 6\n",
604 | " 1 7"
605 | ]
606 | },
607 | "execution_count": 7,
608 | "metadata": {},
609 | "output_type": "execute_result"
610 | }
611 | ],
612 | "source": [
613 | "# Default meta information\n",
614 | "data = np.arange(8).reshape(2, 2, 2)\n",
615 | "tensor = Tensor(data)\n",
616 | "df = tensor_to_pd(tensor)\n",
617 | "\n",
618 | "print_tensor_state(tensor, data=False, transforms=False)\n",
619 | "df"
620 | ]
621 | },
622 | {
623 | "cell_type": "code",
624 | "execution_count": 8,
625 | "metadata": {},
626 | "outputs": [
627 | {
628 | "name": "stdout",
629 | "output_type": "stream",
630 | "text": [
631 | "\n",
632 | "\tInformation about its modes:\n",
633 | "#0: Mode(name='Year', index=None)\n",
634 | "#1: Mode(name='Month', index=None)\n",
635 | "#2: Mode(name='Day', index=None)\n",
636 | "\n",
637 | "Properties described by modes: ['Year', 'Month', 'Day']\n",
638 | "Associated normal shape: (2, 2, 2)\n"
639 | ]
640 | },
641 | {
642 | "data": {
643 | "text/html": [
644 | "\n",
645 | "\n",
658 | "
\n",
659 | " \n",
660 | " \n",
661 | " | \n",
662 | " | \n",
663 | " | \n",
664 | " Values | \n",
665 | "
\n",
666 | " \n",
667 | " Year | \n",
668 | " Month | \n",
669 | " Day | \n",
670 | " | \n",
671 | "
\n",
672 | " \n",
673 | " \n",
674 | " \n",
675 | " 0 | \n",
676 | " 0 | \n",
677 | " 0 | \n",
678 | " 0 | \n",
679 | "
\n",
680 | " \n",
681 | " 1 | \n",
682 | " 1 | \n",
683 | "
\n",
684 | " \n",
685 | " 1 | \n",
686 | " 0 | \n",
687 | " 2 | \n",
688 | "
\n",
689 | " \n",
690 | " 1 | \n",
691 | " 3 | \n",
692 | "
\n",
693 | " \n",
694 | " 1 | \n",
695 | " 0 | \n",
696 | " 0 | \n",
697 | " 4 | \n",
698 | "
\n",
699 | " \n",
700 | " 1 | \n",
701 | " 5 | \n",
702 | "
\n",
703 | " \n",
704 | " 1 | \n",
705 | " 0 | \n",
706 | " 6 | \n",
707 | "
\n",
708 | " \n",
709 | " 1 | \n",
710 | " 7 | \n",
711 | "
\n",
712 | " \n",
713 | "
\n",
714 | "
"
715 | ],
716 | "text/plain": [
717 | " Values\n",
718 | "Year Month Day \n",
719 | "0 0 0 0\n",
720 | " 1 1\n",
721 | " 1 0 2\n",
722 | " 1 3\n",
723 | "1 0 0 4\n",
724 | " 1 5\n",
725 | " 1 0 6\n",
726 | " 1 7"
727 | ]
728 | },
729 | "execution_count": 8,
730 | "metadata": {},
731 | "output_type": "execute_result"
732 | }
733 | ],
734 | "source": [
735 | "# Custom mode names\n",
736 | "# Can also be passed as a list of names during creation of the tensor\n",
737 | "data = np.arange(8).reshape(2, 2, 2)\n",
738 | "new_mode_names = {0: \"Year\",\n",
739 | " 1: \"Month\",\n",
740 | " 2: \"Day\"\n",
741 | " }\n",
742 | "tensor = Tensor(data)\n",
743 | "tensor.set_mode_names(new_mode_names)\n",
744 | "df = tensor_to_pd(tensor)\n",
745 | "\n",
746 | "print_tensor_state(tensor, data=False, transforms=False)\n",
747 | "df"
748 | ]
749 | },
750 | {
751 | "cell_type": "code",
752 | "execution_count": 9,
753 | "metadata": {},
754 | "outputs": [
755 | {
756 | "name": "stdout",
757 | "output_type": "stream",
758 | "text": [
759 | "\n",
760 | "\tInformation about its modes:\n",
761 | "#0: Mode(name='mode-0', index=[2005, 2010])\n",
762 | "#1: Mode(name='mode-1', index=['Jan', 'Feb'])\n",
763 | "#2: Mode(name='mode-2', index=['Mon', 'Wed'])\n",
764 | "\n",
765 | "Properties described by modes: ['mode-0', 'mode-1', 'mode-2']\n",
766 | "Associated normal shape: (2, 2, 2)\n"
767 | ]
768 | },
769 | {
770 | "data": {
771 | "text/html": [
772 | "\n",
773 | "\n",
786 | "
\n",
787 | " \n",
788 | " \n",
789 | " | \n",
790 | " | \n",
791 | " | \n",
792 | " Values | \n",
793 | "
\n",
794 | " \n",
795 | " mode-0 | \n",
796 | " mode-1 | \n",
797 | " mode-2 | \n",
798 | " | \n",
799 | "
\n",
800 | " \n",
801 | " \n",
802 | " \n",
803 | " 2005 | \n",
804 | " Jan | \n",
805 | " Mon | \n",
806 | " 0 | \n",
807 | "
\n",
808 | " \n",
809 | " Wed | \n",
810 | " 1 | \n",
811 | "
\n",
812 | " \n",
813 | " Feb | \n",
814 | " Mon | \n",
815 | " 2 | \n",
816 | "
\n",
817 | " \n",
818 | " Wed | \n",
819 | " 3 | \n",
820 | "
\n",
821 | " \n",
822 | " 2010 | \n",
823 | " Jan | \n",
824 | " Mon | \n",
825 | " 4 | \n",
826 | "
\n",
827 | " \n",
828 | " Wed | \n",
829 | " 5 | \n",
830 | "
\n",
831 | " \n",
832 | " Feb | \n",
833 | " Mon | \n",
834 | " 6 | \n",
835 | "
\n",
836 | " \n",
837 | " Wed | \n",
838 | " 7 | \n",
839 | "
\n",
840 | " \n",
841 | "
\n",
842 | "
"
843 | ],
844 | "text/plain": [
845 | " Values\n",
846 | "mode-0 mode-1 mode-2 \n",
847 | "2005 Jan Mon 0\n",
848 | " Wed 1\n",
849 | " Feb Mon 2\n",
850 | " Wed 3\n",
851 | "2010 Jan Mon 4\n",
852 | " Wed 5\n",
853 | " Feb Mon 6\n",
854 | " Wed 7"
855 | ]
856 | },
857 | "execution_count": 9,
858 | "metadata": {},
859 | "output_type": "execute_result"
860 | }
861 | ],
862 | "source": [
863 | "# Custom mode index\n",
864 | "data = np.arange(8).reshape(2, 2, 2)\n",
865 | "tensor = Tensor(data)\n",
866 | "new_mode_index = {0: [2005, 2010],\n",
867 | " 1: [\"Jan\", \"Feb\"],\n",
868 | " 2: [\"Mon\", \"Wed\"],\n",
869 | " }\n",
870 | "tensor.set_mode_index(new_mode_index)\n",
871 | "df = tensor_to_pd(tensor)\n",
872 | "\n",
873 | "print_tensor_state(tensor, data=False, transforms=False)\n",
874 | "df"
875 | ]
876 | },
877 | {
878 | "cell_type": "code",
879 | "execution_count": 10,
880 | "metadata": {},
881 | "outputs": [
882 | {
883 | "name": "stdout",
884 | "output_type": "stream",
885 | "text": [
886 | "\n",
887 | "\tInformation about its modes:\n",
888 | "#0: Mode(name='Year', index=[2005, 2010])\n",
889 | "#1: Mode(name='Month', index=['Jan', 'Feb'])\n",
890 | "#2: Mode(name='Day', index=['Mon', 'Wed'])\n",
891 | "\n",
892 | "Properties described by modes: ['Year', 'Month', 'Day']\n",
893 | "Associated normal shape: (2, 2, 2)\n"
894 | ]
895 | },
896 | {
897 | "data": {
898 | "text/html": [
899 | "\n",
900 | "\n",
913 | "
\n",
914 | " \n",
915 | " \n",
916 | " | \n",
917 | " | \n",
918 | " | \n",
919 | " Population | \n",
920 | "
\n",
921 | " \n",
922 | " Year | \n",
923 | " Month | \n",
924 | " Day | \n",
925 | " | \n",
926 | "
\n",
927 | " \n",
928 | " \n",
929 | " \n",
930 | " 2005 | \n",
931 | " Jan | \n",
932 | " Mon | \n",
933 | " 0 | \n",
934 | "
\n",
935 | " \n",
936 | " Wed | \n",
937 | " 1 | \n",
938 | "
\n",
939 | " \n",
940 | " Feb | \n",
941 | " Mon | \n",
942 | " 2 | \n",
943 | "
\n",
944 | " \n",
945 | " Wed | \n",
946 | " 3 | \n",
947 | "
\n",
948 | " \n",
949 | " 2010 | \n",
950 | " Jan | \n",
951 | " Mon | \n",
952 | " 4 | \n",
953 | "
\n",
954 | " \n",
955 | " Wed | \n",
956 | " 5 | \n",
957 | "
\n",
958 | " \n",
959 | " Feb | \n",
960 | " Mon | \n",
961 | " 6 | \n",
962 | "
\n",
963 | " \n",
964 | " Wed | \n",
965 | " 7 | \n",
966 | "
\n",
967 | " \n",
968 | "
\n",
969 | "
"
970 | ],
971 | "text/plain": [
972 | " Population\n",
973 | "Year Month Day \n",
974 | "2005 Jan Mon 0\n",
975 | " Wed 1\n",
976 | " Feb Mon 2\n",
977 | " Wed 3\n",
978 | "2010 Jan Mon 4\n",
979 | " Wed 5\n",
980 | " Feb Mon 6\n",
981 | " Wed 7"
982 | ]
983 | },
984 | "execution_count": 10,
985 | "metadata": {},
986 | "output_type": "execute_result"
987 | }
988 | ],
989 | "source": [
990 | "# Custom mode names, mode index and column name for dataframe\n",
991 | "data = np.arange(8).reshape(2, 2, 2)\n",
992 | "new_mode_index = {0: [2005, 2010],\n",
993 | " 1: [\"Jan\", \"Feb\"],\n",
994 | " 2: [\"Mon\", \"Wed\"],\n",
995 | " }\n",
996 | "tensor = Tensor(data, mode_names=[\"Year\", \"Month\", \"Day\"])\n",
997 | "tensor.set_mode_index(new_mode_index)\n",
998 | "df = tensor_to_pd(tensor, col_name=\"Population\")\n",
999 | "\n",
1000 | "print_tensor_state(tensor, data=False, transforms=False)\n",
1001 | "df"
1002 | ]
1003 | }
1004 | ],
1005 | "metadata": {
1006 | "kernelspec": {
1007 | "display_name": "hottbox-tutorials",
1008 | "language": "python",
1009 | "name": "hottbox-tutorials"
1010 | },
1011 | "language_info": {
1012 | "codemirror_mode": {
1013 | "name": "ipython",
1014 | "version": 3
1015 | },
1016 | "file_extension": ".py",
1017 | "mimetype": "text/x-python",
1018 | "name": "python",
1019 | "nbconvert_exporter": "python",
1020 | "pygments_lexer": "ipython3",
1021 | "version": "3.6.6"
1022 | }
1023 | },
1024 | "nbformat": 4,
1025 | "nbformat_minor": 2
1026 | }
1027 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | HOTTBOX tutorials
2 | =================
3 | |Binder|_
4 |
5 | .. |Binder| image:: https://mybinder.org/badge.svg
6 | .. _Binder: https://mybinder.org/v2/gh/hottbox/hottbox-tutorials/master
7 |
8 | This repository contains a series of tutorials on how to use `hottbox <https://github.com/hottbox/hottbox>`_.
9 |
10 |
11 | Local Installation
12 | ==================
13 |
14 | In order to get started you need to clone this repository and install
15 | packages specified in ``requirements.txt``::
16 |
17 | git clone https://github.com/hottbox/hottbox-tutorials
18 |
19 | cd hottbox-tutorials
20 |
21 | pip install -r requirements.txt
22 |
23 |
24 | If you are on Unix and have anaconda installed, you can execute ``bootstrap_venv.sh``.
25 | This script will prepare a new virtual environment for these tutorials::
26 |
27 | git clone https://github.com/hottbox/hottbox-tutorials
28 |
29 | source bootstrap_venv.sh
30 |
31 |
32 | Table of contents:
33 | ==================
34 | .. |ti1| image:: https://mybinder.org/badge.svg
35 | .. _ti1: https://mybinder.org/v2/gh/hottbox/hottbox-tutorials/master?filepath=1_N-dimensional_arrays_and_Tensor_class.ipynb
36 | .. _Tutorial1: https://github.com/hottbox/hottbox-tutorials/blob/master/1_N-dimensional_arrays_and_Tensor_class.ipynb
37 |
38 | .. |ti2| image:: https://mybinder.org/badge.svg
39 | .. _ti2: https://mybinder.org/v2/gh/hottbox/hottbox-tutorials/master?filepath=2_Efficient_representations_of_tensors.ipynb
40 | .. _Tutorial2: https://github.com/hottbox/hottbox-tutorials/blob/master/2_Efficient_representations_of_tensors.ipynb
41 |
42 |
43 | .. |ti3| image:: https://mybinder.org/badge.svg
44 | .. _ti3: https://mybinder.org/v2/gh/hottbox/hottbox-tutorials/master?filepath=3_Fundamental_tensor_decompositions.ipynb
45 | .. _Tutorial3: https://github.com/hottbox/hottbox-tutorials/blob/master/3_Fundamental_tensor_decompositions.ipynb
46 |
47 | .. |ti4| image:: https://mybinder.org/badge.svg
48 | .. _ti4: https://mybinder.org/v2/gh/hottbox/hottbox-tutorials/master?filepath=4_Ecosystem_of_Tensor_class.ipynb
49 | .. _Tutorial4: https://github.com/hottbox/hottbox-tutorials/blob/master/4_Ecosystem_of_Tensor_class.ipynb
50 |
51 |
52 | .. |ti5| image:: https://mybinder.org/badge.svg
53 | .. _ti5: https://mybinder.org/v2/gh/hottbox/hottbox-tutorials/master?filepath=5_Tensor_meta_information_and_pandas_integration.ipynb
54 | .. _Tutorial5: https://github.com/hottbox/hottbox-tutorials/blob/master/5_Tensor_meta_information_and_pandas_integration.ipynb
55 |
56 |
57 | +--------------------------------------------------------------------------------------+-------------------------------+--------------------------------------+
58 | | Focus of the tutorial | Static notebook on github.com | Interactive notebook on mybinder.org |
59 | +======================================================================================+===============================+======================================+
60 | | 1. N-dimensional arrays and its functionality: Tensor | `Tutorial1`_ | |ti1|_ |
61 | +--------------------------------------------------------------------------------------+-------------------------------+--------------------------------------+
62 | | 2. Efficient representation of N-dimensional arrays: TensorCPD, TensorTKD, TensorTT | `Tutorial2`_ | |ti2|_ |
63 | +--------------------------------------------------------------------------------------+-------------------------------+--------------------------------------+
64 | | 3. Fundamental tensor decompositions: CPD, HOSVD, HOOI, TTSVD | `Tutorial3`_ | |ti3|_ |
65 | +--------------------------------------------------------------------------------------+-------------------------------+--------------------------------------+
66 | | 4. Ecosystem of Tensor class and transformations | `Tutorial4`_ | |ti4|_ |
67 | +--------------------------------------------------------------------------------------+-------------------------------+--------------------------------------+
68 | | 5. Tensor meta information and pandas integration | `Tutorial5`_ | |ti5|_ |
69 | +--------------------------------------------------------------------------------------+-------------------------------+--------------------------------------+
70 |
71 |
72 | Data used in these tutorials
73 | ============================
74 |
75 | All data for these tutorials can be found under ``data/`` directory.
76 |
77 |
78 |
79 | Short description of datasets
80 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
81 |
82 | - **ETH80** dataset
83 |
84 | This dataset consists of 3,280 images of natural objects from 8 categories (apple, car, cow, cup, dog, horse, pear, tomato), each containing 10 objects with 41 views per object. More info about this dataset can be found `here `_.
85 |
86 |
87 |
88 | Short description of files with data
89 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
90 |
91 | 1. ``data/ETH80/basic_066-063.npy``
92 |
93 | Contains only one RGB image of one object from each category, which makes it a total of 8 samples. The view point identifier for all of them is ``066-063``. These images are 128 by 128 pixels and are stored in the unfolded form. Thus, when this file is read by ``numpy`` it outputs an array with 8 rows and 128\*128\*3 = 49152 columns.
94 |
--------------------------------------------------------------------------------
/bootstrap_venv.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | RED="\033[0;31m"
4 | GREEN="\033[0;32m"
5 | CYAN="\033[0;36m"
6 | BROWN="\033[0;33m"
7 | WHITE="\033[0;0m"
8 |
9 | VENV_NAME="hottbox-tutorials"
10 | proceed_installation=1
11 |
12 | welcome_message(){
13 | printf "\n====================================================================\n"
14 |
15 |
16 | printf "\n\tWelcome to a series of tutorials on HOTTBOX. \n\n"
17 | printf "This script will prepare a virtual environment for these tutorials. \n"
18 | printf "Here is what is going to happen during this process: \n\n"
19 | printf "1) Use anaconda to create a new venv: ${GREEN}${VENV_NAME}${WHITE} \n\n"
20 | printf "2) Install required packages: \n"
21 | cat requirements.txt
22 | printf "\n\n"
23 | printf "3) Install ipykernel: ${GREEN}${VENV_NAME}${WHITE}\n"
24 |
25 | printf "\n====================================================================\n\n"
26 |
27 | }
28 |
29 |
30 | ##################################################################
31 | ###-------------------- Main --------------------###
32 | ##################################################################
33 |
34 |
35 | welcome_message
36 |
37 |
38 | printf "Do you want to proceed? (y/n) "
39 | answer=$( while ! head -c 1 | grep -i '[ny]' ;do true ;done )
40 | if echo "$answer" | grep -iq "^y" ;then
41 | echo -e "\nFingers crossed and start $GREEN :-/ $WHITE"
42 | else
43 | echo -e "\nQuitting $RED :-( $WHITE\n"
44 | proceed_installation=0
45 | fi
46 |
47 | ###-------- Check if conda installation exists
48 | if [[ ($proceed_installation == 1) ]]; then
49 | printf "\nChecking to see if ${GREEN}anaconda${WHITE} is installed: "
50 | if ! [ -x "$(command -v conda)" ]; then
51 | echo -e "${RED}not installed${WHITE}.\n"
52 | echo -e "You need have ${RED}anaconda${WHITE} to proceed with this script."
53 | echo -e "Abort installation, nothing has been configured.\n"
54 | proceed_installation=0
55 | else
56 | echo -e "${GREEN}yes, it is${WHITE}."
57 | fi
58 | fi
59 |
60 | ###-------- Environment creation
61 | if [[ ($proceed_installation == 1) ]]; then
62 | conda create --name ${VENV_NAME} python=3.6
63 | source activate ${VENV_NAME}
64 | VENV_HOME="$(which python)"
65 |
66 | pip install --upgrade pip
67 | pip install -r requirements.txt
68 | python -m ipykernel install --user --name ${VENV_NAME} --display-name ${VENV_NAME}
69 |
70 | printf "\n"
71 | printf "====================================================================\n"
72 | printf "============ ============\n"
73 | printf "============ Working environment is ready ============\n"
74 | printf "============ ============\n"
75 | printf "====================================================================\n"
76 | printf "\n"
77 | printf "1) Python interpreter for '$VENV_NAME' is located in: \n\n\t"
78 | printf "${VENV_HOME}\n\n"
79 |
80 | printf "2) The associate ipython kernel (used in the tutorials by default) is located in: \n\n\t"
81 | jupyter kernelspec list | grep "$VENV_NAME"
82 |
83 | printf "\n"
84 | printf "3) You can also select this kernel (${VENV_NAME}), if you want to play around in your own notebook.\n\n"
85 |
86 | source deactivate
87 | fi
88 |
--------------------------------------------------------------------------------
/data/ETH80/basic_066-063.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/data/ETH80/basic_066-063.npy
--------------------------------------------------------------------------------
/images/C_Fortran_ordering.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/C_Fortran_ordering.png
--------------------------------------------------------------------------------
/images/TensorCPD.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/TensorCPD.png
--------------------------------------------------------------------------------
/images/TensorTKD.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/TensorTKD.png
--------------------------------------------------------------------------------
/images/TensorTT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/TensorTT.png
--------------------------------------------------------------------------------
/images/cpd_as_rank_one.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/cpd_as_rank_one.png
--------------------------------------------------------------------------------
/images/data-modes-state.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/data-modes-state.png
--------------------------------------------------------------------------------
/images/different-forms-of-data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/different-forms-of-data.png
--------------------------------------------------------------------------------
/images/different-tensors.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/different-tensors.png
--------------------------------------------------------------------------------
/images/folding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/folding.png
--------------------------------------------------------------------------------
/images/mode_n_product.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/mode_n_product.png
--------------------------------------------------------------------------------
/images/outerproduct_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/outerproduct_3.png
--------------------------------------------------------------------------------
/images/storage_complexity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/storage_complexity.png
--------------------------------------------------------------------------------
/images/tensor_substructures.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/tensor_substructures.png
--------------------------------------------------------------------------------
/images/tensors.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/tensors.png
--------------------------------------------------------------------------------
/images/unfolding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hottbox/hottbox-tutorials/5a61e872096d59e5012b94b469903cafe4228aca/images/unfolding.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.14.2
2 | scipy==1.0.1
3 | pandas==0.22.0
4 | hottbox==0.1.3
5 | ipykernel
--------------------------------------------------------------------------------