├── data
├── .gitkeep
├── .DS_Store
├── img
│ ├── beach.png
│ ├── coins.png
│ ├── index.jpg
│ ├── trex.png
│ ├── wave.png
│ ├── florence.jpg
│ ├── t-rex2.png
│ ├── terasse.jpeg
│ ├── jefferson.jpg
│ ├── starry_night.jpg
│ ├── the_bedroom.jpg
│ ├── clonazepam_1mg.png
│ └── Make-a-meme-butterfly.png
├── viz
│ ├── 3-channels.png
│ ├── numpy-axes.png
│ ├── translate.png
│ ├── RGB-examples.png
│ └── python-zero-index.svg
└── sample-data
│ ├── sample-data-03.csv
│ ├── sample-data-08.csv
│ ├── sample-data-11.csv
│ ├── sample-data-02.csv
│ ├── sample-data-09.csv
│ ├── sample-data-06.csv
│ ├── sample-data-07.csv
│ ├── sample-data-10.csv
│ ├── sample-data-12.csv
│ ├── sample-data-05.csv
│ ├── sample-data-01.csv
│ └── sample-data-04.csv
├── src
├── .gitkeep
├── hello.py
└── argparse_test.py
├── utils
├── .gitkeep
├── __init__.py
├── styletransfer.py
├── imutils.py
└── classifier_utils.py
├── assignments
├── .gitkeep
└── .DS_Store
├── .DS_Store
├── nbs
├── .DS_Store
├── session7_inclass_rdkm.ipynb
├── session10_inclass_rdkm.ipynb
├── session2_inclass_rdkm.ipynb
├── session5_inclass_rdkm.ipynb
├── session11_inclass_rdkm.ipynb
└── session12_inclass_rdkm.ipynb
├── requirements.txt
├── setup.sh
├── LICENSE
├── README.md
├── .gitignore
└── syllabus
└── readme.md
/data/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/utils/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assignments/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/.DS_Store
--------------------------------------------------------------------------------
/nbs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/nbs/.DS_Store
--------------------------------------------------------------------------------
/data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/.DS_Store
--------------------------------------------------------------------------------
/data/img/beach.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/img/beach.png
--------------------------------------------------------------------------------
/data/img/coins.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/img/coins.png
--------------------------------------------------------------------------------
/data/img/index.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/img/index.jpg
--------------------------------------------------------------------------------
/data/img/trex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/img/trex.png
--------------------------------------------------------------------------------
/data/img/wave.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/img/wave.png
--------------------------------------------------------------------------------
/assignments/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/assignments/.DS_Store
--------------------------------------------------------------------------------
/data/img/florence.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/img/florence.jpg
--------------------------------------------------------------------------------
/data/img/t-rex2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/img/t-rex2.png
--------------------------------------------------------------------------------
/data/img/terasse.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/img/terasse.jpeg
--------------------------------------------------------------------------------
/data/img/jefferson.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/img/jefferson.jpg
--------------------------------------------------------------------------------
/data/viz/3-channels.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/viz/3-channels.png
--------------------------------------------------------------------------------
/data/viz/numpy-axes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/viz/numpy-axes.png
--------------------------------------------------------------------------------
/data/viz/translate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/viz/translate.png
--------------------------------------------------------------------------------
/data/img/starry_night.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/img/starry_night.jpg
--------------------------------------------------------------------------------
/data/img/the_bedroom.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/img/the_bedroom.jpg
--------------------------------------------------------------------------------
/data/viz/RGB-examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/viz/RGB-examples.png
--------------------------------------------------------------------------------
/data/img/clonazepam_1mg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/img/clonazepam_1mg.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas
2 | numpy
3 | matplotlib
4 | opencv-python
5 | scikit-learn
6 | scikeras
7 | tensorflow-cpu
--------------------------------------------------------------------------------
/data/img/Make-a-meme-butterfly.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CDS-AU-DK/cds-visual/HEAD/data/img/Make-a-meme-butterfly.png
--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | sudo apt-get update
3 |
4 | # install required Linux binaries for OpenCV and Tesseract
5 | sudo apt-get install -y python3-opencv
6 | #sudo apt install -y tesseract-ocr
7 | #sudo apt install -y libtesseract-dev
8 |
9 | pip install -r requirements.txt
--------------------------------------------------------------------------------
/src/hello.py:
--------------------------------------------------------------------------------
"""Minimal command-line greeter: parse a --name flag and print a greeting."""
from argparse import ArgumentParser


def arg_parser():
    """Build the CLI, parse sys.argv, and return the parsed namespace."""
    parser = ArgumentParser(description="Gets name variables")
    parser.add_argument("--name", required=True, help="Please enter a valid name!")
    return parser.parse_args()


def hello(name):
    """Print a greeting for the given name."""
    print(f"Hello, {name}!")


def main():
    """
    The main logic of our code
    """
    hello(arg_parser().name)


if __name__ == "__main__":
    # "if this code is called from the command line, do the following"
    main()
--------------------------------------------------------------------------------
/src/argparse_test.py:
--------------------------------------------------------------------------------
"""Load a CSV from the shared sample-data folder and print it as an array."""
import os
import argparse
import numpy as np


def file_loader():
    """Build the CLI and return parsed arguments; --input/-i is required."""
    parser = argparse.ArgumentParser(description="Loading and printing an array")
    parser.add_argument("--input", "-i",
                        required=True,
                        help="Filepath to CSV for loading and printing")
    return parser.parse_args()


def process(filename):
    """Read a comma-delimited numeric file and print the resulting array."""
    print(np.loadtxt(filename, delimiter=","))


def main():
    """Resolve the input name against the shared data directory and print it."""
    args = file_loader()
    # The course data lives two levels up, outside this repository.
    csv_path = os.path.join("..", "..", "cds-vis-data",
                            "data", "sample-data", args.input)
    process(csv_path)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Cultural Data Science - Aarhus University
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/utils/styletransfer.py:
--------------------------------------------------------------------------------
1 | # @title Define image loading and visualization functions { display-mode: "form" }
2 | import functools, os
3 | from matplotlib import gridspec
4 | import matplotlib.pylab as plt
5 | import tensorflow as tf
6 | import numpy as np
7 |
def crop_center(image):
    """Returns a cropped square image.

    Takes the largest centered square from a batched image tensor
    (batch, height, width, channels), using the shorter spatial side.
    """
    height, width = image.shape[1], image.shape[2]
    side = min(height, width)
    # Center the crop window along whichever axis is longer.
    top = max(height - width, 0) // 2
    left = max(width - height, 0) // 2
    return tf.image.crop_to_bounding_box(image, top, left, side, side)
17 |
@functools.lru_cache(maxsize=None)
def st_load(image_path, image_size=(256, 256), preserve_aspect_ratio=True):
    """Loads and preprocesses images.

    Args:
        image_path: path to an image file readable by matplotlib.
        image_size: target (height, width) passed to tf.image.resize.
        preserve_aspect_ratio: forwarded to tf.image.resize.

    Returns:
        A float tensor of shape (1, H, W, 3) with values in [0, 1].
    """
    # Load and convert to float32 numpy array, add batch dimension, and normalize to range [0, 1].
    img = plt.imread(image_path).astype(np.float32)[np.newaxis, ...]
    if img.max() > 1.0:
        img = img / 255.
    # Rank 3 here means the source was grayscale (batch, H, W):
    # replicate it into three identical channels.
    if len(img.shape) == 3:
        img = tf.stack([img, img, img], axis=-1)
    img = crop_center(img)
    # BUG FIX: preserve_aspect_ratio was previously hard-coded to True,
    # silently ignoring the caller's argument; now it is forwarded.
    img = tf.image.resize(img, image_size,
                          preserve_aspect_ratio=preserve_aspect_ratio)
    return img
30 |
def show_n(images, titles=('',)):
    """Display a row of images side by side with optional per-image titles."""
    n = len(images)
    widths = [img.shape[1] for img in images]
    # Scale the figure so each panel is roughly proportional to 320px width.
    panel = (widths[0] * 6) // 320
    plt.figure(figsize=(panel * n, panel))
    gs = gridspec.GridSpec(1, n, width_ratios=widths)
    for idx, img in enumerate(images):
        plt.subplot(gs[idx])
        # Drop the batch dimension before rendering.
        plt.imshow(img[0], aspect='equal')
        plt.axis('off')
        plt.title(titles[idx] if len(titles) > idx else '')
    plt.show()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Visual Analytics - Spring 2025
2 |
3 | This repository contains all of the code and data related to the Spring 2025 module _Visual Analytics_ as part of the bachelor's tilvalg in [Cultural Data Science](https://bachelor.au.dk/en/supplementary-subject/culturaldatascience/) at Aarhus University.
4 |
5 | This repository is in active development, with new material being pushed on a weekly basis.
6 |
7 | ## Technicalities
8 |
9 | The coding part of this course will be run entirely on [UCloud](https://cloud.sdu.dk/). This is to ensure that everyone has access to the same computing resources and to avoid potential conflicts with different operating systems, different versions of Python, and so on.
10 |
11 | Students are welcome to develop code locally on their machine instead. However, please note that *no support* will be given for potential problems or errors you encounter!
12 |
13 | ## Repo structure
14 |
15 | This repository has the following directory structure:
16 |
17 | | Folder | Description|
18 | |--------|:-----------|
19 | | ```data``` | A folder to be used for sample datasets that we use in class |
20 | | ```nbs``` | This is where you should save all exploratory and experimental notebooks |
21 | | ```src``` | For Python scripts developed in class and as part of assignments |
22 | | ```syllabus```| Contains a full course syllabus and reading list |
23 | | ```utils``` | Utility functions that are written by me, and which we'll use in class. |
24 |
25 | ## Class times
26 |
27 | This class takes place on Fridays from 8-12. Teaching will take place in person, and you can find the room number via the AU Timetable website.
28 |
29 | ## Course overview and readings
30 |
31 | A detailed breakdown of the course structure and the associated readings can be found in the [syllabus](syllabus/readme.md). Also, be sure to familiarise yourself with the [_studieordning_](https://eddiprod.au.dk/EDDI/webservices/DokOrdningService.cfc?method=visGodkendtOrdning&dokOrdningId=15952&sprog=en) for the course, especially in relation to examination and academic regulations.
32 |
33 | ## Contact details
34 |
35 | The instructor is me! That is to say, [Ross](https://www.au.dk/en/rdkm@cc.au.dk).
36 |
37 | All communication to you will be sent via Blackboard to your student email address.
38 |
39 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/utils/imutils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | store basic image processing methods
4 |
5 | """
6 | import cv2
7 | import numpy as np
8 | import matplotlib as mpl
9 | import matplotlib.pyplot as plt
10 |
11 |
def grab_contours(cnts):
    """Return the contours list from a cv2.findContours result tuple.

    The return signature of cv2.findContours differs between OpenCV
    versions: a 2-tuple holds the contours first, a 3-tuple holds them
    second. Any other length is treated as an unknown signature.
    """
    # OpenCV v2.4, v4-beta, or v4-official: (contours, hierarchy)
    if len(cnts) == 2:
        return cnts[0]

    # OpenCV v3, v4-pre, or v4-alpha: (image, contours, hierarchy)
    if len(cnts) == 3:
        return cnts[1]

    # Unknown signature: OpenCV must have changed it yet again.
    raise Exception(("Contours tuple must have length 2 or 3, "
        "otherwise OpenCV changed their cv2.findContours return "
        "signature yet again. Refer to OpenCV's documentation "
        "in that case"))
34 |
def translate(image, x, y):
    """Shift an image by (x, y) pixels via an affine warp.

    Positive x shifts right, positive y shifts down; output keeps the
    original dimensions.
    """
    # 2x3 affine matrix encoding a pure translation.
    matrix = np.float32([[1, 0, x], [0, 1, y]])
    height, width = image.shape[:2]
    return cv2.warpAffine(image, matrix, (width, height))
42 |
43 |
def rotate(image, angle, center = None, scale = 1.0):
    """Rotate an image by `angle` degrees about `center`, scaled by `scale`.

    When `center` is None the pivot defaults to the image centre; the
    output keeps the original dimensions.
    """
    (h, w) = image.shape[:2]

    # Default pivot: the geometric centre of the image.
    if center is None:
        center = (w / 2, h / 2)

    rotation = cv2.getRotationMatrix2D(center, angle, scale)
    return cv2.warpAffine(image, rotation, (w, h))
59 |
60 |
def resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize an image to a target width OR height, preserving aspect ratio.

    If both are None the image is returned unchanged. If both are given,
    `width` takes precedence (matching the original behaviour).
    """
    (h, w) = image.shape[:2]

    # Nothing requested: hand the original back untouched.
    if width is None and height is None:
        return image

    if width is None:
        # Derive the scale factor from the requested height.
        ratio = height / float(h)
        dim = (int(w * ratio), height)
    else:
        # Derive the scale factor from the requested width
        # (this branch also covers the both-given case).
        ratio = width / float(w)
        dim = (width, int(h * ratio))

    return cv2.resize(image, dim, interpolation=inter)
91 |
92 |
def jimshow(image, title=False):
    """imshow with matplotlib dependencies

    Displays a 3-channel BGR OpenCV image at its native pixel size;
    pass `title` to add a caption.
    """
    # Use matplotlib's dpi so the figure renders roughly 1:1 with pixels.
    dpi = mpl.rcParams['figure.dpi']
    height, width, depth = image.shape
    plt.figure(figsize=(width / float(dpi), height / float(dpi)))

    if depth == 1:
        # Single-channel data: render as grayscale.
        plt.imshow(image, cmap='gray')
    else:
        # OpenCV stores channels as BGR; matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    if title:
        plt.title(title)
    plt.axis('off')

    plt.show()
114 |
def jimshow_channel(image, title=False):
    """
    Modified jimshow() to plot individual channels
    """
    # Use matplotlib's dpi so the figure renders roughly 1:1 with pixels.
    dpi = mpl.rcParams['figure.dpi']
    height, width = image.shape
    plt.figure(figsize=(width / float(dpi), height / float(dpi)))

    # A single channel is always rendered as grayscale.
    plt.imshow(image, cmap='gray')

    if title:
        plt.title(title)
    plt.axis('off')

    plt.show()

if __name__=="__main__":
    pass
--------------------------------------------------------------------------------
/data/sample-data/sample-data-03.csv:
--------------------------------------------------------------------------------
1 | 0,0,0,2,0,4,1,7,2,6,4,7,2,4,10,7,3,13,9,3,0,1,0,15,0,5,12,3,8,6,8,6,4,3,3,2,0,0,0,0
2 | 0,0,0,3,0,4,3,5,3,11,4,13,6,2,8,1,3,0,17,12,4,3,4,4,9,8,8,2,5,3,11,2,3,1,5,0,1,1,0,0
3 | 0,0,1,2,0,1,0,1,6,9,4,3,7,5,4,14,8,5,0,10,5,4,9,12,4,5,5,8,6,2,0,0,1,7,6,1,1,0,0,0
4 | 0,0,0,0,0,3,7,3,2,1,5,2,8,0,10,3,3,1,13,11,12,0,5,0,5,6,10,8,4,3,2,11,3,5,1,1,0,0,0,0
5 | 0,0,1,0,0,2,1,2,3,9,6,14,4,6,13,8,7,13,0,15,3,1,3,1,6,4,4,0,4,2,5,11,3,5,6,0,0,1,0,0
6 | 0,0,0,1,1,2,2,7,4,5,4,6,5,3,11,6,2,14,1,15,8,3,3,1,2,0,8,3,14,5,4,1,2,4,6,3,1,1,0,0
7 | 0,0,1,2,1,0,0,1,1,5,2,14,9,1,12,12,1,13,10,14,2,13,4,16,8,0,12,2,3,4,7,6,3,2,1,0,0,0,0,0
8 | 0,0,0,2,0,5,2,5,1,5,6,11,2,4,2,5,0,0,4,1,7,1,7,17,12,5,5,4,3,6,1,0,5,5,0,2,3,0,0,0
9 | 0,0,0,2,0,2,6,3,1,6,3,9,8,4,11,5,0,10,9,3,9,5,5,16,10,2,2,8,10,1,1,0,0,2,2,1,3,0,0,0
10 | 0,0,0,2,0,3,3,5,6,0,0,12,7,0,16,8,2,19,0,7,2,13,5,8,11,2,5,5,1,5,7,11,5,3,3,3,2,0,0,0
11 | 0,0,0,1,1,5,3,7,5,8,5,10,4,4,13,8,8,6,14,14,8,9,3,1,0,1,6,4,10,4,5,2,1,1,6,0,0,0,0,0
12 | 0,0,0,0,1,4,4,4,1,3,2,10,3,5,10,14,6,11,2,8,10,12,2,3,10,4,0,8,14,1,1,6,1,5,1,2,1,0,0,0
13 | 0,0,1,3,0,2,5,1,5,1,3,8,5,6,7,6,7,14,4,15,0,2,3,0,2,8,16,1,13,0,3,5,0,4,1,2,0,1,0,0
14 | 0,0,0,2,0,4,5,7,6,11,4,14,2,1,14,8,4,5,8,2,8,11,6,9,10,3,14,8,10,2,11,4,1,1,1,0,0,1,0,0
15 | 0,0,0,0,0,5,4,1,6,0,3,7,7,5,11,8,1,7,2,13,12,5,6,10,16,2,12,3,3,6,10,1,2,3,4,3,2,0,0,0
16 | 0,0,1,0,1,1,1,6,2,5,6,2,1,1,4,6,0,10,12,15,6,1,5,0,8,3,1,4,4,6,3,2,2,5,4,0,1,0,0,0
17 | 0,0,1,1,1,0,2,0,1,8,6,5,3,1,2,12,5,12,2,14,9,8,4,1,14,1,11,7,10,1,5,6,4,7,6,3,2,1,0,0
18 | 0,0,1,0,1,0,7,2,0,6,4,7,9,0,12,5,2,18,5,15,6,5,0,4,16,7,2,6,2,5,11,7,1,0,1,3,1,0,0,0
19 | 0,0,1,2,0,1,4,6,1,12,6,9,0,2,8,3,6,0,7,12,5,15,10,5,16,7,1,4,11,6,6,5,2,4,1,1,1,0,0,0
20 | 0,0,1,3,0,0,2,3,0,4,2,9,7,1,11,11,8,0,16,0,4,7,10,14,0,7,15,1,11,4,1,0,2,4,2,3,1,1,0,0
21 | 0,0,1,2,1,3,0,3,6,4,4,5,9,3,14,7,4,20,6,0,6,7,7,5,8,7,13,8,3,4,9,10,3,2,0,0,3,1,0,0
22 | 0,0,0,0,0,3,5,4,1,5,3,14,7,6,15,15,7,20,16,11,10,17,5,16,2,3,9,8,14,0,7,3,5,1,2,0,1,1,0,0
23 | 0,0,1,0,1,0,3,5,6,1,3,13,6,2,16,0,8,11,7,1,10,12,0,1,12,6,6,4,12,3,0,0,1,1,3,1,3,0,0,0
24 | 0,0,1,2,1,5,7,5,3,9,5,12,1,4,1,15,1,5,8,9,5,1,7,3,7,2,12,7,11,2,5,0,0,0,1,0,3,0,0,0
25 | 0,0,0,3,0,0,6,3,5,1,6,14,6,0,4,2,9,13,12,13,8,6,4,15,8,5,16,7,13,3,11,3,4,1,5,3,1,1,0,0
26 | 0,0,1,2,0,2,1,6,1,7,1,1,5,6,15,1,4,10,12,8,0,6,2,1,9,4,10,7,6,6,11,2,5,5,5,0,1,0,0,0
27 | 0,0,0,3,1,1,4,1,0,4,4,5,1,6,14,16,4,10,12,8,10,1,0,19,3,2,9,7,3,5,8,5,4,6,0,3,0,0,0,0
28 | 0,0,1,3,0,3,1,5,1,10,3,5,2,1,0,6,2,9,0,3,12,5,3,5,16,2,9,1,13,5,11,1,1,5,1,2,0,1,0,0
29 | 0,0,0,1,1,3,2,1,4,4,3,7,3,6,1,17,4,11,0,13,6,8,8,2,3,5,0,7,14,1,4,7,0,0,1,1,3,0,0,0
30 | 0,0,0,3,0,3,1,0,4,0,3,1,1,6,5,5,7,18,9,9,2,15,8,1,9,7,8,3,5,5,2,4,0,1,0,1,1,1,0,0
31 | 0,0,1,0,0,5,0,3,2,8,1,6,4,6,15,9,6,15,11,6,3,11,3,13,7,0,7,0,5,0,5,1,1,0,1,0,2,0,0,0
32 | 0,0,0,2,0,1,1,7,2,4,0,8,6,0,1,12,3,5,15,0,12,13,9,7,15,5,16,5,13,5,3,0,0,3,4,0,3,1,0,0
33 | 0,0,0,0,1,5,3,0,1,4,0,12,1,5,13,3,8,20,15,4,2,14,1,6,3,8,3,8,7,3,0,9,1,0,1,1,3,0,0,0
34 | 0,0,0,2,1,3,1,5,2,11,1,2,4,2,4,5,1,7,2,14,5,16,3,16,6,2,15,1,8,2,4,3,4,4,3,1,1,0,0,0
35 | 0,0,0,0,1,0,7,7,2,7,5,10,2,1,17,10,4,8,16,4,9,3,3,8,5,7,6,0,4,1,1,3,0,6,2,1,3,1,0,0
36 | 0,0,0,2,0,1,1,5,5,8,6,8,9,1,7,9,2,3,11,5,10,9,0,15,14,4,14,7,8,1,4,7,1,0,6,1,3,0,0,0
37 | 0,0,1,0,0,1,4,4,5,5,0,8,6,1,17,16,7,20,7,13,5,16,9,6,10,7,4,5,3,5,0,5,2,1,5,0,2,1,0,0
38 | 0,0,1,2,1,1,2,5,3,9,1,5,0,4,2,14,7,6,16,16,6,2,5,13,10,8,8,8,6,5,5,10,1,7,6,1,0,0,0,0
39 | 0,0,1,2,0,0,4,1,0,11,3,0,1,0,14,13,4,16,12,0,10,12,0,18,16,2,10,3,5,5,4,8,5,1,3,3,0,1,0,0
40 | 0,0,0,3,0,5,2,4,3,8,6,9,4,5,10,4,6,17,13,10,5,11,5,18,8,1,4,3,13,0,2,5,0,0,3,1,1,1,0,0
41 | 0,0,1,3,1,4,7,5,5,8,1,3,2,6,8,1,8,3,17,16,1,10,1,9,3,6,1,1,8,0,0,3,5,4,3,2,2,1,0,0
42 | 0,0,1,0,1,3,4,2,1,0,6,14,2,6,13,6,1,18,15,11,9,17,8,15,2,1,9,5,5,4,1,11,3,7,6,3,2,1,0,0
43 | 0,0,1,0,0,5,7,4,1,6,3,2,5,0,16,11,2,6,16,0,7,4,5,7,13,4,2,8,9,2,0,2,1,2,3,3,0,0,0,0
44 | 0,0,0,1,0,5,4,6,1,7,5,14,4,0,12,3,3,13,2,8,11,13,0,0,10,0,15,0,13,1,10,3,0,1,5,3,2,1,0,0
45 | 0,0,1,3,0,0,1,4,5,6,2,9,6,3,3,2,7,19,6,1,12,9,8,18,11,4,7,6,5,1,1,4,4,2,1,2,1,0,0,0
46 | 0,0,1,3,0,5,7,5,4,11,3,0,3,1,10,2,5,8,12,7,11,7,2,9,15,7,7,0,14,4,0,6,4,6,4,1,2,0,0,0
47 | 0,0,1,3,0,4,5,3,5,1,3,10,4,2,2,16,6,1,12,1,11,5,2,5,14,2,2,3,10,6,0,3,5,7,6,0,3,1,0,0
48 | 0,0,1,1,1,0,2,2,3,2,4,4,6,4,6,13,6,11,15,2,10,3,3,2,6,8,7,5,13,3,0,7,3,2,2,0,2,1,0,0
49 | 0,0,1,3,0,3,0,3,6,3,5,9,3,3,10,3,9,1,9,6,12,13,8,11,16,4,2,3,1,5,1,9,4,0,5,3,2,0,0,0
50 | 0,0,1,1,0,1,6,2,5,8,0,7,2,5,13,14,0,19,4,16,9,2,6,16,3,3,6,0,11,0,1,9,2,2,5,1,2,1,0,0
51 | 0,0,1,1,0,3,4,4,5,5,0,6,7,3,14,9,8,7,6,1,0,13,9,3,1,2,5,0,12,5,5,0,5,7,3,1,0,1,0,0
52 | 0,0,1,0,1,2,2,1,5,0,6,8,8,5,3,13,3,6,6,15,7,12,2,19,16,8,10,0,7,1,3,6,3,2,4,1,0,0,0,0
53 | 0,0,1,2,1,3,6,5,0,7,5,7,2,1,11,1,5,4,1,2,6,7,7,7,13,4,2,2,9,1,12,0,4,6,1,0,3,1,0,0
54 | 0,0,1,1,1,3,0,4,3,8,0,1,1,4,2,6,6,6,7,13,12,15,3,12,13,8,11,1,8,2,0,1,2,0,0,2,2,1,0,0
55 | 0,0,0,0,0,3,6,3,3,3,0,11,8,6,4,0,3,17,8,2,8,5,3,18,5,8,1,6,0,6,12,1,3,6,0,1,0,0,0,0
56 | 0,0,1,3,0,2,6,5,6,7,2,10,1,4,14,11,1,19,14,8,10,14,10,4,11,8,8,2,3,5,2,2,3,6,5,0,1,0,0,0
57 | 0,0,1,2,1,2,4,5,3,10,5,10,0,4,12,8,2,12,8,8,4,14,1,13,2,8,6,5,1,4,3,2,3,6,1,2,1,0,0,0
58 | 0,0,1,3,0,3,2,0,3,2,6,11,3,1,0,3,3,0,11,1,6,3,4,16,3,2,13,6,9,4,1,7,5,3,3,1,3,1,0,0
59 | 0,0,1,2,1,0,4,3,1,6,4,14,4,3,14,17,1,0,8,5,4,4,10,2,14,5,11,0,6,4,4,5,0,3,0,0,2,1,0,0
60 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
61 |
--------------------------------------------------------------------------------
/data/sample-data/sample-data-08.csv:
--------------------------------------------------------------------------------
1 | 0,0,0,2,0,4,1,7,2,6,4,7,2,4,10,7,3,13,9,3,0,1,0,15,0,5,12,3,8,6,8,6,4,3,3,2,0,0,0,0
2 | 0,0,0,3,0,4,3,5,3,11,4,13,6,2,8,1,3,0,17,12,4,3,4,4,9,8,8,2,5,3,11,2,3,1,5,0,1,1,0,0
3 | 0,0,1,2,0,1,0,1,6,9,4,3,7,5,4,14,8,5,0,10,5,4,9,12,4,5,5,8,6,2,0,0,1,7,6,1,1,0,0,0
4 | 0,0,0,0,0,3,7,3,2,1,5,2,8,0,10,3,3,1,13,11,12,0,5,0,5,6,10,8,4,3,2,11,3,5,1,1,0,0,0,0
5 | 0,0,1,0,0,2,1,2,3,9,6,14,4,6,13,8,7,13,0,15,3,1,3,1,6,4,4,0,4,2,5,11,3,5,6,0,0,1,0,0
6 | 0,0,0,1,1,2,2,7,4,5,4,6,5,3,11,6,2,14,1,15,8,3,3,1,2,0,8,3,14,5,4,1,2,4,6,3,1,1,0,0
7 | 0,0,1,2,1,0,0,1,1,5,2,14,9,1,12,12,1,13,10,14,2,13,4,16,8,0,12,2,3,4,7,6,3,2,1,0,0,0,0,0
8 | 0,0,0,2,0,5,2,5,1,5,6,11,2,4,2,5,0,0,4,1,7,1,7,17,12,5,5,4,3,6,1,0,5,5,0,2,3,0,0,0
9 | 0,0,0,2,0,2,6,3,1,6,3,9,8,4,11,5,0,10,9,3,9,5,5,16,10,2,2,8,10,1,1,0,0,2,2,1,3,0,0,0
10 | 0,0,0,2,0,3,3,5,6,0,0,12,7,0,16,8,2,19,0,7,2,13,5,8,11,2,5,5,1,5,7,11,5,3,3,3,2,0,0,0
11 | 0,0,0,1,1,5,3,7,5,8,5,10,4,4,13,8,8,6,14,14,8,9,3,1,0,1,6,4,10,4,5,2,1,1,6,0,0,0,0,0
12 | 0,0,0,0,1,4,4,4,1,3,2,10,3,5,10,14,6,11,2,8,10,12,2,3,10,4,0,8,14,1,1,6,1,5,1,2,1,0,0,0
13 | 0,0,1,3,0,2,5,1,5,1,3,8,5,6,7,6,7,14,4,15,0,2,3,0,2,8,16,1,13,0,3,5,0,4,1,2,0,1,0,0
14 | 0,0,0,2,0,4,5,7,6,11,4,14,2,1,14,8,4,5,8,2,8,11,6,9,10,3,14,8,10,2,11,4,1,1,1,0,0,1,0,0
15 | 0,0,0,0,0,5,4,1,6,0,3,7,7,5,11,8,1,7,2,13,12,5,6,10,16,2,12,3,3,6,10,1,2,3,4,3,2,0,0,0
16 | 0,0,1,0,1,1,1,6,2,5,6,2,1,1,4,6,0,10,12,15,6,1,5,0,8,3,1,4,4,6,3,2,2,5,4,0,1,0,0,0
17 | 0,0,1,1,1,0,2,0,1,8,6,5,3,1,2,12,5,12,2,14,9,8,4,1,14,1,11,7,10,1,5,6,4,7,6,3,2,1,0,0
18 | 0,0,1,0,1,0,7,2,0,6,4,7,9,0,12,5,2,18,5,15,6,5,0,4,16,7,2,6,2,5,11,7,1,0,1,3,1,0,0,0
19 | 0,0,1,2,0,1,4,6,1,12,6,9,0,2,8,3,6,0,7,12,5,15,10,5,16,7,1,4,11,6,6,5,2,4,1,1,1,0,0,0
20 | 0,0,1,3,0,0,2,3,0,4,2,9,7,1,11,11,8,0,16,0,4,7,10,14,0,7,15,1,11,4,1,0,2,4,2,3,1,1,0,0
21 | 0,0,1,2,1,3,0,3,6,4,4,5,9,3,14,7,4,20,6,0,6,7,7,5,8,7,13,8,3,4,9,10,3,2,0,0,3,1,0,0
22 | 0,0,0,0,0,3,5,4,1,5,3,14,7,6,15,15,7,20,16,11,10,17,5,16,2,3,9,8,14,0,7,3,5,1,2,0,1,1,0,0
23 | 0,0,1,0,1,0,3,5,6,1,3,13,6,2,16,0,8,11,7,1,10,12,0,1,12,6,6,4,12,3,0,0,1,1,3,1,3,0,0,0
24 | 0,0,1,2,1,5,7,5,3,9,5,12,1,4,1,15,1,5,8,9,5,1,7,3,7,2,12,7,11,2,5,0,0,0,1,0,3,0,0,0
25 | 0,0,0,3,0,0,6,3,5,1,6,14,6,0,4,2,9,13,12,13,8,6,4,15,8,5,16,7,13,3,11,3,4,1,5,3,1,1,0,0
26 | 0,0,1,2,0,2,1,6,1,7,1,1,5,6,15,1,4,10,12,8,0,6,2,1,9,4,10,7,6,6,11,2,5,5,5,0,1,0,0,0
27 | 0,0,0,3,1,1,4,1,0,4,4,5,1,6,14,16,4,10,12,8,10,1,0,19,3,2,9,7,3,5,8,5,4,6,0,3,0,0,0,0
28 | 0,0,1,3,0,3,1,5,1,10,3,5,2,1,0,6,2,9,0,3,12,5,3,5,16,2,9,1,13,5,11,1,1,5,1,2,0,1,0,0
29 | 0,0,0,1,1,3,2,1,4,4,3,7,3,6,1,17,4,11,0,13,6,8,8,2,3,5,0,7,14,1,4,7,0,0,1,1,3,0,0,0
30 | 0,0,0,3,0,3,1,0,4,0,3,1,1,6,5,5,7,18,9,9,2,15,8,1,9,7,8,3,5,5,2,4,0,1,0,1,1,1,0,0
31 | 0,0,1,0,0,5,0,3,2,8,1,6,4,6,15,9,6,15,11,6,3,11,3,13,7,0,7,0,5,0,5,1,1,0,1,0,2,0,0,0
32 | 0,0,0,2,0,1,1,7,2,4,0,8,6,0,1,12,3,5,15,0,12,13,9,7,15,5,16,5,13,5,3,0,0,3,4,0,3,1,0,0
33 | 0,0,0,0,1,5,3,0,1,4,0,12,1,5,13,3,8,20,15,4,2,14,1,6,3,8,3,8,7,3,0,9,1,0,1,1,3,0,0,0
34 | 0,0,0,2,1,3,1,5,2,11,1,2,4,2,4,5,1,7,2,14,5,16,3,16,6,2,15,1,8,2,4,3,4,4,3,1,1,0,0,0
35 | 0,0,0,0,1,0,7,7,2,7,5,10,2,1,17,10,4,8,16,4,9,3,3,8,5,7,6,0,4,1,1,3,0,6,2,1,3,1,0,0
36 | 0,0,0,2,0,1,1,5,5,8,6,8,9,1,7,9,2,3,11,5,10,9,0,15,14,4,14,7,8,1,4,7,1,0,6,1,3,0,0,0
37 | 0,0,1,0,0,1,4,4,5,5,0,8,6,1,17,16,7,20,7,13,5,16,9,6,10,7,4,5,3,5,0,5,2,1,5,0,2,1,0,0
38 | 0,0,1,2,1,1,2,5,3,9,1,5,0,4,2,14,7,6,16,16,6,2,5,13,10,8,8,8,6,5,5,10,1,7,6,1,0,0,0,0
39 | 0,0,1,2,0,0,4,1,0,11,3,0,1,0,14,13,4,16,12,0,10,12,0,18,16,2,10,3,5,5,4,8,5,1,3,3,0,1,0,0
40 | 0,0,0,3,0,5,2,4,3,8,6,9,4,5,10,4,6,17,13,10,5,11,5,18,8,1,4,3,13,0,2,5,0,0,3,1,1,1,0,0
41 | 0,0,1,3,1,4,7,5,5,8,1,3,2,6,8,1,8,3,17,16,1,10,1,9,3,6,1,1,8,0,0,3,5,4,3,2,2,1,0,0
42 | 0,0,1,0,1,3,4,2,1,0,6,14,2,6,13,6,1,18,15,11,9,17,8,15,2,1,9,5,5,4,1,11,3,7,6,3,2,1,0,0
43 | 0,0,1,0,0,5,7,4,1,6,3,2,5,0,16,11,2,6,16,0,7,4,5,7,13,4,2,8,9,2,0,2,1,2,3,3,0,0,0,0
44 | 0,0,0,1,0,5,4,6,1,7,5,14,4,0,12,3,3,13,2,8,11,13,0,0,10,0,15,0,13,1,10,3,0,1,5,3,2,1,0,0
45 | 0,0,1,3,0,0,1,4,5,6,2,9,6,3,3,2,7,19,6,1,12,9,8,18,11,4,7,6,5,1,1,4,4,2,1,2,1,0,0,0
46 | 0,0,1,3,0,5,7,5,4,11,3,0,3,1,10,2,5,8,12,7,11,7,2,9,15,7,7,0,14,4,0,6,4,6,4,1,2,0,0,0
47 | 0,0,1,3,0,4,5,3,5,1,3,10,4,2,2,16,6,1,12,1,11,5,2,5,14,2,2,3,10,6,0,3,5,7,6,0,3,1,0,0
48 | 0,0,1,1,1,0,2,2,3,2,4,4,6,4,6,13,6,11,15,2,10,3,3,2,6,8,7,5,13,3,0,7,3,2,2,0,2,1,0,0
49 | 0,0,1,3,0,3,0,3,6,3,5,9,3,3,10,3,9,1,9,6,12,13,8,11,16,4,2,3,1,5,1,9,4,0,5,3,2,0,0,0
50 | 0,0,1,1,0,1,6,2,5,8,0,7,2,5,13,14,0,19,4,16,9,2,6,16,3,3,6,0,11,0,1,9,2,2,5,1,2,1,0,0
51 | 0,0,1,1,0,3,4,4,5,5,0,6,7,3,14,9,8,7,6,1,0,13,9,3,1,2,5,0,12,5,5,0,5,7,3,1,0,1,0,0
52 | 0,0,1,0,1,2,2,1,5,0,6,8,8,5,3,13,3,6,6,15,7,12,2,19,16,8,10,0,7,1,3,6,3,2,4,1,0,0,0,0
53 | 0,0,1,2,1,3,6,5,0,7,5,7,2,1,11,1,5,4,1,2,6,7,7,7,13,4,2,2,9,1,12,0,4,6,1,0,3,1,0,0
54 | 0,0,1,1,1,3,0,4,3,8,0,1,1,4,2,6,6,6,7,13,12,15,3,12,13,8,11,1,8,2,0,1,2,0,0,2,2,1,0,0
55 | 0,0,0,0,0,3,6,3,3,3,0,11,8,6,4,0,3,17,8,2,8,5,3,18,5,8,1,6,0,6,12,1,3,6,0,1,0,0,0,0
56 | 0,0,1,3,0,2,6,5,6,7,2,10,1,4,14,11,1,19,14,8,10,14,10,4,11,8,8,2,3,5,2,2,3,6,5,0,1,0,0,0
57 | 0,0,1,2,1,2,4,5,3,10,5,10,0,4,12,8,2,12,8,8,4,14,1,13,2,8,6,5,1,4,3,2,3,6,1,2,1,0,0,0
58 | 0,0,1,3,0,3,2,0,3,2,6,11,3,1,0,3,3,0,11,1,6,3,4,16,3,2,13,6,9,4,1,7,5,3,3,1,3,1,0,0
59 | 0,0,1,2,1,0,4,3,1,6,4,14,4,3,14,17,1,0,8,5,4,4,10,2,14,5,11,0,6,4,4,5,0,3,0,0,2,1,0,0
60 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
61 |
--------------------------------------------------------------------------------
/data/sample-data/sample-data-11.csv:
--------------------------------------------------------------------------------
1 | 0,0,0,2,0,4,1,7,2,6,4,7,2,4,10,7,3,13,9,3,0,1,0,15,0,5,12,3,8,6,8,6,4,3,3,2,0,0,0,0
2 | 0,0,0,3,0,4,3,5,3,11,4,13,6,2,8,1,3,0,17,12,4,3,4,4,9,8,8,2,5,3,11,2,3,1,5,0,1,1,0,0
3 | 0,0,1,2,0,1,0,1,6,9,4,3,7,5,4,14,8,5,0,10,5,4,9,12,4,5,5,8,6,2,0,0,1,7,6,1,1,0,0,0
4 | 0,0,0,0,0,3,7,3,2,1,5,2,8,0,10,3,3,1,13,11,12,0,5,0,5,6,10,8,4,3,2,11,3,5,1,1,0,0,0,0
5 | 0,0,1,0,0,2,1,2,3,9,6,14,4,6,13,8,7,13,0,15,3,1,3,1,6,4,4,0,4,2,5,11,3,5,6,0,0,1,0,0
6 | 0,0,0,1,1,2,2,7,4,5,4,6,5,3,11,6,2,14,1,15,8,3,3,1,2,0,8,3,14,5,4,1,2,4,6,3,1,1,0,0
7 | 0,0,1,2,1,0,0,1,1,5,2,14,9,1,12,12,1,13,10,14,2,13,4,16,8,0,12,2,3,4,7,6,3,2,1,0,0,0,0,0
8 | 0,0,0,2,0,5,2,5,1,5,6,11,2,4,2,5,0,0,4,1,7,1,7,17,12,5,5,4,3,6,1,0,5,5,0,2,3,0,0,0
9 | 0,0,0,2,0,2,6,3,1,6,3,9,8,4,11,5,0,10,9,3,9,5,5,16,10,2,2,8,10,1,1,0,0,2,2,1,3,0,0,0
10 | 0,0,0,2,0,3,3,5,6,0,0,12,7,0,16,8,2,19,0,7,2,13,5,8,11,2,5,5,1,5,7,11,5,3,3,3,2,0,0,0
11 | 0,0,0,1,1,5,3,7,5,8,5,10,4,4,13,8,8,6,14,14,8,9,3,1,0,1,6,4,10,4,5,2,1,1,6,0,0,0,0,0
12 | 0,0,0,0,1,4,4,4,1,3,2,10,3,5,10,14,6,11,2,8,10,12,2,3,10,4,0,8,14,1,1,6,1,5,1,2,1,0,0,0
13 | 0,0,1,3,0,2,5,1,5,1,3,8,5,6,7,6,7,14,4,15,0,2,3,0,2,8,16,1,13,0,3,5,0,4,1,2,0,1,0,0
14 | 0,0,0,2,0,4,5,7,6,11,4,14,2,1,14,8,4,5,8,2,8,11,6,9,10,3,14,8,10,2,11,4,1,1,1,0,0,1,0,0
15 | 0,0,0,0,0,5,4,1,6,0,3,7,7,5,11,8,1,7,2,13,12,5,6,10,16,2,12,3,3,6,10,1,2,3,4,3,2,0,0,0
16 | 0,0,1,0,1,1,1,6,2,5,6,2,1,1,4,6,0,10,12,15,6,1,5,0,8,3,1,4,4,6,3,2,2,5,4,0,1,0,0,0
17 | 0,0,1,1,1,0,2,0,1,8,6,5,3,1,2,12,5,12,2,14,9,8,4,1,14,1,11,7,10,1,5,6,4,7,6,3,2,1,0,0
18 | 0,0,1,0,1,0,7,2,0,6,4,7,9,0,12,5,2,18,5,15,6,5,0,4,16,7,2,6,2,5,11,7,1,0,1,3,1,0,0,0
19 | 0,0,1,2,0,1,4,6,1,12,6,9,0,2,8,3,6,0,7,12,5,15,10,5,16,7,1,4,11,6,6,5,2,4,1,1,1,0,0,0
20 | 0,0,1,3,0,0,2,3,0,4,2,9,7,1,11,11,8,0,16,0,4,7,10,14,0,7,15,1,11,4,1,0,2,4,2,3,1,1,0,0
21 | 0,0,1,2,1,3,0,3,6,4,4,5,9,3,14,7,4,20,6,0,6,7,7,5,8,7,13,8,3,4,9,10,3,2,0,0,3,1,0,0
22 | 0,0,0,0,0,3,5,4,1,5,3,14,7,6,15,15,7,20,16,11,10,17,5,16,2,3,9,8,14,0,7,3,5,1,2,0,1,1,0,0
23 | 0,0,1,0,1,0,3,5,6,1,3,13,6,2,16,0,8,11,7,1,10,12,0,1,12,6,6,4,12,3,0,0,1,1,3,1,3,0,0,0
24 | 0,0,1,2,1,5,7,5,3,9,5,12,1,4,1,15,1,5,8,9,5,1,7,3,7,2,12,7,11,2,5,0,0,0,1,0,3,0,0,0
25 | 0,0,0,3,0,0,6,3,5,1,6,14,6,0,4,2,9,13,12,13,8,6,4,15,8,5,16,7,13,3,11,3,4,1,5,3,1,1,0,0
26 | 0,0,1,2,0,2,1,6,1,7,1,1,5,6,15,1,4,10,12,8,0,6,2,1,9,4,10,7,6,6,11,2,5,5,5,0,1,0,0,0
27 | 0,0,0,3,1,1,4,1,0,4,4,5,1,6,14,16,4,10,12,8,10,1,0,19,3,2,9,7,3,5,8,5,4,6,0,3,0,0,0,0
28 | 0,0,1,3,0,3,1,5,1,10,3,5,2,1,0,6,2,9,0,3,12,5,3,5,16,2,9,1,13,5,11,1,1,5,1,2,0,1,0,0
29 | 0,0,0,1,1,3,2,1,4,4,3,7,3,6,1,17,4,11,0,13,6,8,8,2,3,5,0,7,14,1,4,7,0,0,1,1,3,0,0,0
30 | 0,0,0,3,0,3,1,0,4,0,3,1,1,6,5,5,7,18,9,9,2,15,8,1,9,7,8,3,5,5,2,4,0,1,0,1,1,1,0,0
31 | 0,0,1,0,0,5,0,3,2,8,1,6,4,6,15,9,6,15,11,6,3,11,3,13,7,0,7,0,5,0,5,1,1,0,1,0,2,0,0,0
32 | 0,0,0,2,0,1,1,7,2,4,0,8,6,0,1,12,3,5,15,0,12,13,9,7,15,5,16,5,13,5,3,0,0,3,4,0,3,1,0,0
33 | 0,0,0,0,1,5,3,0,1,4,0,12,1,5,13,3,8,20,15,4,2,14,1,6,3,8,3,8,7,3,0,9,1,0,1,1,3,0,0,0
34 | 0,0,0,2,1,3,1,5,2,11,1,2,4,2,4,5,1,7,2,14,5,16,3,16,6,2,15,1,8,2,4,3,4,4,3,1,1,0,0,0
35 | 0,0,0,0,1,0,7,7,2,7,5,10,2,1,17,10,4,8,16,4,9,3,3,8,5,7,6,0,4,1,1,3,0,6,2,1,3,1,0,0
36 | 0,0,0,2,0,1,1,5,5,8,6,8,9,1,7,9,2,3,11,5,10,9,0,15,14,4,14,7,8,1,4,7,1,0,6,1,3,0,0,0
37 | 0,0,1,0,0,1,4,4,5,5,0,8,6,1,17,16,7,20,7,13,5,16,9,6,10,7,4,5,3,5,0,5,2,1,5,0,2,1,0,0
38 | 0,0,1,2,1,1,2,5,3,9,1,5,0,4,2,14,7,6,16,16,6,2,5,13,10,8,8,8,6,5,5,10,1,7,6,1,0,0,0,0
39 | 0,0,1,2,0,0,4,1,0,11,3,0,1,0,14,13,4,16,12,0,10,12,0,18,16,2,10,3,5,5,4,8,5,1,3,3,0,1,0,0
40 | 0,0,0,3,0,5,2,4,3,8,6,9,4,5,10,4,6,17,13,10,5,11,5,18,8,1,4,3,13,0,2,5,0,0,3,1,1,1,0,0
41 | 0,0,1,3,1,4,7,5,5,8,1,3,2,6,8,1,8,3,17,16,1,10,1,9,3,6,1,1,8,0,0,3,5,4,3,2,2,1,0,0
42 | 0,0,1,0,1,3,4,2,1,0,6,14,2,6,13,6,1,18,15,11,9,17,8,15,2,1,9,5,5,4,1,11,3,7,6,3,2,1,0,0
43 | 0,0,1,0,0,5,7,4,1,6,3,2,5,0,16,11,2,6,16,0,7,4,5,7,13,4,2,8,9,2,0,2,1,2,3,3,0,0,0,0
44 | 0,0,0,1,0,5,4,6,1,7,5,14,4,0,12,3,3,13,2,8,11,13,0,0,10,0,15,0,13,1,10,3,0,1,5,3,2,1,0,0
45 | 0,0,1,3,0,0,1,4,5,6,2,9,6,3,3,2,7,19,6,1,12,9,8,18,11,4,7,6,5,1,1,4,4,2,1,2,1,0,0,0
46 | 0,0,1,3,0,5,7,5,4,11,3,0,3,1,10,2,5,8,12,7,11,7,2,9,15,7,7,0,14,4,0,6,4,6,4,1,2,0,0,0
47 | 0,0,1,3,0,4,5,3,5,1,3,10,4,2,2,16,6,1,12,1,11,5,2,5,14,2,2,3,10,6,0,3,5,7,6,0,3,1,0,0
48 | 0,0,1,1,1,0,2,2,3,2,4,4,6,4,6,13,6,11,15,2,10,3,3,2,6,8,7,5,13,3,0,7,3,2,2,0,2,1,0,0
49 | 0,0,1,3,0,3,0,3,6,3,5,9,3,3,10,3,9,1,9,6,12,13,8,11,16,4,2,3,1,5,1,9,4,0,5,3,2,0,0,0
50 | 0,0,1,1,0,1,6,2,5,8,0,7,2,5,13,14,0,19,4,16,9,2,6,16,3,3,6,0,11,0,1,9,2,2,5,1,2,1,0,0
51 | 0,0,1,1,0,3,4,4,5,5,0,6,7,3,14,9,8,7,6,1,0,13,9,3,1,2,5,0,12,5,5,0,5,7,3,1,0,1,0,0
52 | 0,0,1,0,1,2,2,1,5,0,6,8,8,5,3,13,3,6,6,15,7,12,2,19,16,8,10,0,7,1,3,6,3,2,4,1,0,0,0,0
53 | 0,0,1,2,1,3,6,5,0,7,5,7,2,1,11,1,5,4,1,2,6,7,7,7,13,4,2,2,9,1,12,0,4,6,1,0,3,1,0,0
54 | 0,0,1,1,1,3,0,4,3,8,0,1,1,4,2,6,6,6,7,13,12,15,3,12,13,8,11,1,8,2,0,1,2,0,0,2,2,1,0,0
55 | 0,0,0,0,0,3,6,3,3,3,0,11,8,6,4,0,3,17,8,2,8,5,3,18,5,8,1,6,0,6,12,1,3,6,0,1,0,0,0,0
56 | 0,0,1,3,0,2,6,5,6,7,2,10,1,4,14,11,1,19,14,8,10,14,10,4,11,8,8,2,3,5,2,2,3,6,5,0,1,0,0,0
57 | 0,0,1,2,1,2,4,5,3,10,5,10,0,4,12,8,2,12,8,8,4,14,1,13,2,8,6,5,1,4,3,2,3,6,1,2,1,0,0,0
58 | 0,0,1,3,0,3,2,0,3,2,6,11,3,1,0,3,3,0,11,1,6,3,4,16,3,2,13,6,9,4,1,7,5,3,3,1,3,1,0,0
59 | 0,0,1,2,1,0,4,3,1,6,4,14,4,3,14,17,1,0,8,5,4,4,10,2,14,5,11,0,6,4,4,5,0,3,0,0,2,1,0,0
60 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
61 |
--------------------------------------------------------------------------------
/data/sample-data/sample-data-02.csv:
--------------------------------------------------------------------------------
1 | 0,0,0,1,3,4,6,5,2,7,7,8,6,11,5,6,10,4,5,9,15,15,14,13,14,12,10,9,8,8,6,6,6,6,5,4,2,1,1,0
2 | 0,0,2,2,4,2,1,7,5,7,3,6,10,5,5,14,14,9,11,10,5,5,5,15,6,6,10,13,6,8,3,5,7,7,3,2,2,0,2,1
3 | 0,1,2,3,2,1,4,1,8,7,4,5,10,3,11,5,11,8,18,4,17,9,5,6,15,14,11,5,6,4,7,2,5,6,4,5,4,0,2,1
4 | 0,0,0,0,1,2,4,7,3,5,8,7,5,13,10,7,11,8,18,6,13,4,10,13,5,5,4,3,8,9,2,3,2,3,5,3,1,3,1,1
5 | 0,1,0,2,1,2,3,6,5,2,9,3,5,12,9,5,8,11,9,4,19,19,15,9,6,12,9,3,6,2,9,9,8,5,3,5,3,0,2,1
6 | 0,0,1,3,4,4,2,2,6,3,2,9,4,11,12,8,6,8,8,7,18,11,13,13,10,5,7,11,3,6,9,6,4,5,1,4,1,0,0,0
7 | 0,1,0,3,2,3,2,2,4,6,4,11,11,8,3,9,11,7,12,16,10,5,17,8,11,15,6,8,11,10,6,9,4,3,3,3,1,3,1,1
8 | 0,1,2,1,4,1,2,7,2,2,8,9,5,6,12,12,6,14,6,5,12,11,11,5,10,7,6,6,10,2,4,5,8,3,5,3,3,0,0,1
9 | 0,0,1,2,2,1,4,2,7,4,10,6,4,3,6,5,8,13,8,8,12,4,13,4,13,4,14,5,12,10,6,3,2,1,5,3,4,3,2,0
10 | 0,1,1,1,4,2,1,3,5,3,2,3,11,3,4,5,14,4,5,8,18,18,13,5,11,4,13,12,11,4,10,9,3,3,6,3,2,0,2,0
11 | 0,1,0,0,2,3,5,2,5,8,4,7,7,8,5,5,8,15,14,4,10,12,8,14,11,14,5,13,4,6,8,3,6,5,5,2,4,2,2,1
12 | 0,1,2,3,2,5,6,4,2,4,6,9,6,9,6,6,14,11,6,18,6,13,18,7,15,13,3,12,8,8,5,2,5,7,4,2,2,3,2,0
13 | 0,0,0,1,4,1,4,1,7,7,8,7,7,4,9,3,8,17,17,9,13,19,5,10,8,7,5,3,7,4,6,5,4,1,5,2,1,0,0,0
14 | 0,0,2,1,1,4,5,7,8,5,5,3,6,9,7,8,10,10,13,19,18,15,4,11,6,4,8,11,7,5,4,3,7,3,5,4,4,0,1,0
15 | 0,1,0,3,4,3,3,7,6,8,4,11,6,10,10,7,12,9,11,17,10,16,17,4,5,8,4,8,10,8,5,5,4,7,4,2,3,1,0,1
16 | 0,0,0,3,1,3,5,1,6,5,3,4,8,11,11,3,4,12,14,17,7,9,4,8,8,15,3,12,9,10,6,6,3,3,2,5,4,3,1,0
17 | 0,1,0,0,4,5,5,6,8,9,2,11,4,13,5,15,13,5,13,7,7,5,12,4,12,10,7,4,4,10,10,7,8,2,4,3,4,0,1,1
18 | 0,0,2,0,2,3,2,4,4,3,10,5,8,9,8,12,15,10,9,4,17,5,13,12,15,5,8,10,9,5,3,9,4,2,6,4,2,0,1,1
19 | 0,0,2,1,3,4,3,2,7,3,5,7,9,8,6,3,7,12,13,15,20,7,5,17,13,5,5,13,8,6,8,4,5,1,1,5,3,2,1,1
20 | 0,0,0,3,4,2,2,5,2,8,6,10,7,13,7,11,10,6,12,14,8,7,9,12,11,5,5,13,7,7,4,9,4,7,2,1,2,3,0,1
21 | 0,1,1,2,4,1,6,3,8,8,8,9,8,7,12,9,5,7,9,11,8,7,11,6,8,13,14,5,3,7,10,6,8,6,5,4,4,2,0,0
22 | 0,0,2,3,3,1,5,3,3,6,8,4,12,8,12,11,14,9,5,7,11,13,13,4,13,12,14,6,7,5,3,4,3,1,1,3,4,3,2,1
23 | 0,1,2,3,2,4,1,3,6,2,10,11,7,3,9,6,11,15,4,19,16,9,18,4,6,12,6,5,9,6,9,5,2,4,6,2,1,3,2,1
24 | 0,1,0,3,4,5,6,5,4,3,3,9,9,13,10,12,14,7,15,16,15,7,15,6,9,7,10,9,4,8,2,6,8,2,6,4,1,3,0,1
25 | 0,1,0,1,4,2,2,7,7,8,7,11,9,5,5,6,14,7,6,14,8,17,5,13,8,6,13,13,10,10,4,2,2,7,6,3,4,1,1,1
26 | 0,0,2,2,2,4,3,7,6,9,10,10,3,5,14,14,9,15,16,17,15,10,4,14,12,6,8,12,4,3,6,4,8,3,2,5,1,1,2,1
27 | 0,1,0,2,3,5,3,6,3,7,6,5,11,7,14,9,7,8,6,4,12,5,12,6,5,6,3,7,3,8,7,7,4,7,5,3,2,2,2,0
28 | 0,1,1,0,2,3,4,1,3,8,8,8,7,6,6,11,13,9,9,9,10,14,8,5,13,4,5,3,3,2,9,2,2,6,5,2,1,1,1,1
29 | 0,0,2,3,4,5,2,3,8,6,6,5,10,8,7,15,14,6,6,6,8,7,12,10,7,12,5,8,12,11,4,5,5,6,6,2,2,2,0,0
30 | 0,0,1,1,3,2,4,3,4,8,4,3,4,13,11,14,6,6,15,16,10,19,10,15,14,13,7,9,4,2,6,8,2,1,1,5,4,2,1,1
31 | 0,1,0,2,2,2,3,1,4,9,9,2,5,6,13,7,13,8,17,15,7,13,11,13,9,5,7,13,10,5,9,3,8,4,6,1,2,3,1,1
32 | 0,0,1,1,1,3,5,4,2,2,6,10,9,9,5,5,5,11,18,18,6,14,12,8,15,5,4,4,11,4,5,7,3,4,6,3,2,1,2,1
33 | 0,1,0,2,2,5,2,3,2,9,4,2,12,11,6,4,9,11,4,18,19,5,4,6,7,7,10,13,9,2,8,4,3,5,4,2,3,0,0,1
34 | 0,1,1,3,2,5,2,5,2,2,9,5,10,11,14,14,15,8,4,13,6,13,11,13,9,5,10,12,8,8,2,2,2,2,6,5,3,1,1,0
35 | 0,1,1,2,2,3,2,7,7,8,7,9,4,5,3,9,8,8,11,19,5,16,13,7,16,12,8,7,11,8,3,4,6,1,1,1,4,3,1,0
36 | 0,1,1,2,4,4,4,4,4,5,5,11,3,5,6,13,8,14,5,14,9,6,9,15,9,6,4,7,4,6,7,2,4,4,4,3,1,2,0,1
37 | 0,0,2,1,1,1,4,7,3,2,9,7,11,4,5,4,16,16,9,4,16,5,16,17,4,9,6,4,10,11,9,9,6,4,6,1,1,0,2,1
38 | 0,0,1,1,3,1,4,4,4,7,9,2,3,11,5,10,12,8,6,6,16,13,10,6,7,10,9,7,4,6,5,7,4,3,6,3,1,2,1,1
39 | 0,0,1,0,3,3,1,7,4,8,8,2,12,5,12,15,4,12,12,13,20,8,14,5,14,15,6,5,4,4,6,9,5,1,2,1,4,2,0,0
40 | 0,1,0,1,4,2,2,5,4,7,3,11,3,12,11,6,4,15,15,16,8,4,16,15,8,7,12,10,5,5,9,5,8,1,3,4,4,2,0,0
41 | 0,0,1,2,3,5,4,6,7,7,2,8,9,6,4,9,7,14,6,11,17,16,13,12,16,12,6,5,8,3,8,5,3,1,4,3,1,2,0,1
42 | 0,1,2,3,1,3,5,2,2,4,5,9,12,4,7,13,15,4,15,12,15,18,5,16,4,15,8,9,4,9,2,2,6,1,2,3,3,2,1,0
43 | 0,1,1,1,2,2,6,3,5,2,10,4,7,13,3,5,14,10,9,16,18,11,15,5,9,14,8,4,3,3,2,8,4,1,4,1,1,1,2,1
44 | 0,1,1,2,1,1,5,3,5,4,9,8,11,3,5,15,6,6,8,19,8,15,18,10,12,10,10,6,9,3,10,9,7,6,3,3,1,2,0,0
45 | 0,1,2,0,2,1,4,1,5,7,3,2,5,6,6,9,4,17,11,10,16,12,17,13,10,7,13,6,8,9,8,3,8,2,6,1,1,3,2,0
46 | 0,0,1,2,3,2,3,5,3,9,8,4,3,9,8,14,6,15,13,4,17,8,9,17,9,5,6,8,10,6,3,7,4,4,3,1,1,0,2,0
47 | 0,1,1,0,1,1,2,7,8,6,4,4,9,3,10,14,14,11,6,8,18,5,13,10,4,5,3,12,9,7,8,8,2,4,3,4,3,2,1,1
48 | 0,0,0,1,1,1,4,2,5,4,10,9,7,9,3,15,12,6,14,17,16,18,5,8,10,12,10,11,11,8,10,9,8,5,1,3,4,3,0,1
49 | 0,0,0,2,4,4,1,2,7,4,7,7,10,7,14,9,6,17,8,8,8,9,6,15,15,12,10,9,11,6,4,7,7,2,4,1,4,1,1,1
50 | 0,1,1,1,1,1,1,3,3,4,10,2,6,7,12,8,6,5,11,19,8,10,6,9,15,7,13,7,10,3,3,8,2,2,1,3,2,0,2,1
51 | 0,1,1,2,2,4,5,3,4,6,2,3,10,3,7,15,10,8,12,7,13,12,9,7,8,4,9,8,12,10,6,2,4,3,4,3,3,1,0,0
52 | 0,0,0,1,3,2,6,5,6,6,7,8,3,13,5,12,4,12,10,18,13,7,7,4,15,13,5,8,10,3,7,6,3,4,5,5,2,1,1,0
53 | 0,0,1,0,2,2,3,3,4,8,5,2,8,7,9,7,9,4,7,4,6,11,10,10,8,14,4,5,3,10,6,5,8,3,6,2,3,3,2,0
54 | 0,0,2,2,2,1,6,4,4,2,2,3,7,4,8,15,8,12,17,10,17,8,13,13,8,7,3,9,6,2,3,4,8,2,1,1,2,1,2,0
55 | 0,0,0,1,4,2,1,4,8,7,7,10,12,5,4,4,12,7,18,9,16,19,11,7,14,8,11,11,10,9,9,8,4,7,6,5,2,2,1,1
56 | 0,0,2,2,4,2,3,6,4,5,4,2,5,4,11,13,4,10,16,16,6,16,7,14,5,7,11,10,12,10,8,6,4,1,2,2,4,1,2,0
57 | 0,1,0,2,2,1,6,2,2,2,9,5,9,12,5,12,10,13,9,4,17,14,5,10,12,3,13,4,9,8,8,6,7,4,4,5,4,0,2,0
58 | 0,0,2,0,4,3,5,5,6,9,4,5,4,3,10,3,7,11,12,10,19,16,17,14,16,9,12,5,10,11,6,7,7,3,3,1,1,0,2,0
59 | 0,0,0,3,3,1,5,7,7,7,6,8,7,6,10,14,6,12,5,15,20,18,14,17,14,11,13,10,9,5,5,5,5,7,1,5,3,2,2,0
60 | 0,1,2,0,4,5,6,6,2,5,10,10,3,7,13,9,5,16,6,18,15,10,13,11,12,15,10,12,3,8,8,7,5,6,2,5,2,3,2,0
61 |
--------------------------------------------------------------------------------
/data/sample-data/sample-data-09.csv:
--------------------------------------------------------------------------------
1 | 0,0,0,2,4,5,5,4,4,6,8,2,3,8,7,13,8,14,17,6,5,15,14,13,8,6,9,9,11,10,3,5,3,1,5,4,4,3,2,1
2 | 0,1,0,1,3,1,5,3,8,5,8,7,11,4,14,13,9,6,15,12,6,5,11,11,14,5,6,6,5,5,8,5,5,4,5,2,2,3,2,1
3 | 0,1,2,2,4,1,4,2,7,5,10,6,12,3,9,9,9,5,6,12,14,19,9,6,7,6,14,3,11,2,2,4,3,7,4,5,3,0,2,1
4 | 0,1,1,2,2,1,5,3,5,6,3,7,9,8,11,9,4,16,4,17,13,12,8,4,9,13,5,6,8,10,3,8,2,4,6,2,2,3,0,1
5 | 0,1,2,3,3,5,3,4,4,6,8,7,10,11,6,13,4,6,5,6,10,10,17,6,9,14,13,5,3,9,9,3,7,1,6,1,3,0,1,1
6 | 0,1,0,2,4,2,4,5,5,6,4,4,5,4,10,10,10,11,11,4,18,11,14,14,12,5,13,4,7,11,10,4,2,5,6,1,2,3,0,1
7 | 0,0,0,3,2,4,3,1,6,4,2,6,7,8,10,6,16,10,15,5,16,18,16,4,9,13,7,11,6,7,7,6,5,6,5,4,2,1,2,1
8 | 0,0,1,2,1,1,2,1,8,8,10,7,8,7,6,14,11,9,4,8,6,9,18,6,7,12,4,6,8,3,3,8,2,1,3,1,3,3,0,0
9 | 0,0,2,1,3,1,2,7,2,8,7,7,9,13,5,10,9,10,5,16,7,4,8,6,10,13,11,5,8,4,3,3,5,5,4,5,3,3,1,0
10 | 0,1,1,2,3,5,1,4,5,6,4,6,9,13,11,7,5,8,9,12,8,6,4,14,14,14,14,11,4,8,6,7,3,1,1,5,1,3,1,0
11 | 0,1,0,3,1,2,4,4,6,3,8,9,4,10,10,3,12,17,18,15,13,19,10,8,8,5,12,10,11,4,7,3,7,6,3,5,1,3,2,0
12 | 0,1,0,1,1,3,1,5,8,3,5,5,12,6,6,10,14,11,7,18,19,16,5,15,4,15,4,7,10,11,2,3,4,1,5,2,1,0,1,0
13 | 0,1,0,1,4,2,2,2,2,7,4,4,7,3,6,6,15,10,17,17,20,7,6,16,4,7,9,13,3,11,5,5,7,2,5,3,1,3,1,0
14 | 0,1,0,3,1,1,2,7,6,5,8,2,11,4,3,10,8,9,18,6,20,11,14,9,9,10,4,6,3,10,7,7,4,7,2,5,3,1,2,0
15 | 0,1,0,3,4,3,1,4,3,7,3,6,7,3,11,3,6,7,14,12,18,12,13,9,11,13,13,7,8,4,10,6,7,2,6,2,1,3,0,1
16 | 0,0,0,0,4,1,3,7,5,6,2,6,9,6,3,5,13,14,16,18,9,13,4,4,16,9,11,6,12,2,8,4,4,3,6,3,3,2,2,0
17 | 0,0,0,1,4,4,1,2,8,7,4,9,10,12,11,13,9,10,12,16,7,14,16,17,15,10,12,11,10,5,10,4,7,5,3,1,3,3,0,1
18 | 0,1,1,0,1,5,6,6,2,9,10,7,5,11,9,15,8,11,4,8,15,19,4,13,5,9,11,3,9,10,10,2,7,1,3,1,3,2,1,0
19 | 0,0,2,3,4,3,3,6,6,2,6,11,11,10,10,6,9,5,9,17,7,8,9,13,11,9,10,8,5,7,3,4,6,2,4,2,3,1,1,0
20 | 0,1,2,3,2,3,3,3,5,9,6,9,10,9,14,10,6,4,16,14,6,8,12,7,15,14,7,8,3,10,2,6,2,4,2,1,2,3,2,0
21 | 0,1,2,1,1,4,4,1,5,2,9,4,9,11,9,3,5,13,13,6,16,10,12,16,10,3,10,8,4,7,5,9,5,5,1,5,3,2,2,1
22 | 0,0,0,1,4,4,6,2,4,8,8,4,5,6,14,12,7,5,8,14,5,7,8,17,15,6,5,9,8,8,8,5,2,3,4,1,4,0,1,1
23 | 0,1,1,3,2,2,6,4,5,9,7,8,4,13,3,11,7,7,17,8,12,11,9,6,13,14,11,13,10,3,5,4,7,6,3,4,1,3,1,1
24 | 0,0,2,2,3,4,5,1,7,6,5,6,6,8,10,4,16,15,5,7,6,9,14,11,14,8,6,10,5,11,8,4,5,3,2,1,4,3,1,1
25 | 0,1,1,1,2,2,1,3,5,6,5,6,7,5,8,12,9,5,4,7,12,13,7,14,15,9,3,11,7,9,4,6,2,1,6,1,3,3,2,1
26 | 0,1,0,0,4,5,3,5,5,8,8,5,9,7,8,5,4,4,18,14,16,13,12,7,7,12,12,9,10,6,10,3,2,1,4,3,3,0,2,0
27 | 0,1,0,3,3,5,1,2,3,5,5,5,5,9,11,8,5,6,15,13,9,14,13,6,6,4,6,13,9,6,9,4,6,2,4,3,3,2,0,1
28 | 0,0,0,3,4,5,5,2,3,2,4,7,4,5,4,13,12,14,12,12,11,8,17,17,5,3,7,4,9,2,4,7,7,7,6,1,1,1,2,0
29 | 0,1,0,0,1,5,5,6,3,3,8,9,9,6,7,14,14,9,18,6,12,13,10,12,16,5,10,13,9,7,9,2,6,7,5,3,2,1,1,1
30 | 0,0,2,3,3,2,6,2,5,8,5,10,5,8,9,3,4,13,17,5,7,6,5,10,6,12,7,10,4,11,5,9,5,1,3,2,2,1,0,0
31 | 0,0,1,3,1,5,4,2,4,8,3,7,3,13,6,11,16,16,17,13,13,11,7,17,16,7,4,12,9,10,10,9,5,7,3,2,3,2,1,1
32 | 0,1,1,1,1,2,3,4,8,5,6,8,6,13,7,14,7,12,15,10,5,7,6,6,13,11,10,4,6,11,2,4,2,7,5,5,1,1,0,0
33 | 0,0,2,2,3,2,3,1,8,9,6,6,10,12,6,9,7,12,11,17,15,18,15,13,15,3,11,9,8,10,2,2,3,7,2,2,4,2,2,0
34 | 0,1,0,2,2,3,5,3,3,5,5,4,12,5,10,4,6,10,10,6,13,9,13,12,13,12,11,8,9,9,8,9,5,3,2,2,1,0,0,1
35 | 0,1,1,3,1,5,4,4,6,6,10,10,8,4,4,11,15,6,6,7,10,15,11,17,6,13,7,9,11,6,10,2,3,2,2,5,1,1,0,1
36 | 0,0,0,3,3,2,2,7,7,9,2,8,4,3,7,12,5,5,4,18,19,9,15,13,11,14,9,7,10,6,7,5,8,7,5,1,1,0,2,0
37 | 0,0,2,1,1,5,5,1,7,9,3,9,5,6,8,8,12,4,12,14,18,5,7,11,16,14,12,11,8,5,3,9,2,4,6,4,4,1,2,0
38 | 0,1,0,2,1,2,5,5,2,2,3,11,5,5,6,3,6,9,10,7,14,8,7,7,14,14,5,10,5,8,9,9,6,5,1,1,3,3,1,0
39 | 0,1,1,2,3,4,1,5,6,7,4,2,11,11,11,8,13,4,11,16,12,18,18,11,9,5,3,7,7,11,7,5,4,5,3,1,2,2,1,1
40 | 0,1,2,2,4,2,2,4,4,7,9,8,12,3,6,7,14,9,7,13,9,11,10,12,10,4,4,11,5,7,8,4,6,1,4,5,3,0,1,1
41 | 0,1,2,3,4,1,2,7,5,3,8,7,6,12,6,13,14,11,16,8,8,9,5,15,4,11,10,3,9,7,9,3,7,1,4,5,4,0,1,0
42 | 0,0,1,1,1,5,5,5,8,7,10,10,11,3,3,7,16,8,9,18,13,5,18,4,16,13,5,7,9,4,5,9,6,2,2,3,3,1,0,1
43 | 0,1,1,0,1,3,1,5,5,8,9,6,8,12,13,10,10,11,9,13,14,11,12,15,8,4,11,4,8,8,8,6,6,4,2,5,4,0,1,1
44 | 0,0,0,0,4,4,6,7,7,8,4,5,7,3,14,9,5,15,13,12,20,16,14,15,6,13,6,13,5,4,5,3,2,5,2,4,4,0,0,0
45 | 0,1,0,3,1,3,2,5,5,5,6,2,5,7,9,13,6,17,16,4,15,5,11,13,6,15,9,8,9,9,5,7,5,6,5,4,2,0,2,0
46 | 0,0,1,3,3,3,2,5,3,4,2,11,4,7,11,3,12,4,10,17,6,17,9,7,12,8,8,6,10,5,4,3,3,1,2,4,1,0,2,1
47 | 0,1,1,0,2,3,3,3,6,5,4,11,4,4,9,7,9,16,6,13,10,9,6,13,5,7,12,8,11,7,9,5,6,7,5,1,4,2,2,0
48 | 0,0,1,0,4,2,2,2,3,9,2,9,3,3,9,12,16,9,13,5,15,16,13,5,15,9,11,11,11,7,10,7,6,6,1,2,2,2,0,0
49 | 0,1,2,3,2,1,5,6,5,6,10,5,5,12,6,5,11,15,17,12,11,5,18,9,6,10,5,11,9,6,5,8,8,4,4,2,4,3,2,0
50 | 0,1,0,0,2,1,1,1,4,9,10,5,7,3,5,9,12,17,7,10,9,9,18,13,5,7,3,7,7,8,6,8,6,2,1,3,3,2,0,1
51 | 0,0,0,0,1,1,5,5,8,4,9,2,12,3,4,4,5,5,13,15,17,12,5,17,5,5,11,6,4,8,3,9,3,1,2,2,3,3,2,0
52 | 0,1,2,1,4,1,6,6,3,3,4,9,8,10,9,7,16,7,5,4,20,18,7,6,7,6,11,7,11,9,3,9,5,3,5,5,3,1,2,1
53 | 0,0,0,2,3,1,2,6,3,6,10,11,6,13,5,9,11,8,13,16,20,8,13,5,13,6,6,8,5,3,2,5,3,6,5,4,2,3,2,1
54 | 0,1,1,1,2,2,5,5,5,9,6,4,6,12,4,5,11,17,5,19,10,6,8,7,10,13,14,4,8,2,7,3,2,5,4,5,4,3,0,0
55 | 0,0,0,3,1,1,6,3,4,8,10,10,6,12,13,9,6,10,18,8,8,4,4,15,6,7,14,11,5,2,8,3,3,6,4,1,3,1,1,1
56 | 0,0,1,0,4,1,2,4,7,2,6,7,7,7,13,7,11,7,8,8,5,11,10,12,14,10,6,9,11,8,4,2,8,7,4,2,3,0,0,0
57 | 0,0,2,0,4,3,5,7,5,7,3,8,6,3,11,11,6,9,6,10,5,14,17,17,10,8,3,12,11,10,10,2,8,3,1,1,2,1,2,1
58 | 0,1,2,0,4,3,6,5,2,9,7,2,8,11,9,9,8,14,17,8,15,13,4,4,8,11,13,3,12,2,7,5,3,7,4,1,3,2,0,1
59 | 0,0,0,2,3,2,3,6,3,7,7,3,12,5,7,12,12,15,9,18,11,13,5,15,8,11,3,11,12,11,2,2,2,5,2,3,3,1,1,0
60 | 0,1,2,0,4,2,2,7,5,5,9,8,4,9,7,7,9,12,10,6,18,14,14,10,6,8,4,5,5,10,5,9,7,1,1,4,2,0,0,1
61 |
--------------------------------------------------------------------------------
/data/sample-data/sample-data-06.csv:
--------------------------------------------------------------------------------
1 | 0,0,2,0,3,4,5,7,6,7,8,4,4,6,9,5,10,12,16,8,19,17,16,16,12,12,12,9,8,4,2,8,3,5,6,3,2,2,0,0
2 | 0,1,0,2,2,4,2,4,2,8,7,8,5,6,12,3,13,14,18,4,10,17,14,11,9,15,3,10,3,8,10,7,6,3,6,1,1,3,0,0
3 | 0,0,1,0,2,2,2,5,5,7,7,6,8,5,7,13,14,11,15,16,6,14,11,10,9,5,4,7,8,7,4,7,2,1,5,2,3,2,0,1
4 | 0,0,0,2,4,2,2,2,4,4,5,8,5,9,8,13,8,9,11,15,7,8,18,14,16,3,6,7,9,6,8,7,2,3,2,2,1,2,0,1
5 | 0,1,2,1,3,2,5,7,3,8,3,6,5,5,3,15,16,6,15,6,18,13,4,10,5,5,12,3,7,7,3,3,4,6,6,1,1,0,2,0
6 | 0,0,1,2,3,1,1,7,8,2,2,6,8,12,12,14,6,5,18,12,13,6,17,8,14,3,4,7,7,4,5,7,4,5,2,2,4,0,2,1
7 | 0,1,0,0,4,3,1,3,8,6,9,3,10,6,3,14,7,15,18,6,7,4,10,5,9,12,4,6,8,5,10,9,4,7,1,5,1,3,1,0
8 | 0,1,0,3,2,1,4,6,5,6,3,9,11,13,11,15,16,13,18,7,9,6,15,10,16,5,7,10,9,9,3,4,7,2,4,2,4,0,2,1
9 | 0,0,1,3,1,4,6,5,5,8,7,8,4,13,6,14,16,16,11,8,16,11,8,8,16,8,6,4,11,5,6,8,7,3,5,4,2,3,0,1
10 | 0,0,0,2,3,3,5,2,3,3,4,2,8,10,5,13,7,4,15,9,11,5,12,4,11,7,4,6,6,3,4,3,8,2,1,5,4,1,2,0
11 | 0,1,2,3,3,5,5,3,2,6,10,9,6,6,10,3,11,4,7,7,20,5,9,8,9,4,6,4,6,8,8,2,5,1,2,1,3,2,2,0
12 | 0,1,1,3,1,1,4,3,5,4,3,6,9,13,10,10,12,14,14,12,5,14,10,9,10,10,11,4,10,6,4,9,2,6,4,2,2,3,2,0
13 | 0,0,1,1,2,3,3,4,7,7,7,9,9,13,12,8,10,15,18,9,11,7,5,13,13,9,4,10,4,8,6,5,7,1,6,2,4,3,2,1
14 | 0,1,0,3,1,3,1,2,3,8,5,5,4,4,6,5,10,7,7,19,15,5,11,6,11,11,7,8,5,8,6,4,6,6,4,1,1,2,1,1
15 | 0,1,1,2,4,3,4,1,6,7,6,2,10,12,9,8,8,14,18,15,16,15,16,9,10,12,14,12,8,5,4,5,2,7,5,1,4,3,1,0
16 | 0,0,2,1,4,1,5,4,5,6,10,11,3,5,13,11,4,8,13,11,6,10,12,5,16,4,9,5,3,4,7,4,6,7,5,2,3,2,2,0
17 | 0,1,2,1,4,4,4,3,2,9,7,2,9,3,11,12,14,8,18,9,8,13,4,12,14,3,10,12,8,8,10,8,6,2,6,3,1,1,2,0
18 | 0,0,1,2,3,4,6,7,2,3,6,5,12,13,4,12,8,14,13,18,7,18,9,9,15,7,12,11,4,7,10,7,2,3,2,5,4,0,1,0
19 | 0,1,1,1,1,3,1,4,8,3,3,10,6,10,9,5,11,10,6,9,19,4,18,7,10,15,3,3,10,9,10,3,6,1,1,2,3,2,1,0
20 | 0,1,0,3,4,5,5,3,6,2,8,4,10,8,12,12,11,4,18,6,19,5,7,14,14,5,8,4,10,6,3,8,7,1,6,5,3,2,0,1
21 | 0,1,0,1,4,1,1,5,5,3,4,3,11,6,11,11,6,12,13,10,16,5,15,15,12,5,13,5,8,6,9,7,3,3,3,1,4,2,1,1
22 | 0,0,2,1,4,2,1,4,4,5,6,11,7,10,8,7,16,11,16,11,9,7,6,17,9,3,4,6,9,11,7,5,8,6,4,2,1,3,2,1
23 | 0,0,0,0,2,5,5,1,6,2,8,3,8,13,10,7,7,6,4,9,7,8,17,15,8,14,4,12,5,3,9,7,7,6,3,5,2,3,0,1
24 | 0,0,0,1,4,4,3,7,8,8,10,11,10,11,7,4,13,8,12,13,12,17,7,16,7,8,4,10,5,7,9,2,7,7,3,1,3,1,0,0
25 | 0,0,1,2,1,5,4,7,2,4,9,10,4,4,10,11,5,8,11,6,8,17,5,15,12,11,8,8,5,5,3,5,4,5,1,4,4,1,1,0
26 | 0,0,2,2,1,4,6,5,8,5,6,9,7,7,10,5,14,7,7,13,6,11,7,11,8,12,10,5,4,5,10,5,3,1,1,2,1,3,2,1
27 | 0,0,2,0,3,1,4,3,7,8,3,11,3,10,9,9,7,5,7,10,9,7,6,7,7,4,11,6,5,7,3,5,3,4,2,2,2,1,1,0
28 | 0,0,2,3,3,3,1,5,3,2,4,11,9,11,14,5,11,14,6,18,14,7,10,13,10,15,13,10,12,5,3,5,6,3,5,2,3,2,0,0
29 | 0,0,2,1,2,3,5,5,6,7,5,4,12,9,5,14,6,14,7,4,7,17,9,9,12,14,6,13,4,3,6,9,8,7,3,1,1,2,1,1
30 | 0,0,0,3,2,3,1,4,8,8,2,2,8,3,5,8,7,4,16,11,18,12,8,9,7,10,12,8,8,7,9,8,5,2,1,5,4,2,1,0
31 | 0,0,0,0,3,4,6,6,8,5,2,9,8,8,11,8,10,12,8,13,9,5,5,17,13,9,3,5,11,4,4,2,4,5,5,2,4,1,1,0
32 | 0,1,2,2,2,1,5,7,2,6,10,4,7,8,4,9,5,15,12,11,13,9,7,16,6,7,13,4,3,6,5,3,3,5,2,3,4,1,0,1
33 | 0,0,1,3,1,5,1,7,5,5,2,7,6,11,10,8,13,16,6,7,11,4,11,14,13,7,6,4,3,10,4,8,2,7,4,4,2,1,1,0
34 | 0,1,1,3,3,1,3,6,2,8,5,6,12,4,4,13,15,17,12,11,6,11,4,7,11,8,13,6,4,9,8,6,2,1,6,1,1,1,2,0
35 | 0,1,0,0,3,3,4,6,2,8,4,9,6,4,8,14,15,16,7,18,6,8,13,7,6,7,9,6,4,7,10,3,7,7,6,4,1,1,1,0
36 | 0,1,1,0,2,5,6,3,8,2,9,9,4,4,9,9,13,14,10,17,10,19,11,12,5,13,7,5,6,5,3,4,4,1,5,2,3,1,1,1
37 | 0,1,1,2,2,1,2,2,8,4,8,10,10,13,7,9,12,5,10,10,17,14,9,12,7,15,11,9,4,11,7,2,5,6,6,4,2,0,1,1
38 | 0,1,1,2,4,1,6,6,7,9,6,2,3,7,14,3,12,14,17,9,17,5,7,15,11,4,8,11,8,7,8,3,6,3,6,2,2,0,2,0
39 | 0,0,0,2,4,5,6,1,6,8,5,9,12,9,12,9,15,4,14,4,18,13,11,8,12,14,11,10,3,7,10,6,2,3,6,4,1,2,2,0
40 | 0,0,0,3,4,5,6,5,5,9,6,3,9,12,14,13,16,14,18,9,6,15,7,10,6,5,7,7,10,11,10,2,6,6,2,2,1,3,1,1
41 | 0,0,1,1,1,5,4,3,5,9,8,10,9,13,5,4,14,7,10,14,20,7,7,12,14,8,12,5,7,8,10,5,7,4,2,4,4,2,0,0
42 | 0,0,1,0,1,2,1,4,6,6,10,5,6,13,4,9,7,10,5,10,18,14,16,10,7,8,11,8,3,2,3,9,4,7,3,2,2,0,2,0
43 | 0,1,1,2,1,1,3,7,2,8,10,10,7,9,10,5,13,4,12,17,5,5,16,16,15,9,7,3,10,10,2,9,3,4,1,4,1,0,0,0
44 | 0,1,0,3,1,3,6,1,2,5,2,11,6,10,8,5,6,8,17,14,16,4,15,13,16,5,5,8,10,7,5,6,6,6,5,2,4,0,0,0
45 | 0,0,2,0,4,5,6,5,6,4,3,6,11,6,11,13,13,4,5,4,9,15,7,5,5,7,12,5,8,3,3,6,4,5,5,2,3,3,0,0
46 | 0,1,2,2,4,1,4,2,6,8,8,3,8,13,6,8,16,11,18,16,11,11,12,6,9,6,12,4,11,6,10,4,5,3,4,5,2,0,1,0
47 | 0,1,0,3,2,4,2,6,5,7,4,3,8,4,8,3,7,7,11,13,7,7,10,17,5,4,6,7,6,3,8,8,8,2,5,3,2,1,2,0
48 | 0,0,0,0,2,1,5,3,3,7,8,9,5,7,8,4,11,9,12,18,6,7,11,16,10,3,6,6,12,5,3,4,2,4,4,5,2,2,1,1
49 | 0,0,1,2,4,3,6,5,4,6,8,7,9,9,13,11,14,7,5,11,9,14,16,11,12,13,7,3,7,10,3,6,4,2,4,4,3,1,1,1
50 | 0,0,2,3,1,2,4,2,3,3,3,10,5,13,7,9,15,13,6,17,14,4,12,10,12,8,13,11,10,3,7,4,2,7,5,5,3,1,0,0
51 | 0,1,0,0,2,1,2,3,3,7,2,9,9,6,12,14,15,13,18,17,14,10,8,14,4,6,3,8,3,11,9,4,2,6,5,3,1,3,0,0
52 | 0,0,1,2,2,2,6,2,3,2,4,8,10,7,6,11,6,17,4,17,12,15,17,11,4,9,9,13,3,7,5,2,5,4,6,2,2,0,1,0
53 | 0,0,2,3,4,2,6,3,4,3,4,7,10,11,11,14,16,6,6,17,7,12,17,7,9,7,10,4,3,8,9,9,6,6,6,4,1,0,1,1
54 | 0,0,1,2,1,5,4,3,8,2,10,11,9,7,8,4,15,7,13,9,12,9,15,13,9,11,11,4,9,5,5,7,3,6,6,2,3,1,1,0
55 | 0,1,1,0,3,2,2,7,2,5,7,9,12,4,5,9,16,11,9,15,18,5,10,13,7,11,3,13,6,11,2,8,7,7,4,4,3,2,0,1
56 | 0,1,0,1,2,2,4,3,6,5,2,4,10,3,8,7,11,10,9,12,11,16,12,14,9,3,10,12,5,2,5,8,7,6,4,1,4,3,2,1
57 | 0,1,0,3,3,1,3,2,3,2,10,5,6,4,3,11,8,7,14,12,7,14,8,9,14,14,3,11,8,9,5,3,6,3,1,3,3,2,2,0
58 | 0,0,2,2,4,3,1,3,4,4,7,3,10,9,11,8,5,8,14,16,16,18,9,12,14,3,9,11,7,8,2,3,7,4,3,4,3,2,2,0
59 | 0,1,0,2,4,1,4,3,6,8,7,7,6,7,6,14,9,7,4,18,13,14,18,4,7,6,10,9,12,10,10,9,6,5,2,3,2,1,0,1
60 | 0,0,1,1,4,3,5,1,3,6,6,6,12,5,7,12,16,14,10,10,9,10,9,8,9,9,6,12,12,2,5,4,8,5,6,5,1,3,2,0
61 |
--------------------------------------------------------------------------------
/data/sample-data/sample-data-07.csv:
--------------------------------------------------------------------------------
1 | 0,1,0,2,2,5,6,2,4,7,2,2,11,5,6,4,4,7,18,17,9,5,7,15,10,4,10,3,3,2,3,4,3,7,3,3,4,1,1,1
2 | 0,1,0,2,3,4,1,5,3,9,2,5,8,10,10,14,15,16,7,9,10,14,6,9,4,6,6,12,7,3,9,5,6,7,3,2,1,0,0,1
3 | 0,0,1,2,3,4,6,7,6,4,5,9,6,13,5,12,8,10,7,6,7,12,8,13,6,9,14,6,12,2,9,9,3,3,2,2,1,1,1,0
4 | 0,1,2,2,1,1,3,4,7,4,2,7,12,6,9,10,12,8,11,15,5,16,18,10,16,8,7,8,5,4,6,8,4,4,5,2,1,2,2,1
5 | 0,0,2,1,2,5,3,5,6,4,4,2,9,3,10,15,5,17,16,6,6,16,7,6,13,8,4,5,3,10,2,2,8,5,3,3,2,1,0,0
6 | 0,0,1,0,2,5,1,1,7,5,3,10,8,10,7,6,10,11,8,17,8,17,7,7,7,14,8,9,4,5,8,3,7,3,3,5,4,2,2,0
7 | 0,1,0,3,1,1,1,1,6,5,7,3,4,4,9,10,12,8,5,19,14,15,11,5,4,13,7,10,3,5,5,5,8,5,1,3,4,1,0,0
8 | 0,0,1,0,1,2,1,1,6,7,10,10,6,13,11,6,6,11,5,5,14,18,14,14,5,3,12,5,7,8,4,5,7,1,3,4,4,2,2,0
9 | 0,0,2,1,1,4,6,5,5,6,2,2,6,4,10,6,5,15,12,5,12,14,9,16,8,10,9,7,4,10,5,5,7,3,1,3,2,2,1,0
10 | 0,0,2,2,1,1,6,4,6,3,10,6,12,5,5,10,8,6,10,14,15,17,17,4,15,12,7,3,11,6,8,4,4,1,5,4,1,3,1,1
11 | 0,1,2,0,2,2,4,7,4,4,4,3,6,3,9,8,13,12,8,5,6,12,14,5,10,6,7,10,11,7,6,4,8,3,4,5,4,1,1,0
12 | 0,0,2,0,4,2,2,5,3,6,6,7,9,4,3,13,16,10,16,5,12,7,12,5,5,12,4,12,4,9,6,4,6,5,4,3,1,3,0,1
13 | 0,1,0,3,1,5,1,5,7,4,10,4,7,12,11,8,13,17,5,15,18,12,5,17,13,3,8,4,12,2,7,3,8,7,5,4,4,3,0,1
14 | 0,0,1,2,2,4,5,3,6,8,4,11,8,4,4,4,6,17,5,10,15,15,7,13,16,12,4,9,8,4,4,5,4,6,5,2,4,1,0,0
15 | 0,0,0,3,1,4,6,5,4,3,5,9,9,9,8,5,5,5,17,10,19,10,8,9,11,4,9,7,3,8,4,6,3,6,4,4,1,3,2,1
16 | 0,1,1,1,2,2,1,7,2,5,9,5,8,3,7,3,5,7,10,10,13,8,4,5,8,12,7,8,12,2,9,4,4,1,5,3,2,3,1,0
17 | 0,0,1,3,4,5,5,1,3,3,8,2,5,3,8,14,15,5,6,8,16,15,7,12,11,11,7,4,12,7,4,8,8,1,6,2,1,1,2,1
18 | 0,1,1,1,1,4,2,4,4,4,6,8,11,13,12,3,9,11,14,17,12,16,8,13,7,15,14,9,10,7,7,3,2,2,1,3,3,1,0,1
19 | 0,0,1,3,4,1,6,3,4,3,7,3,9,5,12,7,8,11,17,17,13,7,7,5,14,5,11,4,7,2,9,4,7,1,3,4,1,1,1,0
20 | 0,0,1,3,3,2,5,3,6,4,5,8,12,4,12,13,7,5,16,12,20,4,16,7,5,3,10,11,5,10,10,7,2,7,4,5,2,3,2,0
21 | 0,0,1,0,2,2,2,1,4,8,10,4,12,9,6,9,5,13,15,12,20,12,12,11,15,10,4,7,4,7,6,2,5,7,5,5,1,0,2,0
22 | 0,0,1,0,1,2,4,4,3,2,2,5,10,5,10,4,10,16,9,14,5,16,11,13,5,3,9,13,7,6,3,7,2,7,1,1,4,1,1,1
23 | 0,1,1,1,3,3,4,3,2,8,10,9,4,13,4,15,10,12,4,15,7,9,16,16,7,8,8,10,5,9,4,3,4,5,6,2,1,1,0,1
24 | 0,1,0,2,2,4,1,4,5,8,10,5,8,13,10,4,5,7,16,18,20,10,13,12,15,12,12,13,9,9,10,3,3,3,6,4,2,3,1,0
25 | 0,0,1,1,4,5,2,1,2,8,10,7,4,5,11,11,7,7,17,6,14,5,17,8,9,15,9,12,12,5,8,6,6,3,1,1,2,3,1,1
26 | 0,1,1,0,3,1,4,5,4,2,10,4,10,12,5,7,13,9,18,5,8,19,13,8,7,14,4,13,3,11,3,7,3,2,1,1,2,3,2,1
27 | 0,1,2,2,4,2,3,6,4,2,5,7,10,8,5,11,8,16,14,19,11,5,10,10,4,9,9,11,7,9,5,9,3,7,2,4,3,2,1,1
28 | 0,0,1,1,3,4,3,3,4,6,4,5,3,12,11,14,14,9,13,7,19,5,14,16,16,11,10,10,9,3,6,3,4,5,6,1,3,0,0,1
29 | 0,0,0,1,2,4,6,7,7,2,3,5,9,10,8,3,9,13,9,13,17,10,13,14,11,13,13,12,3,3,7,8,7,4,3,3,1,0,0,0
30 | 0,0,2,2,3,5,6,3,7,8,8,11,4,6,6,3,13,5,10,11,14,19,14,12,7,10,14,10,7,4,4,5,2,5,4,1,4,1,2,1
31 | 0,0,2,1,1,2,1,2,8,8,8,5,5,5,11,3,16,6,9,13,15,8,15,5,15,6,7,7,11,2,2,6,3,1,6,5,3,2,1,0
32 | 0,1,1,3,2,5,3,3,4,6,7,2,7,6,14,6,15,13,7,5,5,12,10,7,6,15,14,12,4,6,3,8,7,5,2,4,4,3,1,0
33 | 0,1,1,3,1,5,1,7,8,6,8,8,7,7,7,10,6,17,9,10,15,12,11,13,4,8,11,9,11,5,7,5,4,1,3,4,3,0,2,1
34 | 0,1,0,1,3,3,2,2,4,8,8,4,5,6,6,10,14,5,6,13,12,16,15,12,7,6,4,7,10,7,7,7,3,6,5,3,3,2,0,0
35 | 0,1,1,1,1,1,5,6,4,6,8,9,12,10,7,15,16,14,13,15,15,7,13,11,7,7,11,13,7,3,10,3,3,1,6,2,2,1,1,1
36 | 0,1,2,2,4,4,5,1,2,3,10,3,12,10,11,7,10,8,4,11,14,19,16,14,8,7,14,5,5,4,3,6,4,4,2,3,3,3,1,1
37 | 0,0,1,1,4,1,4,7,6,7,2,6,7,6,12,13,9,9,16,6,16,4,14,6,14,14,9,11,6,11,5,3,4,5,3,3,3,0,1,1
38 | 0,1,0,3,4,2,5,7,5,2,3,10,12,8,7,7,10,10,5,18,13,18,16,13,9,12,12,6,12,6,5,2,7,7,5,1,4,1,1,0
39 | 0,1,1,2,3,2,1,3,8,5,10,7,9,7,6,7,5,4,14,4,14,18,11,13,6,13,6,13,4,11,7,8,2,2,1,5,4,1,2,1
40 | 0,1,1,2,3,2,5,1,3,3,10,10,7,12,4,11,13,9,10,12,13,6,11,11,6,7,11,11,12,3,5,7,3,5,2,3,4,0,0,1
41 | 0,1,2,2,1,5,6,1,4,4,5,4,8,10,4,4,13,16,6,11,13,18,4,15,15,4,5,4,8,3,6,6,2,1,1,1,4,3,2,0
42 | 0,1,1,3,3,3,2,1,2,9,2,2,6,9,10,3,5,16,9,6,18,16,12,8,11,15,7,11,4,8,8,4,8,3,2,1,3,2,2,1
43 | 0,0,1,0,4,1,2,5,7,8,6,4,10,6,5,3,16,16,4,12,14,10,17,10,13,12,10,10,8,2,4,3,5,7,5,3,4,2,1,0
44 | 0,1,0,3,3,1,4,5,5,7,7,8,4,7,13,12,16,7,4,8,5,9,10,17,16,7,9,13,4,6,8,6,5,5,2,3,2,3,0,0
45 | 0,0,2,3,3,2,3,7,7,7,2,8,11,7,10,6,12,5,6,7,14,14,5,4,13,4,9,6,3,10,4,2,3,7,1,1,3,3,0,1
46 | 0,1,1,3,3,4,3,2,6,2,3,5,6,10,6,6,7,6,12,19,19,8,5,14,12,6,4,8,11,6,2,4,4,2,5,2,4,1,0,0
47 | 0,1,1,0,2,3,4,4,6,7,7,9,11,3,10,15,5,9,9,9,20,17,12,6,9,11,3,5,12,11,6,7,4,6,1,1,1,2,1,0
48 | 0,1,2,3,4,3,2,1,4,7,3,10,6,10,4,3,15,12,15,6,11,14,8,4,12,6,4,12,11,7,9,8,6,2,1,2,4,3,1,0
49 | 0,1,2,0,1,2,3,7,5,5,4,9,8,4,4,14,6,8,17,15,5,19,8,6,15,5,12,9,8,7,5,5,7,7,2,2,4,1,0,1
50 | 0,0,1,1,4,5,1,3,5,2,9,10,7,11,5,12,14,15,12,15,16,11,4,6,16,6,12,12,4,2,10,4,8,4,2,5,4,3,2,1
51 | 0,1,0,1,3,5,1,7,4,5,4,7,7,6,13,13,10,14,5,9,16,4,7,9,14,12,5,6,9,11,4,6,5,6,2,3,1,0,0,0
52 | 0,0,0,0,4,3,4,7,3,8,3,6,9,3,3,9,15,6,11,8,20,8,15,10,12,4,14,5,4,9,4,9,5,7,3,4,1,1,1,0
53 | 0,1,1,1,2,2,6,1,2,3,7,3,3,7,5,13,12,6,5,7,7,6,17,11,4,10,12,7,11,7,8,6,5,4,1,4,3,3,2,1
54 | 0,1,1,0,3,4,5,5,8,7,8,6,5,12,4,8,7,8,13,7,6,17,8,4,8,15,3,7,5,11,5,8,6,2,4,4,2,3,0,0
55 | 0,1,2,3,1,3,1,1,5,4,2,9,12,8,7,6,16,15,9,15,16,18,4,12,16,3,12,12,12,10,7,5,2,5,5,3,4,2,2,1
56 | 0,1,2,0,2,3,1,1,2,4,9,6,6,13,7,3,6,13,14,17,12,6,11,14,12,5,13,5,8,11,4,2,6,7,6,4,4,3,2,0
57 | 0,0,0,0,2,5,4,3,3,6,8,8,9,9,10,11,16,5,8,13,11,6,5,12,14,8,4,3,6,6,5,7,7,4,2,4,3,2,2,1
58 | 0,1,2,2,4,2,3,2,4,4,8,8,6,4,3,8,9,12,16,19,5,5,10,11,16,15,11,8,5,6,6,4,4,6,6,4,3,3,2,1
59 | 0,1,0,2,3,4,4,4,4,7,2,6,5,9,14,8,13,12,13,10,7,18,15,17,14,15,3,11,6,3,10,4,3,3,2,1,3,1,0,0
60 | 0,0,0,0,1,3,3,6,2,5,7,7,10,6,12,4,9,15,13,14,15,7,13,16,16,14,9,4,12,11,6,8,6,3,5,3,1,3,0,1
61 |
--------------------------------------------------------------------------------
/data/sample-data/sample-data-10.csv:
--------------------------------------------------------------------------------
1 | 0,1,0,0,3,2,3,6,7,5,10,9,10,9,5,15,12,14,13,9,15,17,4,4,4,8,5,4,7,10,3,4,4,1,1,3,1,3,0,0
2 | 0,1,1,3,2,3,4,3,8,3,4,7,10,5,6,6,8,16,14,5,10,11,7,11,14,13,6,6,3,4,5,3,5,2,1,3,4,0,1,0
3 | 0,1,0,3,1,1,3,5,6,2,2,8,11,9,14,4,13,6,16,15,8,7,6,17,15,14,14,10,10,10,4,4,6,7,4,5,1,0,0,1
4 | 0,0,2,3,3,1,5,6,8,9,6,9,4,13,5,7,15,4,12,8,8,8,15,10,12,14,3,13,12,2,10,4,6,3,3,4,3,0,1,0
5 | 0,0,2,0,1,4,1,3,4,7,8,9,9,11,7,4,13,14,11,16,11,13,10,6,12,11,11,5,11,10,7,4,4,5,1,5,4,3,1,0
6 | 0,1,0,0,1,5,2,3,5,2,10,9,3,12,14,6,13,8,4,9,19,5,11,5,15,15,10,6,4,9,9,7,7,3,5,5,2,0,0,0
7 | 0,1,1,3,3,1,2,5,7,4,10,7,12,3,3,12,10,6,18,5,9,7,11,14,9,5,10,8,9,9,6,7,6,1,6,1,2,0,2,1
8 | 0,0,1,2,4,2,6,6,2,3,10,3,12,7,14,9,15,11,8,17,9,8,7,8,15,3,9,7,10,7,9,4,6,7,5,1,2,1,2,0
9 | 0,0,0,2,3,2,5,6,4,4,6,10,9,6,8,5,11,10,10,8,11,11,13,6,4,7,9,5,8,8,3,2,2,2,4,5,1,2,1,0
10 | 0,0,1,0,2,5,6,2,6,9,6,5,8,3,10,11,8,8,6,7,6,13,9,12,10,4,4,8,11,11,5,8,6,2,5,2,2,3,0,1
11 | 0,0,0,1,4,4,1,7,5,3,3,2,4,5,6,13,9,10,4,19,5,9,16,16,5,10,4,7,8,4,6,2,5,4,5,1,2,3,2,0
12 | 0,0,1,1,4,4,4,4,6,3,3,7,11,12,8,6,9,13,9,13,15,8,16,16,9,4,7,5,4,9,8,2,3,3,1,4,3,2,0,1
13 | 0,1,2,2,1,3,2,3,4,5,10,2,4,6,11,10,13,9,15,18,14,6,12,9,16,9,10,11,5,7,3,3,8,3,4,5,4,1,0,0
14 | 0,0,0,2,4,3,4,2,7,7,8,5,12,5,13,5,11,8,18,13,20,19,10,6,15,15,8,6,7,6,9,3,7,3,5,2,1,1,1,1
15 | 0,0,1,0,3,2,1,4,7,9,4,5,7,13,12,15,13,14,12,7,19,10,7,14,13,13,14,11,11,4,6,8,6,7,6,3,4,2,2,1
16 | 0,0,0,0,2,2,3,1,5,4,4,11,8,5,10,15,16,7,5,10,7,16,14,12,7,10,11,6,11,4,5,4,4,3,1,1,3,2,1,1
17 | 0,1,2,3,4,5,5,7,2,7,5,10,4,13,5,10,6,5,8,11,18,9,13,9,8,14,11,7,6,6,10,9,6,3,6,3,3,3,0,1
18 | 0,1,2,0,2,5,2,3,7,6,8,6,11,11,13,6,12,7,4,12,6,4,8,5,16,11,13,12,7,3,9,7,8,4,4,2,1,3,2,0
19 | 0,1,0,3,4,2,4,4,3,3,10,7,8,7,11,10,12,10,17,7,10,17,12,9,16,11,10,4,6,4,9,2,2,6,1,2,2,0,2,0
20 | 0,1,1,1,1,3,4,3,8,6,4,8,11,3,6,13,9,6,18,9,11,5,12,14,10,4,10,3,12,2,3,7,3,6,6,5,3,2,1,1
21 | 0,1,0,3,2,3,4,5,8,2,2,4,9,10,12,15,12,8,16,5,7,15,12,14,14,12,5,7,11,4,8,2,6,2,1,5,2,2,1,1
22 | 0,1,2,1,4,4,1,2,5,6,10,7,3,10,13,15,7,17,13,4,17,19,16,7,14,12,8,6,3,2,9,7,3,2,4,2,1,2,2,0
23 | 0,0,1,1,1,5,4,5,6,7,8,10,4,8,5,14,13,6,15,17,16,13,5,16,8,14,4,7,7,6,7,2,8,2,6,1,2,2,2,1
24 | 0,1,2,1,1,2,1,5,2,6,2,8,3,3,5,7,10,7,10,15,7,11,10,16,10,8,7,9,9,6,7,5,3,4,5,3,4,3,2,0
25 | 0,0,2,1,3,3,3,6,7,4,3,6,3,6,4,8,5,10,5,6,20,10,18,4,13,12,8,11,4,6,8,5,2,3,5,4,1,0,0,0
26 | 0,1,0,0,1,2,5,7,6,3,8,7,6,3,8,6,14,8,11,17,19,6,18,17,12,10,8,11,12,4,10,2,4,5,6,4,1,2,0,1
27 | 0,1,0,2,2,1,4,3,5,5,3,10,6,6,6,13,6,14,10,8,12,4,10,11,9,4,7,5,4,5,3,3,5,7,2,2,2,2,2,0
28 | 0,1,2,1,3,3,6,2,7,4,6,9,8,5,4,13,4,12,13,5,10,5,10,9,6,14,8,9,3,5,5,2,7,5,4,3,3,3,1,0
29 | 0,1,2,0,3,4,4,6,8,6,8,9,9,10,11,13,16,5,6,15,10,16,14,11,16,15,10,9,10,10,5,5,8,7,5,3,2,3,1,1
30 | 0,0,1,1,3,5,3,4,3,4,8,3,8,12,13,10,10,6,5,18,17,17,7,7,14,6,3,9,11,2,2,3,2,2,2,3,4,1,1,0
31 | 0,1,1,3,1,1,6,3,3,5,10,7,12,7,14,4,11,17,6,9,17,4,15,15,4,5,8,6,7,7,2,2,5,4,3,1,4,0,2,0
32 | 0,1,2,3,3,4,6,6,8,7,3,5,3,9,9,12,7,15,4,5,16,10,6,11,10,12,5,7,12,10,2,4,7,6,2,4,2,1,0,0
33 | 0,0,1,3,4,4,2,4,8,5,7,6,4,3,3,9,15,8,10,15,6,11,18,8,15,13,4,8,10,10,9,4,4,4,2,5,4,2,1,0
34 | 0,1,2,1,2,1,5,6,5,7,6,7,12,5,7,13,11,13,13,19,14,15,6,10,10,4,10,10,4,5,10,3,4,6,5,1,1,2,1,0
35 | 0,1,0,3,4,4,5,5,8,6,9,7,11,11,8,7,5,12,15,9,11,7,8,12,8,15,9,4,10,8,3,7,3,6,1,5,2,3,1,0
36 | 0,1,1,1,1,4,5,3,3,6,9,7,6,8,4,12,5,4,13,7,13,15,18,4,7,15,6,8,8,8,8,4,6,7,2,3,3,0,0,1
37 | 0,1,0,2,3,3,5,6,5,2,8,11,10,13,3,7,9,16,11,12,8,16,18,11,10,13,10,8,8,10,6,5,3,1,2,3,2,2,1,0
38 | 0,1,0,1,1,2,4,6,5,8,10,9,5,10,9,15,8,6,11,10,8,7,17,7,13,10,9,6,9,9,2,8,7,3,1,3,1,0,2,1
39 | 0,1,2,2,2,5,3,2,2,8,3,11,7,9,5,5,6,16,16,11,17,19,14,8,9,13,12,5,7,9,10,2,2,6,1,5,1,1,1,1
40 | 0,0,1,0,1,1,6,1,6,9,6,4,4,4,4,5,4,15,18,11,7,4,4,17,4,12,13,12,7,4,7,3,7,6,4,4,3,2,2,1
41 | 0,1,2,1,2,2,5,1,3,7,5,8,5,7,9,4,14,8,18,14,9,10,12,11,8,5,13,6,10,6,7,8,4,6,4,3,2,1,1,0
42 | 0,0,1,2,4,5,6,7,4,7,9,2,11,10,14,12,12,7,11,14,13,12,14,17,6,7,3,11,4,8,3,3,3,7,6,4,4,3,2,1
43 | 0,1,2,1,4,5,3,7,3,4,10,5,10,8,11,4,10,4,13,7,12,16,9,17,11,11,11,13,9,3,6,9,7,2,3,3,3,1,1,0
44 | 0,0,0,0,1,3,6,2,4,5,10,2,4,3,5,8,16,16,16,12,18,18,14,8,13,3,3,9,7,3,3,8,8,5,1,5,3,1,2,1
45 | 0,1,0,1,3,3,4,7,3,8,9,7,5,8,3,10,5,7,15,13,5,4,6,6,16,7,3,4,9,11,9,9,4,1,2,4,2,3,2,0
46 | 0,1,1,3,4,3,6,6,2,9,9,11,9,10,13,9,7,5,15,18,8,16,18,13,10,6,4,6,6,10,6,5,8,1,2,4,3,1,0,1
47 | 0,0,2,0,4,1,1,3,3,7,5,2,4,6,6,11,7,4,5,15,19,11,13,8,8,13,6,13,7,4,9,5,2,2,6,2,3,3,2,0
48 | 0,1,0,1,1,2,3,3,7,3,5,7,12,10,8,3,16,5,14,10,10,9,8,15,6,12,4,7,8,10,7,4,4,6,6,1,3,3,1,0
49 | 0,0,0,1,1,4,4,1,6,6,3,3,12,6,13,11,16,12,8,8,8,18,5,14,9,15,7,13,6,9,2,4,3,6,6,3,1,0,0,0
50 | 0,1,1,2,2,4,6,6,8,6,6,6,9,5,9,14,15,7,18,4,8,7,6,11,6,10,3,7,7,10,7,9,5,3,4,2,3,3,1,1
51 | 0,1,0,2,3,4,5,1,2,4,5,2,7,13,9,4,16,12,5,11,8,6,16,6,16,8,8,10,6,8,8,9,4,5,2,1,4,1,0,1
52 | 0,0,2,0,4,2,5,1,2,6,10,3,6,13,4,13,10,10,6,6,13,6,6,8,14,12,13,10,11,8,3,4,8,7,2,3,2,0,1,1
53 | 0,1,2,1,2,3,2,5,7,2,2,2,5,8,7,7,6,17,18,13,7,13,17,12,6,13,5,13,3,2,4,5,7,7,1,1,3,2,1,0
54 | 0,0,2,2,1,3,6,6,4,3,8,5,4,9,13,4,8,15,7,7,6,19,12,16,10,14,3,10,3,9,7,7,7,2,4,3,1,1,2,1
55 | 0,0,0,2,1,4,3,6,7,9,5,7,11,3,7,6,10,5,6,15,10,14,10,5,15,15,7,13,5,5,9,2,7,5,4,3,4,1,1,0
56 | 0,1,1,3,1,2,4,6,5,5,6,8,10,7,8,11,15,17,4,10,10,10,6,5,5,11,6,7,11,6,3,4,8,1,3,4,2,2,1,0
57 | 0,1,1,0,1,1,2,2,4,3,2,11,4,4,13,3,8,7,5,5,18,9,18,17,7,7,7,10,5,10,2,9,3,4,4,3,1,2,0,0
58 | 0,1,1,2,3,5,2,2,7,8,7,5,3,13,3,14,11,14,14,14,14,5,13,15,6,12,6,8,9,8,9,7,4,7,1,2,1,2,0,0
59 | 0,0,2,2,3,1,2,6,3,2,7,8,6,11,4,12,12,11,18,14,6,11,8,16,9,3,7,13,6,3,4,3,3,2,1,1,3,2,2,1
60 | 0,0,2,0,1,2,2,7,3,2,4,4,9,7,6,8,10,5,14,5,16,16,8,6,5,3,5,9,12,6,8,7,3,6,3,1,1,3,2,0
61 |
--------------------------------------------------------------------------------
/data/sample-data/sample-data-12.csv:
--------------------------------------------------------------------------------
1 | 0,0,2,3,3,1,6,6,3,6,10,6,8,5,5,8,16,12,13,5,13,18,11,12,11,9,10,13,9,4,4,7,7,3,1,5,3,1,1,1
2 | 0,1,0,1,1,1,1,1,7,7,4,2,7,8,4,6,16,17,13,5,17,5,17,8,5,10,3,5,5,5,8,9,4,4,3,4,1,3,0,0
3 | 0,1,2,0,4,5,6,2,5,3,8,3,8,11,7,9,7,4,8,11,5,18,4,5,6,6,5,13,7,4,7,9,4,3,5,5,2,2,2,1
4 | 0,0,2,0,3,2,6,7,5,6,8,5,8,11,13,8,5,11,10,11,9,12,17,5,4,15,7,5,11,3,5,8,4,4,5,4,2,0,2,1
5 | 0,1,1,3,3,1,1,3,4,3,6,3,9,6,6,7,5,15,18,4,9,12,9,4,9,4,9,11,10,8,10,2,6,1,6,4,4,2,0,1
6 | 0,1,0,0,4,2,2,4,4,6,5,9,8,3,14,11,5,7,5,14,9,7,15,10,11,5,11,12,4,7,10,6,6,2,6,3,4,0,2,1
7 | 0,1,2,1,3,1,5,3,8,6,3,3,12,13,12,6,15,10,5,4,16,10,12,14,15,10,6,4,8,7,7,4,5,4,3,5,4,0,0,0
8 | 0,1,1,0,4,5,5,2,7,5,3,5,3,8,11,13,15,9,14,19,16,11,10,17,7,8,3,7,8,9,9,5,4,2,4,1,4,3,1,0
9 | 0,0,2,2,3,5,1,4,6,4,7,7,3,13,7,3,7,6,18,18,10,12,17,8,7,15,13,13,11,4,9,9,8,1,3,1,4,2,1,1
10 | 0,0,1,2,4,5,1,7,7,7,8,2,8,13,10,10,5,16,17,13,8,6,9,5,11,15,5,6,5,10,2,3,8,4,6,4,2,2,0,1
11 | 0,0,2,3,3,2,3,4,8,7,6,11,5,6,8,8,9,16,7,13,14,11,17,9,9,3,11,8,3,6,4,9,8,3,6,3,4,3,2,0
12 | 0,0,1,3,4,1,3,4,8,2,9,6,3,11,11,6,8,6,17,13,17,12,5,11,4,15,7,4,9,4,8,6,3,4,1,5,4,1,1,0
13 | 0,0,0,0,3,4,5,6,6,9,10,6,7,12,9,6,15,15,9,7,10,14,15,10,5,15,13,12,8,5,7,5,4,2,2,1,2,1,0,0
14 | 0,1,0,0,3,5,2,4,3,2,6,5,9,3,6,13,12,16,10,4,15,10,4,15,13,15,4,3,11,2,8,4,5,2,5,1,3,0,0,1
15 | 0,0,2,3,2,2,6,5,2,2,9,2,6,12,14,12,6,6,17,4,8,10,8,10,6,12,13,11,8,5,10,8,7,1,5,2,1,2,1,1
16 | 0,0,0,0,3,2,2,3,7,9,4,9,4,10,6,14,6,10,6,10,12,13,5,6,12,14,9,8,11,3,10,2,5,6,3,5,3,3,1,0
17 | 0,1,1,2,4,2,6,4,8,9,8,6,5,9,12,8,9,6,11,8,6,18,4,16,11,14,9,10,3,10,9,6,6,1,5,2,4,1,1,0
18 | 0,0,0,1,3,3,3,6,7,8,10,8,11,10,10,10,5,6,5,9,15,11,5,17,6,13,5,11,11,3,3,3,6,1,3,2,1,2,2,1
19 | 0,0,1,0,1,5,3,1,8,3,8,5,3,6,7,14,14,5,7,17,13,14,11,6,14,11,10,13,8,6,3,8,3,5,2,3,4,3,0,0
20 | 0,1,1,1,1,4,3,6,4,4,6,8,6,13,10,12,5,15,17,8,15,16,5,10,4,12,12,13,10,4,7,7,2,4,2,2,4,3,2,0
21 | 0,1,1,3,4,3,3,4,7,2,3,10,4,8,10,6,14,5,9,5,14,5,4,17,11,11,7,7,12,8,10,6,6,2,3,1,2,1,2,1
22 | 0,1,2,0,2,4,3,1,7,2,10,2,11,7,3,13,7,11,9,14,10,7,14,4,5,10,8,12,8,6,10,9,2,4,4,5,3,0,1,0
23 | 0,1,0,1,2,4,3,6,5,5,8,11,6,5,11,5,15,7,11,15,17,5,16,5,11,7,11,4,12,7,8,3,8,5,3,1,1,3,0,0
24 | 0,1,2,0,4,5,6,1,5,2,4,4,8,9,7,12,8,12,9,7,5,6,14,10,14,13,10,8,4,9,8,4,3,5,5,3,4,1,0,0
25 | 0,1,1,1,4,3,1,3,6,5,6,2,5,10,8,11,4,8,4,15,20,19,11,4,10,7,8,10,6,6,3,3,6,1,4,3,3,0,2,1
26 | 0,1,2,1,3,1,4,1,4,6,7,2,11,13,6,12,13,14,12,18,18,7,12,6,14,15,3,11,6,5,7,4,6,1,2,4,3,2,2,1
27 | 0,1,1,1,4,2,6,3,7,7,7,2,6,11,3,6,10,15,10,16,6,17,16,7,8,3,10,7,3,8,6,3,7,2,5,5,2,1,1,1
28 | 0,0,2,2,4,5,4,7,6,8,4,8,3,3,3,13,5,16,5,19,16,16,7,13,16,11,7,12,7,11,5,9,5,7,2,4,3,1,0,0
29 | 0,0,2,3,3,5,1,6,3,8,6,6,4,10,5,11,6,8,11,12,12,7,18,8,13,9,4,7,6,6,2,5,4,3,3,1,2,0,1,0
30 | 0,1,1,2,3,1,1,5,5,8,6,11,8,11,13,13,16,16,5,6,18,12,6,9,13,10,12,11,8,5,6,9,2,7,3,5,2,2,1,0
31 | 0,0,1,2,3,1,3,2,2,9,9,10,11,5,5,3,7,16,8,11,9,15,4,12,4,5,9,9,3,3,10,3,7,6,1,3,2,1,0,1
32 | 0,1,1,2,2,3,2,5,4,7,9,10,9,12,14,15,6,7,11,8,17,17,18,9,16,12,7,9,9,8,4,9,8,6,1,5,1,2,1,1
33 | 0,0,1,2,2,2,5,3,4,5,6,10,11,11,12,9,14,10,15,9,14,14,5,15,9,14,13,3,7,10,4,5,5,7,4,3,2,1,1,1
34 | 0,1,1,3,2,1,2,4,6,9,2,6,5,4,10,7,8,12,8,5,19,15,14,16,16,9,13,11,4,4,2,9,8,1,6,5,4,2,2,0
35 | 0,0,0,0,4,5,1,1,7,2,6,9,11,13,4,6,6,4,9,7,17,6,4,16,12,10,5,9,3,2,4,8,8,1,3,5,2,1,2,0
36 | 0,0,2,1,3,1,1,2,6,3,4,3,4,3,7,14,12,6,9,16,10,8,8,7,9,3,7,7,6,3,4,2,7,3,2,3,4,1,1,1
37 | 0,1,0,1,2,2,5,2,5,8,8,7,5,6,13,15,5,6,5,19,6,8,7,12,12,6,10,9,7,3,7,7,3,1,4,2,1,1,0,0
38 | 0,1,0,3,4,5,5,6,6,4,5,9,9,9,4,6,16,14,8,10,10,9,16,10,7,4,5,12,9,8,2,8,6,4,2,1,2,0,2,1
39 | 0,1,0,2,3,2,5,1,7,4,6,3,6,3,9,5,12,5,7,12,6,6,5,17,5,15,12,7,11,6,2,8,3,2,1,3,4,2,2,1
40 | 0,1,2,2,1,5,2,6,3,6,2,2,6,8,9,3,15,5,9,14,8,8,10,5,6,14,14,10,11,11,4,8,2,7,5,5,1,0,1,1
41 | 0,0,0,1,3,3,5,4,3,5,7,3,9,10,13,12,14,13,4,14,17,17,6,4,5,12,3,9,6,6,7,4,5,2,2,2,4,3,1,1
42 | 0,0,0,1,2,4,3,4,8,8,6,7,8,11,3,14,12,14,7,5,5,13,12,14,10,9,8,4,10,5,2,2,3,2,6,5,4,0,2,1
43 | 0,0,0,2,4,4,6,5,2,2,2,7,7,3,12,8,14,11,10,5,16,4,8,10,13,7,8,12,12,4,2,8,4,5,5,2,4,3,2,0
44 | 0,0,0,1,2,3,6,6,2,3,8,2,3,13,14,5,10,5,10,7,16,11,18,7,7,15,11,4,6,4,8,6,8,4,5,2,2,1,1,0
45 | 0,0,0,2,4,3,1,7,4,3,10,8,4,7,14,11,10,13,12,6,13,6,17,11,8,14,9,6,7,7,4,3,5,3,1,4,2,0,1,1
46 | 0,1,0,1,4,5,2,4,5,6,9,9,5,10,11,11,14,4,13,4,19,14,16,13,6,10,3,13,5,2,8,7,3,5,1,1,2,1,2,0
47 | 0,0,2,2,1,1,4,5,3,7,8,10,10,13,5,9,6,7,5,5,10,15,10,17,14,8,12,6,8,7,3,5,5,3,5,4,2,0,2,0
48 | 0,1,0,1,2,3,6,2,6,2,3,11,10,10,5,6,5,7,18,19,14,19,14,15,10,4,13,13,6,10,7,3,7,1,2,3,1,0,1,0
49 | 0,0,0,1,3,1,5,2,5,8,9,2,10,8,5,11,10,17,8,18,7,19,8,13,10,14,8,11,6,5,6,4,3,5,2,3,3,3,1,1
50 | 0,1,2,2,4,5,4,3,2,8,9,4,4,11,6,12,13,17,10,18,13,18,9,7,10,14,11,6,12,9,6,3,4,2,5,1,2,1,0,1
51 | 0,0,2,1,2,4,4,1,8,3,9,6,3,13,9,6,14,15,9,17,14,12,12,4,12,3,11,9,11,10,8,6,8,2,2,3,2,1,1,0
52 | 0,1,1,2,3,5,2,1,6,7,2,9,7,5,7,4,10,6,9,15,11,5,6,7,8,4,10,13,12,5,6,8,4,2,3,1,2,3,2,0
53 | 0,1,0,2,1,1,6,2,8,9,5,11,6,12,11,9,7,16,14,18,8,4,7,5,14,10,4,9,4,2,7,5,4,6,3,4,4,2,0,1
54 | 0,1,0,1,4,2,3,6,4,6,5,3,6,10,7,11,7,13,17,7,18,13,10,14,6,9,4,12,7,5,5,6,8,2,1,1,1,3,0,0
55 | 0,0,0,3,1,1,2,4,7,7,6,11,3,5,8,11,14,12,6,7,13,9,6,5,5,15,6,7,11,9,6,5,6,3,4,3,3,0,1,0
56 | 0,1,0,1,3,3,6,7,2,6,7,11,7,13,11,7,6,4,14,8,8,15,16,8,9,5,7,8,6,9,5,4,7,6,1,5,1,3,0,1
57 | 0,0,1,3,4,1,4,5,7,7,2,3,7,7,6,15,14,8,17,4,20,16,14,4,9,9,4,9,10,7,8,7,6,6,3,4,1,1,2,1
58 | 0,0,2,1,3,5,3,7,2,8,5,4,12,13,7,15,13,16,16,9,18,15,8,4,16,5,13,11,10,5,6,5,2,2,3,2,3,3,2,1
59 | 0,1,0,2,1,5,5,2,6,5,6,7,5,7,13,6,10,8,18,5,7,14,15,7,16,12,8,3,11,11,10,3,3,3,2,2,1,0,0,1
60 | 0,0,0,1,3,3,4,7,8,2,10,11,8,11,3,15,9,4,9,11,11,15,17,11,15,15,14,7,11,4,2,6,7,7,2,5,4,3,0,1
61 |
--------------------------------------------------------------------------------
/data/sample-data/sample-data-05.csv:
--------------------------------------------------------------------------------
1 | 0,1,0,2,4,4,5,1,2,5,5,8,10,12,10,9,15,9,7,9,10,7,5,8,9,6,7,5,11,9,3,8,6,7,5,1,3,0,2,1
2 | 0,0,2,1,1,4,4,6,2,4,4,4,7,12,11,15,10,9,12,15,7,17,14,12,6,12,5,11,3,9,7,8,8,3,3,3,1,1,0,1
3 | 0,1,0,0,1,2,2,3,4,8,5,2,7,13,14,13,15,16,15,13,18,4,10,11,6,3,14,4,4,6,10,8,6,2,6,2,3,0,0,1
4 | 0,1,0,2,1,3,6,1,3,4,10,2,8,11,11,12,14,12,15,15,20,11,12,7,4,15,9,11,9,5,10,7,5,2,3,1,4,2,0,0
5 | 0,0,2,2,3,3,5,1,4,2,9,7,5,7,11,10,14,6,9,7,18,15,15,5,6,14,5,5,11,9,8,9,8,1,6,4,2,1,2,1
6 | 0,0,0,2,3,4,4,5,3,2,9,8,8,12,11,6,15,8,17,14,20,7,8,10,4,11,9,6,7,7,2,3,5,6,3,4,3,3,0,0
7 | 0,1,1,1,3,1,6,4,5,5,2,6,9,13,13,11,10,6,15,16,14,16,14,10,5,9,8,4,9,4,5,9,7,5,6,1,2,1,2,1
8 | 0,0,2,3,1,4,6,6,4,5,3,5,10,8,6,8,4,14,7,17,7,5,17,8,10,10,10,3,11,3,9,6,6,7,2,1,3,1,2,1
9 | 0,0,2,1,4,4,4,7,5,5,10,8,6,12,14,12,6,6,16,5,6,15,10,5,15,13,13,7,3,11,9,3,7,4,5,4,1,2,1,1
10 | 0,1,2,1,2,3,6,1,2,6,10,7,12,6,3,4,4,16,16,18,9,7,10,10,16,12,11,6,3,10,6,8,5,3,4,1,4,2,1,1
11 | 0,0,1,3,2,1,1,4,4,5,10,9,6,5,12,13,4,16,11,19,11,15,13,13,9,7,12,5,3,7,8,8,6,2,5,5,3,3,2,1
12 | 0,0,1,0,1,3,1,3,4,7,7,8,8,6,7,5,10,12,6,15,15,8,12,8,14,5,5,7,9,4,9,2,3,4,5,3,4,2,2,1
13 | 0,1,0,0,2,1,5,1,8,3,7,2,5,13,9,9,10,12,9,5,12,7,5,8,16,5,6,5,4,4,2,2,4,1,3,5,2,1,0,0
14 | 0,1,1,2,1,4,2,3,3,9,2,7,6,7,6,3,13,11,13,15,14,15,8,15,14,13,8,9,10,8,5,9,7,4,6,2,4,3,1,0
15 | 0,1,1,3,2,4,2,7,3,8,5,9,10,7,9,4,4,5,4,10,13,4,9,9,12,8,7,5,3,4,5,9,6,1,4,1,2,0,0,1
16 | 0,0,2,2,4,5,6,2,5,3,5,5,11,6,8,8,6,6,10,17,19,9,11,8,7,11,4,5,12,6,3,8,7,5,2,5,1,3,0,0
17 | 0,1,0,2,4,3,6,7,7,9,2,7,9,5,12,7,8,5,15,12,13,16,18,5,13,15,4,8,3,4,7,8,6,1,5,4,2,1,2,0
18 | 0,1,1,3,4,5,4,3,4,9,10,5,11,10,7,6,10,7,15,18,14,17,15,16,13,14,6,4,6,8,9,6,5,2,4,5,4,1,2,0
19 | 0,0,2,1,3,4,3,6,8,5,6,2,10,11,11,10,5,15,9,18,10,15,11,15,8,15,7,13,7,5,4,3,8,6,5,1,1,0,0,1
20 | 0,1,1,0,2,1,4,4,4,5,10,11,12,10,7,10,7,16,16,8,14,18,8,16,7,13,14,12,9,2,10,9,7,7,2,2,3,2,0,0
21 | 0,1,0,1,1,2,4,1,4,5,5,7,3,12,10,9,5,5,17,4,8,12,5,11,11,4,13,7,6,4,6,8,7,3,6,5,2,1,1,1
22 | 0,0,2,2,2,4,1,4,7,5,8,11,12,5,3,4,6,6,17,17,16,7,4,17,16,4,11,3,11,4,4,2,2,5,3,3,2,1,0,0
23 | 0,1,0,1,4,2,6,3,7,6,9,8,4,9,10,7,7,6,6,5,5,13,17,4,11,15,13,3,10,5,10,4,4,2,4,4,2,2,0,0
24 | 0,0,2,3,2,4,6,4,3,5,6,5,10,10,8,9,15,16,17,14,5,18,17,6,7,6,7,11,7,10,3,2,5,2,2,3,4,3,1,1
25 | 0,0,1,0,1,1,3,1,4,7,8,10,11,11,8,13,9,7,12,14,16,10,10,15,9,4,9,10,3,10,10,9,8,5,2,2,3,3,1,0
26 | 0,0,1,1,4,5,3,4,8,2,10,6,6,5,9,3,16,16,18,10,16,19,11,8,15,3,11,3,6,3,3,5,5,2,1,4,3,1,2,0
27 | 0,0,2,2,3,5,4,5,5,7,4,2,4,12,11,6,7,17,18,4,10,5,8,15,16,10,7,12,6,4,4,8,2,3,4,3,4,1,2,1
28 | 0,0,0,3,2,1,2,7,4,7,10,11,12,3,13,5,6,14,10,16,13,10,11,8,11,13,11,8,12,8,6,3,2,6,5,1,2,1,2,0
29 | 0,1,1,2,4,1,5,7,6,5,4,3,11,10,4,10,9,6,16,12,5,4,4,10,9,5,14,5,6,4,2,4,7,6,3,4,4,2,2,1
30 | 0,0,2,0,3,3,3,7,2,4,3,8,6,13,5,9,7,12,13,18,8,13,6,6,15,3,10,7,10,7,5,5,3,6,4,5,3,1,0,1
31 | 0,0,1,2,4,1,5,7,6,5,4,3,12,12,13,5,15,8,12,5,12,4,7,6,5,9,3,3,7,3,7,7,2,4,4,2,3,3,0,0
32 | 0,0,0,0,1,2,6,3,4,2,2,10,3,9,6,10,6,11,11,19,12,15,14,10,15,9,11,7,3,3,8,7,7,7,5,1,3,0,1,1
33 | 0,0,2,2,1,1,5,6,6,7,5,7,12,5,7,5,15,11,7,13,15,19,14,13,15,4,11,5,6,7,2,4,7,5,5,5,3,1,1,0
34 | 0,0,0,2,1,4,5,3,3,2,7,7,5,4,9,6,16,8,13,12,16,17,5,15,13,6,8,13,12,6,3,7,7,2,2,2,2,1,2,0
35 | 0,1,2,1,4,5,5,1,7,6,5,10,9,4,4,5,16,4,5,4,6,9,11,4,4,5,4,8,10,7,6,7,8,1,6,2,4,1,2,1
36 | 0,1,2,3,4,2,2,1,3,2,9,2,8,9,8,13,5,11,13,8,20,7,6,15,4,7,14,4,8,9,7,6,3,3,5,5,4,2,0,1
37 | 0,0,2,0,4,4,6,3,4,8,4,8,10,13,6,10,10,15,6,13,10,6,16,6,5,3,10,6,9,3,6,7,4,6,1,4,3,2,2,1
38 | 0,0,0,2,3,3,3,3,6,7,5,6,10,8,13,5,14,9,11,6,10,17,7,10,15,3,4,10,12,11,7,7,4,5,6,4,1,1,0,0
39 | 0,1,2,0,3,1,4,7,8,2,5,4,7,11,11,14,12,17,10,11,5,18,14,14,9,7,5,8,9,7,9,8,2,7,3,1,2,1,2,1
40 | 0,0,0,2,1,4,2,1,7,5,9,8,8,6,9,3,11,9,17,6,10,11,17,16,16,10,13,13,6,10,6,9,2,2,2,1,2,0,0,0
41 | 0,0,1,2,4,4,3,5,3,3,2,6,9,13,6,13,6,4,15,6,15,11,6,14,6,7,13,4,3,11,4,4,8,4,1,3,2,1,0,0
42 | 0,0,2,2,4,5,5,1,5,2,9,6,6,7,14,15,11,17,13,19,18,18,16,4,7,15,6,5,6,8,2,4,6,7,5,5,2,2,2,0
43 | 0,0,2,1,2,3,6,5,8,5,3,8,11,4,6,5,15,17,9,7,16,9,18,6,9,13,12,10,6,10,2,7,6,5,3,4,2,0,1,1
44 | 0,0,0,2,1,5,4,2,5,6,7,6,6,9,3,15,9,11,14,14,14,10,5,10,11,11,12,10,6,4,8,7,4,5,2,2,3,3,1,1
45 | 0,0,0,1,1,1,6,3,3,4,7,7,9,7,14,3,7,8,12,7,6,7,7,6,8,14,4,6,8,10,4,3,3,5,6,5,2,3,1,0
46 | 0,0,2,2,4,3,4,2,8,6,2,8,12,9,5,10,11,16,16,14,9,15,7,17,13,11,10,10,3,4,3,6,5,7,3,3,2,2,0,0
47 | 0,0,2,0,3,1,4,4,4,4,9,11,4,9,12,15,4,13,9,13,11,17,5,15,8,6,8,3,12,8,7,3,2,7,3,3,4,0,0,1
48 | 0,0,0,1,1,3,1,5,4,8,8,5,9,3,14,15,7,11,10,17,20,8,13,10,9,7,6,8,3,2,4,4,3,3,1,1,4,0,0,1
49 | 0,1,0,1,4,5,3,7,2,3,9,7,3,11,3,12,6,16,16,13,12,8,14,17,9,13,8,8,9,4,2,8,5,6,1,5,3,2,0,1
50 | 0,0,0,1,4,1,5,6,4,9,3,5,7,9,11,15,10,9,8,18,18,19,12,4,6,4,11,11,5,11,10,3,8,5,4,1,4,2,0,1
51 | 0,1,1,0,3,4,1,7,7,4,2,8,7,12,14,8,6,8,12,15,18,8,12,17,14,4,12,7,10,8,5,2,8,4,2,4,2,0,1,0
52 | 0,1,1,2,2,4,5,2,7,9,7,6,10,9,9,4,16,4,11,12,6,10,16,12,7,11,14,8,12,7,6,7,8,1,4,4,1,0,2,0
53 | 0,0,1,3,3,1,3,3,3,2,6,9,6,3,13,15,7,16,17,15,10,16,4,17,8,13,4,10,12,3,5,7,6,6,4,3,4,0,1,0
54 | 0,0,0,3,2,3,2,5,8,8,7,4,8,6,8,4,8,4,4,4,9,19,8,9,7,8,10,12,4,11,8,9,6,6,6,3,3,1,1,0
55 | 0,0,1,2,3,5,6,4,8,4,10,7,3,6,12,6,6,15,9,19,7,15,16,11,9,9,9,6,8,2,7,7,4,5,6,4,4,0,1,1
56 | 0,0,1,0,1,3,5,5,5,3,4,9,10,5,6,5,13,9,4,6,5,16,5,11,5,12,10,5,7,10,6,9,6,3,4,5,3,2,0,0
57 | 0,1,1,1,2,2,4,1,2,8,9,8,5,11,3,12,4,7,6,7,5,5,11,12,7,12,5,8,6,10,6,7,4,2,1,4,2,1,0,1
58 | 0,0,2,1,3,5,6,2,3,8,6,6,3,3,11,5,4,14,10,11,5,15,10,15,13,12,13,10,3,2,2,5,7,6,1,5,4,0,1,1
59 | 0,1,2,3,4,5,1,2,2,6,7,2,4,8,8,14,14,9,13,13,9,8,10,17,14,15,13,13,9,4,2,6,6,3,2,5,4,1,2,1
60 | 0,0,1,0,3,4,4,3,3,9,3,2,8,11,8,7,9,15,7,19,16,15,6,16,5,13,9,11,5,3,6,9,5,3,3,2,4,1,0,1
61 |
--------------------------------------------------------------------------------
/data/sample-data/sample-data-01.csv:
--------------------------------------------------------------------------------
1 | 0,0,1,3,1,2,4,7,8,3,3,3,10,5,7,4,7,7,12,18,6,13,11,11,7,7,4,6,8,8,4,4,5,7,3,4,2,3,0,0
2 | 0,1,2,1,2,1,3,2,2,6,10,11,5,9,4,4,7,16,8,6,18,4,12,5,12,7,11,5,11,3,3,5,4,4,5,5,1,1,0,1
3 | 0,1,1,3,3,2,6,2,5,9,5,7,4,5,4,15,5,11,9,10,19,14,12,17,7,12,11,7,4,2,10,5,4,2,2,3,2,2,1,1
4 | 0,0,2,0,4,2,2,1,6,7,10,7,9,13,8,8,15,10,10,7,17,4,4,7,6,15,6,4,9,11,3,5,6,3,3,4,2,3,2,1
5 | 0,1,1,3,3,1,3,5,2,4,4,7,6,5,3,10,8,10,6,17,9,14,9,7,13,9,12,6,7,7,9,6,3,2,2,4,2,0,1,1
6 | 0,0,1,2,2,4,2,1,6,4,7,6,6,9,9,15,4,16,18,12,12,5,18,9,5,3,10,3,12,7,8,4,7,3,5,4,4,3,2,1
7 | 0,0,2,2,4,2,2,5,5,8,6,5,11,9,4,13,5,12,10,6,9,17,15,8,9,3,13,7,8,2,8,8,4,2,3,5,4,1,1,1
8 | 0,0,1,2,3,1,2,3,5,3,7,8,8,5,10,9,15,11,18,19,20,8,5,13,15,10,6,10,6,7,4,9,3,5,2,5,3,2,2,1
9 | 0,0,0,3,1,5,6,5,5,8,2,4,11,12,10,11,9,10,17,11,6,16,12,6,8,14,6,13,10,11,4,6,4,7,6,3,2,1,0,0
10 | 0,1,1,2,1,3,5,3,5,8,6,8,12,5,13,6,13,8,16,8,18,15,16,14,12,7,3,8,9,11,2,5,4,5,1,4,1,2,0,0
11 | 0,1,0,0,4,3,3,5,5,4,5,8,7,10,13,3,7,13,15,18,8,15,15,16,11,14,12,4,10,10,4,3,4,5,5,3,3,2,2,1
12 | 0,1,0,0,3,4,2,7,8,5,2,8,11,5,5,8,14,11,6,11,9,16,18,6,12,5,4,3,5,7,8,3,5,4,5,5,4,0,1,1
13 | 0,0,2,1,4,3,6,4,6,7,9,9,3,11,6,12,4,17,13,15,13,12,8,7,4,7,12,9,5,6,5,4,7,3,5,4,2,3,0,1
14 | 0,0,0,0,1,3,1,6,6,5,5,6,3,6,13,3,10,13,9,16,15,9,11,4,6,4,11,11,12,3,5,8,7,4,6,4,1,3,0,0
15 | 0,1,2,1,1,1,4,1,5,2,3,3,10,7,13,5,7,17,6,9,12,13,10,4,12,4,6,7,6,10,8,2,5,1,3,4,2,0,2,0
16 | 0,1,1,0,1,2,4,3,6,4,7,5,5,7,5,10,7,8,18,17,9,8,12,11,11,11,14,6,11,2,10,9,5,6,5,3,4,2,2,0
17 | 0,0,0,0,2,3,6,5,7,4,3,2,10,7,9,11,12,5,12,9,13,19,14,17,5,13,8,11,5,10,9,8,7,5,3,1,4,0,2,1
18 | 0,0,0,1,2,1,4,3,6,7,4,2,12,6,12,4,14,7,8,14,13,19,6,9,12,6,4,13,6,7,2,3,6,5,4,2,3,0,1,0
19 | 0,0,2,1,2,5,4,2,7,8,4,7,11,9,8,11,15,17,11,12,7,12,7,6,7,4,13,5,7,6,6,9,2,1,1,2,2,0,1,0
20 | 0,1,2,0,1,4,3,2,2,7,3,3,12,13,11,13,6,5,9,16,9,19,16,11,8,9,14,12,11,9,6,6,6,1,1,2,4,3,1,1
21 | 0,1,1,3,1,4,4,1,8,2,2,3,12,12,10,15,13,6,5,5,18,19,9,6,11,12,7,6,3,6,3,2,4,3,1,5,4,2,2,0
22 | 0,0,2,3,2,3,2,6,3,8,7,4,6,6,9,5,12,12,8,5,12,10,16,7,14,12,5,4,6,9,8,5,6,6,1,4,3,0,2,0
23 | 0,0,0,3,4,5,1,7,7,8,2,5,12,4,10,14,5,5,17,13,16,15,13,6,12,9,10,3,3,7,4,4,8,2,6,5,1,0,1,0
24 | 0,1,1,1,1,3,3,2,6,3,9,7,8,8,4,13,7,14,11,15,14,13,5,13,7,14,9,10,5,11,5,3,5,1,1,4,4,1,2,0
25 | 0,1,1,1,2,3,5,3,6,3,7,10,3,8,12,4,12,9,15,5,17,16,5,10,10,15,7,5,3,11,5,5,6,1,1,1,1,0,2,1
26 | 0,0,2,1,3,3,2,7,4,4,3,8,12,9,12,9,5,16,8,17,7,11,14,7,13,11,7,12,12,7,8,5,7,2,2,4,1,1,1,0
27 | 0,0,1,2,4,2,2,3,5,7,10,5,5,12,3,13,4,13,7,15,9,12,18,14,16,12,3,11,3,2,7,4,8,2,2,1,3,0,1,1
28 | 0,0,1,1,1,5,1,5,2,2,4,10,4,8,14,6,15,6,12,15,15,13,7,17,4,5,11,4,8,7,9,4,5,3,2,5,4,3,2,1
29 | 0,0,2,2,3,4,6,3,7,6,4,5,8,4,7,7,6,11,12,19,20,18,9,5,4,7,14,8,4,3,7,7,8,3,5,4,1,3,1,0
30 | 0,0,0,1,4,4,6,3,8,6,4,10,12,3,3,6,8,7,17,16,14,15,17,4,14,13,4,4,12,11,6,9,5,5,2,5,2,1,0,1
31 | 0,1,1,0,3,2,4,6,8,6,2,3,11,3,14,14,12,8,8,16,13,7,6,9,15,7,6,4,10,8,10,4,2,6,5,5,2,3,2,1
32 | 0,0,2,3,3,4,5,3,6,7,10,5,10,13,14,3,8,10,9,9,19,15,15,6,8,8,11,5,5,7,3,6,6,4,5,2,2,3,0,0
33 | 0,1,2,2,2,3,6,6,6,7,6,3,11,12,13,15,15,10,14,11,11,8,6,12,10,5,12,7,7,11,5,8,5,2,5,5,2,0,2,1
34 | 0,0,2,1,3,5,6,7,5,8,9,3,12,10,12,4,12,9,13,10,10,6,10,11,4,15,13,7,3,4,2,9,7,2,4,2,1,2,1,1
35 | 0,0,1,2,4,1,5,5,2,3,4,8,8,12,5,15,9,17,7,19,14,18,12,17,14,4,13,13,8,11,5,6,6,2,3,5,2,1,1,1
36 | 0,0,0,3,1,3,6,4,3,4,8,3,4,8,3,11,5,7,10,5,15,9,16,17,16,3,8,9,8,3,3,9,5,1,6,5,4,2,2,0
37 | 0,1,2,2,2,5,5,1,4,6,3,6,5,9,6,7,4,7,16,7,16,13,9,16,12,6,7,9,10,3,6,4,5,4,6,3,4,3,2,1
38 | 0,1,1,2,3,1,5,1,2,2,5,7,6,6,5,10,6,7,17,13,15,16,17,14,4,4,10,10,10,11,9,9,5,4,4,2,1,0,1,0
39 | 0,1,0,3,2,4,1,1,5,9,10,7,12,10,9,15,12,13,13,6,19,9,10,6,13,5,13,6,7,2,5,5,2,1,1,1,1,3,0,1
40 | 0,1,1,3,1,1,5,5,3,7,2,2,3,12,4,6,8,15,16,16,15,4,14,5,13,10,7,10,6,3,2,3,6,3,3,5,4,3,2,1
41 | 0,0,0,2,2,1,3,4,5,5,6,5,5,12,13,5,7,5,11,15,18,7,9,10,14,12,11,9,10,3,2,9,6,2,2,5,3,0,0,1
42 | 0,0,1,3,3,1,2,1,8,9,2,8,10,3,8,6,10,13,11,17,19,6,4,11,6,12,7,5,5,4,4,8,2,6,6,4,2,2,0,0
43 | 0,1,1,3,4,5,2,1,3,7,9,6,10,5,8,15,11,12,15,6,12,16,6,4,14,3,12,9,6,11,5,8,5,5,6,1,2,1,2,0
44 | 0,0,1,3,1,4,3,6,7,8,5,7,11,3,6,11,6,10,6,19,18,14,6,10,7,9,8,5,8,3,10,2,5,1,5,4,2,1,0,1
45 | 0,1,1,3,3,4,4,6,3,4,9,9,7,6,8,15,12,15,6,11,6,18,5,14,15,12,9,8,3,6,10,6,8,7,2,5,4,3,1,1
46 | 0,1,2,2,4,3,1,4,8,9,5,10,10,3,4,6,7,11,16,6,14,9,11,10,10,7,10,8,8,4,5,8,4,4,5,2,4,1,1,0
47 | 0,0,2,3,4,5,4,6,2,9,7,4,9,10,8,11,16,12,15,17,19,10,18,13,15,11,8,4,7,11,6,7,6,5,1,3,1,0,0,0
48 | 0,1,1,3,1,4,6,2,8,2,10,3,11,9,13,15,5,15,6,10,10,5,14,15,12,7,4,5,11,4,6,9,5,6,1,1,2,1,2,1
49 | 0,0,1,3,2,5,1,2,7,6,6,3,12,9,4,14,4,6,12,9,12,7,11,7,16,8,13,6,7,6,10,7,6,3,1,5,4,3,0,0
50 | 0,0,1,2,3,4,5,7,5,4,10,5,12,12,5,4,7,9,18,16,16,10,15,15,10,4,3,7,5,9,4,6,2,4,1,4,2,2,2,1
51 | 0,1,2,1,1,3,5,3,6,3,10,10,11,10,13,10,13,6,6,14,5,4,5,5,9,4,12,7,7,4,7,9,3,3,6,3,4,1,2,0
52 | 0,1,2,2,3,5,2,4,5,6,8,3,5,4,3,15,15,12,16,7,20,15,12,8,9,6,12,5,8,3,8,5,4,1,3,2,1,3,1,0
53 | 0,0,0,2,4,4,5,3,3,3,10,4,4,4,14,11,15,13,10,14,11,17,9,11,11,7,10,12,10,10,10,8,7,5,2,2,4,1,2,1
54 | 0,0,2,1,1,4,4,7,2,9,4,10,12,7,6,6,11,12,9,15,15,6,6,13,5,12,9,6,4,7,7,6,5,4,1,4,2,2,2,1
55 | 0,1,2,1,1,4,5,4,4,5,9,7,10,3,13,13,8,9,17,16,16,15,12,13,5,12,10,9,11,9,4,5,5,2,2,5,1,0,0,1
56 | 0,0,1,3,2,3,6,4,5,7,2,4,11,11,3,8,8,16,5,13,16,5,8,8,6,9,10,10,9,3,3,5,3,5,4,5,3,3,0,1
57 | 0,1,1,2,2,5,1,7,4,2,5,5,4,6,6,4,16,11,14,16,14,14,8,17,4,14,13,7,6,3,7,7,5,6,3,4,2,2,1,1
58 | 0,1,1,1,4,1,6,4,6,3,6,5,6,4,14,13,13,9,12,19,9,10,15,10,9,10,10,7,5,6,8,6,6,4,3,5,2,1,1,1
59 | 0,0,0,1,4,5,6,3,8,7,9,10,8,6,5,12,15,5,10,5,8,13,18,17,14,9,13,4,10,11,10,8,8,6,5,5,2,0,2,0
60 | 0,0,1,0,3,2,5,4,8,2,9,3,3,10,12,9,14,11,13,8,6,18,11,9,13,11,8,5,5,2,8,5,3,5,4,1,3,1,1,0
61 |
--------------------------------------------------------------------------------
/data/sample-data/sample-data-04.csv:
--------------------------------------------------------------------------------
1 | 0,1,2,2,4,4,2,5,2,4,8,4,10,7,3,13,10,11,7,7,9,17,7,6,12,13,12,6,5,4,8,6,7,3,5,1,1,0,1,0
2 | 0,1,1,1,2,1,4,1,4,9,3,10,10,4,7,10,5,15,17,9,6,12,10,11,9,15,7,11,11,9,3,4,8,3,6,2,3,0,1,0
3 | 0,0,1,2,4,1,2,3,6,8,5,6,4,3,8,12,7,4,14,11,15,17,13,4,11,13,10,9,5,6,4,9,4,3,4,2,4,2,1,0
4 | 0,0,2,1,1,2,4,1,5,8,3,2,6,10,6,5,11,9,15,9,5,9,17,13,9,12,5,4,6,3,5,8,8,7,4,2,2,3,2,0
5 | 0,1,2,3,4,2,2,5,5,5,2,9,11,11,5,15,15,16,15,17,18,18,8,12,5,10,12,11,8,2,7,7,4,2,1,5,1,2,0,0
6 | 0,1,0,0,2,2,1,5,6,8,9,7,11,6,4,14,15,11,13,11,18,9,5,16,6,11,10,10,10,2,5,8,7,2,6,4,2,2,2,0
7 | 0,0,0,2,4,5,1,1,5,2,10,6,12,5,12,6,13,15,11,12,19,14,15,17,13,9,14,4,12,8,6,4,7,6,6,4,1,2,0,0
8 | 0,0,1,2,2,2,2,4,2,5,6,6,10,12,8,15,11,14,15,15,20,9,7,9,10,7,9,12,11,2,8,6,2,2,3,5,1,1,2,1
9 | 0,0,1,3,2,5,5,5,7,4,4,3,5,7,9,13,4,13,16,11,13,10,16,13,12,9,6,10,12,6,7,8,8,1,2,3,2,0,0,1
10 | 0,1,0,3,3,4,1,7,7,8,8,10,5,6,11,5,16,5,16,19,9,7,12,15,5,3,7,8,9,8,6,2,2,7,6,3,1,1,1,0
11 | 0,0,0,1,4,1,6,6,2,8,7,10,4,8,11,9,5,4,11,18,7,19,4,5,8,9,5,12,4,11,8,5,3,2,2,5,4,0,1,0
12 | 0,1,2,2,1,2,4,5,5,8,2,10,8,7,12,4,14,14,9,15,20,5,14,12,11,6,12,12,6,9,9,6,5,4,6,4,2,3,1,1
13 | 0,1,0,1,1,3,1,5,5,6,5,11,5,12,14,12,8,16,5,7,15,12,12,10,5,9,14,13,10,6,2,5,4,3,1,5,2,1,0,0
14 | 0,1,0,2,3,5,4,4,5,9,4,8,9,11,12,5,8,4,16,5,14,15,14,12,11,9,3,8,8,6,9,3,7,2,6,1,2,2,2,0
15 | 0,1,2,3,2,4,6,3,7,3,10,2,5,13,10,11,10,17,7,9,7,17,17,13,15,7,9,6,10,10,5,9,8,5,1,4,2,2,0,0
16 | 0,0,0,3,4,2,4,6,4,5,4,3,12,9,3,8,9,8,12,17,20,11,4,9,12,9,3,12,7,8,7,2,2,5,2,5,3,3,0,0
17 | 0,0,0,3,2,5,4,7,3,9,2,2,6,3,3,15,5,7,14,19,11,13,6,16,5,6,8,13,6,2,8,4,3,5,4,5,2,3,0,1
18 | 0,1,0,2,3,3,4,7,7,9,2,3,9,3,6,14,6,4,11,7,17,7,16,11,6,13,7,7,11,2,10,2,8,5,2,4,2,1,1,1
19 | 0,1,1,3,2,1,1,1,8,2,8,10,3,10,9,7,16,17,8,19,18,6,5,7,8,14,14,10,12,5,7,7,2,2,6,3,4,2,2,0
20 | 0,1,1,1,2,2,2,7,5,4,8,3,4,6,4,12,9,11,12,14,6,6,18,12,9,9,11,8,4,3,3,8,3,1,1,2,1,1,1,1
21 | 0,1,1,3,2,2,6,2,7,2,4,5,11,10,13,5,8,6,13,14,19,8,13,4,15,8,12,10,12,8,5,9,2,6,2,4,1,2,1,0
22 | 0,0,1,2,2,5,2,5,8,7,5,2,11,5,14,10,6,14,11,6,18,6,14,9,14,5,6,3,6,11,7,7,4,1,4,1,2,1,2,0
23 | 0,1,1,2,2,3,6,4,6,7,10,10,12,12,6,15,5,15,10,19,7,15,16,10,7,14,12,6,7,2,3,9,8,5,6,4,1,2,1,0
24 | 0,1,0,3,2,3,5,2,2,7,3,6,7,9,12,12,15,15,15,13,14,8,17,12,15,4,9,13,12,4,6,3,5,7,2,5,1,1,0,0
25 | 0,0,0,1,2,4,1,4,2,2,6,4,10,8,5,14,6,11,10,10,17,10,14,16,8,13,3,4,7,3,5,7,2,3,5,5,1,0,2,1
26 | 0,0,0,1,3,4,4,5,6,6,8,7,11,7,9,6,15,7,12,10,16,16,15,11,4,5,14,8,5,9,8,2,6,5,5,1,3,2,0,1
27 | 0,1,0,3,4,2,5,3,2,7,10,2,5,8,4,8,14,15,15,8,15,6,17,14,12,5,12,8,9,9,2,5,4,5,2,5,4,2,1,1
28 | 0,1,0,1,4,3,1,6,4,6,2,6,10,12,6,15,9,7,10,8,15,5,8,16,8,4,7,12,11,4,4,7,6,7,3,4,3,2,2,0
29 | 0,1,2,1,1,2,1,7,2,3,4,6,8,12,3,11,9,11,15,16,17,4,17,5,8,6,3,5,10,11,4,6,4,2,1,4,1,3,0,1
30 | 0,0,1,3,4,5,3,5,5,8,7,6,8,5,14,15,14,9,8,16,20,19,5,6,8,9,5,12,9,2,9,6,6,3,5,5,4,0,0,0
31 | 0,0,0,2,3,2,4,2,6,8,5,10,3,6,12,9,10,4,7,6,15,19,5,7,10,15,6,12,12,10,2,8,6,3,5,4,2,0,1,0
32 | 0,1,0,2,2,4,4,2,8,4,6,7,11,5,4,7,13,11,12,5,9,18,15,4,11,6,11,6,9,4,4,5,6,6,6,5,3,1,2,1
33 | 0,0,0,2,3,2,5,2,5,9,3,4,9,10,10,9,5,12,10,16,12,6,15,9,6,3,8,13,7,8,2,5,4,3,5,4,1,2,2,1
34 | 0,0,0,3,3,2,6,1,8,3,3,5,12,6,8,13,4,14,9,6,14,10,15,13,15,11,12,8,4,4,10,3,4,7,1,2,4,2,2,0
35 | 0,0,1,1,3,4,1,6,5,5,10,9,6,5,11,14,7,14,6,10,11,15,11,10,16,7,4,3,11,7,5,3,3,2,2,3,3,2,0,0
36 | 0,0,1,0,2,3,5,3,5,6,5,3,5,6,6,9,11,10,11,19,19,19,14,5,7,13,5,8,5,6,8,2,8,1,6,3,1,1,1,1
37 | 0,0,1,2,3,2,4,6,8,4,3,7,10,4,5,7,8,6,14,15,6,4,9,17,6,6,8,5,7,8,6,9,3,7,4,1,3,0,0,1
38 | 0,1,1,1,4,4,4,5,2,2,4,7,4,12,11,11,15,13,7,11,10,6,8,4,5,11,13,4,7,11,7,3,8,5,2,1,1,3,0,1
39 | 0,1,1,1,1,2,5,6,5,7,6,3,8,11,13,8,14,14,8,12,8,5,15,13,13,15,10,9,3,4,6,4,7,1,4,4,3,3,2,1
40 | 0,1,0,1,1,4,2,4,3,3,3,8,7,4,10,13,10,6,17,16,20,7,12,16,6,6,11,12,7,4,2,7,7,1,4,4,1,1,1,0
41 | 0,1,2,0,1,2,6,5,8,6,7,6,11,6,7,12,9,7,16,7,10,12,14,9,15,11,5,3,6,9,9,3,5,2,3,5,3,3,1,0
42 | 0,0,1,0,4,4,1,7,4,5,6,9,11,6,3,7,10,15,11,17,19,15,8,14,16,14,14,8,3,2,9,6,5,1,3,5,2,0,1,1
43 | 0,1,0,3,4,5,5,2,8,2,2,4,6,5,6,13,7,9,7,6,8,10,13,4,4,6,14,8,10,3,9,6,7,6,2,1,2,3,1,0
44 | 0,1,1,0,3,3,2,4,2,6,4,3,11,11,6,3,10,10,18,13,14,8,12,8,8,13,6,7,6,5,9,7,8,3,6,5,4,3,2,0
45 | 0,0,1,2,4,3,4,4,4,8,6,8,5,11,13,4,16,11,11,7,6,18,13,9,10,10,5,9,10,4,2,5,8,5,3,5,4,1,1,1
46 | 0,0,0,2,1,2,3,2,6,2,10,2,12,7,8,15,16,8,16,13,11,14,14,16,15,14,7,5,3,4,2,2,2,1,2,2,1,0,2,1
47 | 0,0,1,0,3,4,5,6,5,8,3,4,10,5,3,10,9,15,4,13,5,17,9,4,15,6,6,3,3,3,10,7,7,7,1,1,4,0,0,1
48 | 0,0,0,3,4,5,1,5,4,5,5,5,4,12,14,6,10,14,11,19,12,11,8,16,14,6,13,8,8,9,3,9,3,1,2,5,3,1,2,1
49 | 0,0,2,2,3,2,2,1,7,3,3,8,12,3,12,5,12,11,5,12,10,8,17,16,16,12,5,7,3,2,3,6,8,3,1,5,2,1,1,0
50 | 0,1,2,1,4,5,1,6,2,3,10,7,11,6,11,5,6,4,17,5,5,5,16,6,10,12,11,5,10,11,9,2,2,5,1,2,4,3,0,1
51 | 0,1,2,2,4,2,3,2,4,3,2,3,3,8,8,11,4,6,9,11,14,9,14,14,15,15,10,6,7,2,9,9,6,1,2,2,3,1,0,0
52 | 0,0,2,2,2,1,5,4,7,7,2,9,12,6,7,15,10,4,12,4,20,7,18,16,9,15,4,11,4,10,4,8,5,2,3,1,4,0,0,1
53 | 0,1,0,3,2,4,1,5,8,5,5,10,9,12,10,4,4,14,16,4,20,14,10,15,6,6,6,8,7,5,7,5,5,1,6,5,4,3,1,1
54 | 0,0,0,2,2,3,4,1,8,5,6,5,8,12,14,6,4,10,18,10,10,11,7,15,6,14,11,10,9,2,2,9,3,6,6,2,4,2,2,0
55 | 0,0,2,3,2,4,2,3,2,6,2,10,10,7,4,13,14,11,17,16,6,8,4,16,12,15,6,11,12,5,10,3,6,4,6,3,2,2,1,0
56 | 0,0,0,3,2,1,5,3,4,3,6,5,5,9,13,11,6,6,7,11,8,17,11,16,14,8,13,7,9,9,7,3,2,2,1,2,2,1,0,0
57 | 0,0,1,3,3,3,3,3,5,4,4,9,9,13,4,11,14,5,13,10,11,18,11,8,11,6,8,5,5,2,4,2,6,1,1,5,2,2,1,0
58 | 0,0,1,2,3,5,4,7,3,3,7,7,3,3,8,4,16,9,9,9,5,4,12,6,4,15,3,11,4,4,3,5,4,6,5,2,4,0,1,0
59 | 0,0,2,3,2,1,4,7,8,4,4,11,12,6,9,13,10,11,13,4,17,16,12,5,4,11,11,5,12,2,10,2,4,3,4,2,4,2,0,1
60 | 0,0,2,2,1,1,4,4,5,2,8,10,4,9,13,5,11,5,10,5,9,15,18,14,11,11,7,6,11,10,4,8,2,7,2,2,2,1,0,1
61 |
--------------------------------------------------------------------------------
/utils/classifier_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | tools for teaching image classification with sklearn
4 |
5 | """
6 | import numpy as np
7 | import pandas as pd
8 | import seaborn as sns
9 | import matplotlib.pyplot as plt
10 |
def plot_sample(X, y, classes, samples_per_class):
    """
    Plot a grid of randomly chosen samples for every class.

    X: the data to be plotted
    y: labels corresponding to X
    classes: list of all classes
    samples_per_class: number of samples to show per class
    """
    n_cls = len(classes)
    plt.figure(figsize=(n_cls * 2, (1 + samples_per_class * 2)))

    # one column per class
    for col, label in enumerate(classes):
        # indices of every sample carrying this label
        candidates = np.flatnonzero(y == label)
        # draw a random subset to display
        chosen = np.random.choice(candidates, samples_per_class, replace=False)
        # one row per drawn sample
        for row, sample in enumerate(chosen):
            plt.subplot(samples_per_class, n_cls, row * n_cls + col + 1);
            sns.heatmap(np.reshape(X[sample], (28, 28)), cmap=plt.cm.gray,
                        xticklabels=False, yticklabels=False, cbar=False)
            plt.axis('off')

    return None
36 |
def plot_coefs(coefficients, nclasses):
    """
    Plot the learned coefficients for each label as 28x28 images.

    coefficients: output from clf.coef_
    nclasses: total number of possible classes
    """
    # symmetric colour bound around zero so red/blue are comparable
    bound = np.max(np.abs(coefficients))

    plt.figure(figsize=(25, 5))

    for cls in range(nclasses):
        plt.subplot(1, nclasses, cls + 1)
        plt.imshow(coefficients[cls].reshape(28, 28),
                   cmap=plt.cm.RdBu, vmin=-bound, vmax=bound)
        plt.axis('off')
        plt.title('Class %i' % cls)

    return None
56 |
def plot_individual(X, y, sample_idx):
    """
    Show a single data point as a 28x28 greyscale image with its label.

    X: data source
    y: label source
    sample_idx: index of sample to be plotted
    """
    # reshape the flat pixel vector back into an image before drawing
    image = X[sample_idx].reshape(28, 28)
    plt.imshow(image, cmap='gray')
    plt.title(f'Label: {y[sample_idx]}\n')
    plt.axis('off')

    return None
71 |
def plot_probs(X, sample_idx, model, classes):
    """
    Plot the model's probability distribution for an individual test case.

    X: input data source
    sample_idx: the data point to study
    model: trained classifier model (exposes intercept_ and coef_)
    classes: predefined list of classes
    """
    nclasses = len(classes)
    # linear score per class: intercept + w . x
    z = [model.intercept_[k] + np.dot(model.coef_[k], X[sample_idx]) for k in range(nclasses)]
    # softmax over the class scores.
    # Fixes two defects in the previous version: the class count was
    # hard-coded as range(10) instead of range(nclasses), and
    # `np.exp(z[k])/1+np.exp(z[k])` was missing parentheses (it evaluated
    # to 2*exp(z), which only produced correct probabilities because the
    # constant factor cancelled in the normalisation).
    # Subtracting the max keeps np.exp from overflowing for large scores.
    exps = np.exp(np.asarray(z) - np.max(z))
    probs = exps / np.sum(exps)
    # plot
    sns.barplot(x=classes, y=probs);
    plt.ylabel("Probability");
    plt.xlabel("Class");

    # predicted label
    idx_cls = np.argmax(probs)
    print(f"I think that this is class {classes[idx_cls]}")

    return None
97 |
def plot_cm(y_test, y_pred, normalized: bool):
    """
    Plot a confusion matrix as an annotated heatmap.

    y_test: true labels
    y_pred: predicted labels
    normalized: if True, each row is normalised to proportions;
                if False, raw counts are shown
    """
    # Previous version tested `== False` / `elif == True`, which silently
    # did nothing for any other value; a plain if/else removes that hole
    # and deduplicates the shared plotting code.
    if normalized:
        cm = pd.crosstab(y_test, y_pred,
                         rownames=['Actual'], colnames=['Predicted'],
                         normalize='index')
        fmt = ".2f"  # proportions
    else:
        cm = pd.crosstab(y_test, y_pred,
                         rownames=['Actual'], colnames=['Predicted'])
        fmt = "d"  # integer counts
    plt.figure(figsize=(10, 10));
    sns.heatmap(cm, annot=True, fmt=fmt, cbar=False)
112 |
def predict_unseen(image, model, classes):
    """
    Predict the category of an unseen data point and show class probabilities.

    image: unseen data point (array of pixel values)
    model: trained model exposing predict_proba
    classes: list of possible classes
    """
    # Reshape to a single-row 2D array; -1 infers the feature count, so
    # this generalises beyond the previously hard-coded 784 (28x28) pixels
    # while behaving identically for 784-element inputs.
    test_probs = model.predict_proba(image.reshape(1, -1))
    # plot prediction
    sns.barplot(x=classes, y=test_probs.squeeze());
    plt.ylabel("Probability");
    plt.xlabel("Class")

    # predicted label
    idx_cls = np.argmax(test_probs)
    print(f"I think that this is class {classes[idx_cls]}")

    return None
133 |
def prediction_coefficients(image, model, classes):
    """
    Visualise per-class model coefficients (top row) next to the
    element-wise product of the image and each class's coefficients
    (bottom row).

    image: input image — assumes it broadcasts against a 28x28 array;
           TODO confirm expected shape against callers
    model: trained classifier exposing coef_
    classes: list of possible classes
    """
    n_cls = len(classes)
    # symmetric colour bound from the largest absolute coefficient
    bound = np.max(np.abs(model.coef_))

    plt.figure(figsize=(25, 5));

    # top row: raw coefficient maps
    for k in range(n_cls):
        plt.subplot(2, n_cls, k + 1)
        plt.imshow(model.coef_[k].reshape(28, 28),
                   cmap=plt.cm.RdBu, vmin=-bound, vmax=bound);
        plt.title('Class %i' % k);
        plt.axis('off')

    # bottom row: the image weighted by each class's coefficients
    for k in range(n_cls):
        plt.subplot(2, n_cls, n_cls + k + 1)
        plt.imshow(image * model.coef_[k].reshape(28, 28),
                   cmap=plt.cm.RdBu, vmin=-bound / 2, vmax=bound / 2);
        # note: you can adjust the scaling factor if necessary,
        # to make the visualization easier to understand
        plt.axis('off')

    return None
158 |
if __name__=="__main__":
    # Entry-point guard: this module only defines helper functions,
    # so nothing runs when it is executed directly as a script.
    pass
--------------------------------------------------------------------------------
/data/viz/python-zero-index.svg:
--------------------------------------------------------------------------------
1 |
142 |
--------------------------------------------------------------------------------
/syllabus/readme.md:
--------------------------------------------------------------------------------
1 | # Syllabus Cultural Data Science - Visual #
2 |
3 | **NB: The information presented here has been taken from the [AU Course Catalogue](https://kursuskatalog.au.dk/en/course/129661/Visual-Analytics).**
4 |
5 | **This page should be viewed as indicative, rather than definitive. In the case of any errors, the official AU version is binding.**
6 |
7 | ## Overview ##
8 |
9 | The purpose of the course is to enable students to conduct systematic computational analyses of visual objects such as paintings, photographs, archaeological artefacts, and digital products. Students will learn to understand the composition of collections of visual objects, and to apply statistical and machine learning methods for analysing them. The course will enable students to carry out projects within their primary subject area, and to reflect critically on others' analytical decisions. Students will also obtain the ability to present the result of their own analyses, and to visualize their results.
10 |
11 | The course introduces basic programming and visualization skills for the systematic analysis of collections of visual materials. Students will learn how to develop research questions about visual materials, to structure research projects to address their research questions, and to apply computational tools in their projects to provide answers to their questions.
12 |
13 | ### Academic Objectives ###
14 |
15 | In the evaluation of the student’s performance, emphasis is placed on the extent to which the student is able to:
16 |
17 | 1. Knowledge:
18 | * explain central theories underlying computational approaches to the analysis of visual objects
19 | * reflect on the creation, composition, and limitations of a data corpus of visual materials.
20 | 2. Skills:
21 | * develop a collection of visual materials for analysis
22 | * conduct large scale analyses of visual materials using computational methods
23 | * choose the appropriate visualization of results.
24 | 3. Competences:
25 | * independently reflect critically on the integration of hermeneutical-conceptual and quantitative-methodological choices for an analysis
26 | * apply acquired methods and procedures to topics from the student’s core field.
27 |
28 | ## Course Assessment ##
29 | The exam consists of a portfolio containing a number of assignments. The portfolio will consist of 3-7 assignments.
30 | The number of assignments as well as their form and length will be announced at the start of the semester. The portfolio may include products. Depending on their length, and subject to the teacher’s approval, these products can replace some of the standard pages in the portfolio.
31 |
32 | ### Participation ###
33 | Students will be expected to complete the in-class assignments in order to progress to the examination. These assignments are designed first and foremost to develop skills rather than “prove” you have learned concepts.
34 |
35 | I encourage you to communicate and work together, so long as you write and explain your code yourself and do not copy work wholesale. You can learn a lot from replicating others’ code but you will learn nothing if you copy it without knowing how it works.
36 |
37 |
38 | ## Schedule ##
39 | Each course element (1-13) is a four-hour session, consisting of a two-hour lecture and a two-hour code-along session.
40 |
41 |
42 | |Week | Session | Lecture | Classroom |Reading |
43 | | :---: | :-----: | ----------| -------| ---|
44 | | 6 | 1 | Introducing Visual Analytics | Thinking about images with Python | NO ASSIGNED READINGS |
45 | | 7 | 2 | Basic image processing | Exploring colour channels | *Arnold & Tilton (2019)* |
46 | | 8 | 3 | More image processing | Comparing colour histograms | *Manovich (2012)* |
47 | | 9 | 4 | Convolutional kernels | Thresholds and blurring | *Wevers & Smits (2020)* |
48 | | 10 | 5 | Image classification 1 | Logistic Regression w/ Scikit-Learn | *Mitchell (1997), VanderPlas (2016), chapter 5* |
49 | | 11 | 6 | Image classification 2 | Simple neural networks | *Nielsen (2015), Chapter 2&3* |
50 | | 12 | 7 | From shallow to deep learning | Introducing TensorFlow | *Nielsen (2015), Chapter 5* |
51 | | 13 | 8 | Convolutional Neural Networks | Building ConvNets w/ Tensorflow | *Krizhevsky et al. (2017)* |
52 | | 14 | 9 | Pretrained CNNs and transfer learning| Search algorithm with image embeddings | *Madhu et al (2020), Tarp & Kristensen-McLachlan (2022)* |
53 | | 15 | 10 | More on image embeddings |Image search | *Gatys et al. (2015)* |
54 | | 16 | -- | *NO TEACHING* | *NO TEACHING* | *CRFM (2019), specific sections to be assigned* |
55 | | 17 | -- | *NO TEACHING* | *NO TEACHING* | *CRFM (2019), specific sections to be assigned* |
56 | | 18 | 11 | Reading words: OCR |OCR with Tesseract | *Jiang et al. (2021)* |
57 | | 19 | 12 | Text-to-Image models |Grid search | *NO ASSIGNED READINGS* |
58 | | 20 | 13 | Project development | Project development | *NO ASSIGNED READINGS* |
59 |
60 | ## Reading ##
61 |
62 | * Arnold, A. & Tilton, L. (2019). "Distant viewing: analyzing large visual corpora", *Digital Scholarship in the Humanities*, 34(1), 1-14. DOI:[https://doi.org/10.1093/llc/fqz013](https://doi.org/10.1093/llc/fqz013)
63 | * Center for Research on Foundation Models (CRFM) (2019). "On the Opportunities and Risks of Foundation Models", [arXiv:2108.07258](https://arxiv.org/abs/2108.07258) [cs.LG]
64 | * Krizhevsky, A., Sutskever, I., & Hinton, G.E. (2017). "ImageNet classification with deep convolutional neural networks", *Commun. ACM*, 60(6), 84–90. https://doi.org/10.1145/3065386
65 | * Gatys, L.A., Ecker, A.S., & Bethge, M. (2015), “A Neural Algorithm of Artistic Style,” [arXiv:1508.06576](http://arxiv.org/abs/1508.06576).
66 | * Jiang, M., Hu, Y., Worthey, G., Dubnicek, R. C., Underwood, T., & Downie, J. S.(2021). Impact of OCR quality on BERT embeddings in the domain classification of book excerpts. CEUR Workshop Proceedings, 2989, 266-279. [Link](https://ceur-ws.org/Vol-2989/long_paper43.pdf)
67 | * Manovich, L. (2012). How to Compare One Million Images?. In: Berry, D.M. (eds) Understanding Digital Humanities. Palgrave Macmillan, London. DOI:[https://doi.org/10.1057/9780230371934_14](https://doi.org/10.1057/9780230371934_14)
68 | * Madhu, P., Kosti, R., Mührenberg, L., Bell, P., Maier, A. & Christlein, V. (2020) “Recognizing Characters in Art History Using Deep Learning,” [arXiv:2003.14171](https://arxiv.org/abs/2003.14171)
69 | * Mitchell, T.M. (1997) "Does Machine Learning Really Work?,” [Available online](https://www.aaai.org/ojs/index.php/aimagazine/article/view/1303)
70 | * Nielsen, M.A. (2015). *Neural Networks and Deep Learning*, Determination Press. [Online](http://neuralnetworksanddeeplearning.com/chap1.html)
71 | * Tarp, L. & Kristensen-McLachlan, R.D. (2021). "The reduced artefact: A case study in data visualisation and digital art history", *Perspectives*, Nov. 21. [Online](https://perspective.smk.dk/en/reduced-artefact-case-study-data-visualisation-and-digital-art-history)
72 | * [Also available in Danish](https://perspective.smk.dk/det-reducerede-vaerk-datavisualisering-af-tusindvis-af-vaerkfotografier)
73 | * VanderPlas, J. (2016). _Python Data Science Handbook_. [Online](https://jakevdp.github.io/PythonDataScienceHandbook/)
74 | * Wevers, M., & Smits, T. (2020). "Detecting Faces, Visual Medium Types, and Gender in Historical Advertisements, 1950–1995", In: Bartoli, A., Fusiello, A. (eds), *Computer Vision – ECCV 2020 Workshops. ECCV 2020. Lecture Notes in Computer Science()*, vol 12536. Springer, Cham. DOI:[https://doi.org/10.1007/978-3-030-66096-3_7](https://doi.org/10.1007/978-3-030-66096-3_7)
75 |
76 |
77 | ## Additional Resources
78 | The following resources are *not* compulsory assigned readings. Instead, these are a mixture of textbooks and other resources which can be used as reference texts. Specifically, these will be useful for people who want to improve their understanding of linear algebra and neural networks. I strongly recommend all of the textbooks by Gilbert Strang - he's a fantastically clear writer, which is a rare skill among mathematicians.
79 |
80 | We'll be using VanderPlas (2016) in session 4, but it's a useful reference text for basic data science using Python (pandas, matplotlib, scikit-learn). It's a little below the level we'll be working at but it's good to have nevertheless.
81 |
82 | * Bittinger, M.L., Ellenbogen, D.J., & Surgent, S.A. (2012). _Calculus and its Applications, 10th Edition_. Boston, MA: Addison-Wesley.
83 | * Strang, G. (2009). _Introduction to Linear Algebra (4th Edition)_. Wellesley, MA: Wellesley-Cambridge Press.
84 | * (2016). _Linear Algebra and its Applications, (5th Edition)_. Wellesley, MA: Wellesley-Cambridge Press.
85 | * (2019). _Linear Algebra and Learning from Data_. Wellesley, MA: Wellesley-Cambridge Press.
86 | * (2020). _Linear Algebra for Everyone_. Wellesley, MA: Wellesley-Cambridge Press.
87 | * VanderPlas, J. (2016). _Python Data Science Handbook_. [Access](https://jakevdp.github.io/PythonDataScienceHandbook/)
--------------------------------------------------------------------------------
/nbs/session7_inclass_rdkm.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | "# Session 7 - Neural Networks with TensorFlow\n",
9 | "\n",
10 | "In this notebook, we're going to see how we can train simple neural networks using ```TensorFlow```, a machine learning and deep learning framework developed by Google Research. You can find the documentation [here](https://www.tensorflow.org/).\n",
11 | "\n",
12 | "We're still working on greyscale images at this point - next week, we'll start thinking about working with full colour images."
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": null,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "# generic tools\n",
22 | "import numpy as np\n",
23 | "\n",
24 | "# tools from sklearn\n",
25 | "from sklearn.datasets import fetch_openml\n",
26 | "from sklearn.preprocessing import LabelBinarizer\n",
27 | "from sklearn.metrics import classification_report\n",
28 | "from sklearn.model_selection import train_test_split\n",
29 | "\n",
30 | "# tools from tensorflow\n",
31 | "import tensorflow as tf\n",
32 | "from tensorflow.keras.models import Sequential\n",
33 | "from tensorflow.keras.layers import Dense\n",
34 | "from tensorflow.keras.optimizers import SGD\n",
35 | "from tensorflow.keras import backend as K\n",
36 | "from tensorflow.keras.utils import plot_model\n",
37 | "\n",
38 | "# matplotlib\n",
39 | "import matplotlib.pyplot as plt"
40 | ]
41 | },
42 | {
43 | "attachments": {},
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "## Download data, train-test split"
48 | ]
49 | },
50 | {
51 | "attachments": {},
52 | "cell_type": "markdown",
53 | "metadata": {},
54 | "source": [
55 | "We're going to download the ```MNIST``` dataset again, so that we compare this pipeline to the baseline benchmarks we created previously."
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": null,
61 | "metadata": {},
62 | "outputs": [],
63 | "source": [
64 | "data, labels = fetch_openml('mnist_784', version=1, return_X_y=True)\n",
65 | "\n",
66 | "# normalise data\n",
67 | "data = data.astype(\"float\")/255.0\n",
68 | "\n",
69 | "# split data\n",
70 | "(X_train, X_test, y_train, y_test) = train_test_split(data,\n",
71 | " labels, \n",
72 | " test_size=0.2)"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": null,
78 | "metadata": {},
79 | "outputs": [],
80 | "source": [
81 | "# convert labels to one-hot encoding\n",
82 | "lb = LabelBinarizer()\n",
83 | "y_train = lb.fit_transform(y_train)\n",
84 | "y_test = lb.fit_transform(y_test)"
85 | ]
86 | },
87 | {
88 | "attachments": {},
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "## Define neural network architecture using ```tf.keras```\n",
93 | "\n",
94 | "We're now going to create our neural network using ```TensorFlow```. In particular, we're going to using the ```keras``` wrapper which makes the syntax a bit simpler to work with.\n",
95 | "\n",
96 | "The code below makes a fully-connected, feed-forward neural network with the following features:\n",
97 | "\n",
98 | "- Input layer of 784\n",
99 | "- One hidden layer of 256\n",
100 | "- Second hidden layer of 128\n",
101 | "- An output layer of 10 nodes"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {},
108 | "outputs": [],
109 | "source": [
110 | "# define architecture 784x256x128x10\n",
111 | "model = Sequential()\n",
112 | "model.add(Dense(256, \n",
113 | " input_shape=(784,), \n",
114 | " activation=\"relu\"))\n",
115 | "model.add(Dense(128, \n",
116 | " activation=\"relu\"))\n",
117 | "model.add(Dense(10, \n",
118 | " activation=\"softmax\"))"
119 | ]
120 | },
121 | {
122 | "attachments": {},
123 | "cell_type": "markdown",
124 | "metadata": {},
125 | "source": [
126 | "## Show summary of model architecture"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": null,
132 | "metadata": {},
133 | "outputs": [],
134 | "source": [
135 | "model.summary()"
136 | ]
137 | },
138 | {
139 | "cell_type": "markdown",
140 | "metadata": {},
141 | "source": [
142 | "## Visualise model layers"
143 | ]
144 | },
145 | {
146 | "attachments": {},
147 | "cell_type": "markdown",
148 | "metadata": {},
149 | "source": [
150 | "If you want to view this, there are some extra things that you can install - ```TensorFlow``` gives you instructions to do that.\n",
151 | "\n",
152 | "**NB:** This might not work on Windows (but I'm not sure)."
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": null,
158 | "metadata": {},
159 | "outputs": [],
160 | "source": [
161 | "plot_model(model, show_shapes=True, show_layer_names=True)"
162 | ]
163 | },
164 | {
165 | "attachments": {},
166 | "cell_type": "markdown",
167 | "metadata": {},
168 | "source": [
169 | "## Compile model loss function, optimizer, and preferred metrics"
170 | ]
171 | },
172 | {
173 | "attachments": {},
174 | "cell_type": "markdown",
175 | "metadata": {},
176 | "source": [
177 | "One of the big differences with ```TensorFlow``` vs ```scikit-learn``` is that we have much more control over how the optimization algorithm works.\n",
178 | "\n",
179 | "We initialize the optimizer and then we have to *compile* the model."
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": null,
185 | "metadata": {},
186 | "outputs": [],
187 | "source": [
188 | "# train model using SGD\n",
189 | "sgd = SGD(0.01)\n",
190 | "model.compile(loss=\"categorical_crossentropy\", \n",
191 | " optimizer=sgd, \n",
192 | " metrics=[\"accuracy\"])"
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {},
198 | "source": [
199 | "## Train model and save history"
200 | ]
201 | },
202 | {
203 | "attachments": {},
204 | "cell_type": "markdown",
205 | "metadata": {},
206 | "source": [
207 | "Once we've done that, it's just a case of fitting the model to the data."
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": null,
213 | "metadata": {},
214 | "outputs": [],
215 | "source": [
216 | "history = model.fit(X_train, y_train, \n",
217 | " validation_split=0.1,\n",
218 | " epochs=10, \n",
219 | " batch_size=32)"
220 | ]
221 | },
222 | {
223 | "cell_type": "markdown",
224 | "metadata": {},
225 | "source": [
226 | "## Visualise using ```matplotlib```"
227 | ]
228 | },
229 | {
230 | "cell_type": "markdown",
231 | "metadata": {},
232 | "source": [
233 | "Here we plot two things simultaneously - the accuracy on training and test data over time; and the loss curves over time.\n",
234 | "\n",
235 | "Some terminology:\n",
236 | "\n",
237 | "- If two loss curves are \"close to each other\" and both of them have a low score, the model suffers from an underfitting problem (High Bias)\n",
238 | "\n",
239 | "- If there are large gaps between two loss curves, then the model suffers from an overfitting problem (High Variance)\n"
240 | ]
241 | },
242 | {
243 | "cell_type": "code",
244 | "execution_count": null,
245 | "metadata": {},
246 | "outputs": [],
247 | "source": [
248 | "plt.style.use(\"fivethirtyeight\")\n",
249 | "plt.figure()\n",
250 | "plt.plot(np.arange(0, 10), history.history[\"loss\"], label=\"train_loss\")\n",
251 | "plt.plot(np.arange(0, 10), history.history[\"val_loss\"], label=\"val_loss\", linestyle=\":\")\n",
252 | "plt.plot(np.arange(0, 10), history.history[\"accuracy\"], label=\"train_acc\")\n",
253 | "plt.plot(np.arange(0, 10), history.history[\"val_accuracy\"], label=\"val_acc\", linestyle=\":\")\n",
254 | "plt.title(\"Training Loss and Accuracy\")\n",
255 | "plt.xlabel(\"Epoch #\")\n",
256 | "plt.ylabel(\"Loss/Accuracy\")\n",
257 | "plt.tight_layout()\n",
258 | "plt.legend()\n",
259 | "plt.show()"
260 | ]
261 | },
262 | {
263 | "attachments": {},
264 | "cell_type": "markdown",
265 | "metadata": {},
266 | "source": [
267 | "## Classifier metrics"
268 | ]
269 | },
270 | {
271 | "attachments": {},
272 | "cell_type": "markdown",
273 | "metadata": {},
274 | "source": [
275 | "We need to do a little bit of extra work to get the classification report to work properly.\n",
276 | "\n",
277 | "Can you explain what you think is happening in these cells?"
278 | ]
279 | },
280 | {
281 | "cell_type": "code",
282 | "execution_count": null,
283 | "metadata": {},
284 | "outputs": [],
285 | "source": [
286 | "# evaluate network\n",
287 | "print(\"[INFO] evaluating network...\")\n",
288 | "predictions = model.predict(X_test, batch_size=32)"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": null,
294 | "metadata": {},
295 | "outputs": [],
296 | "source": [
297 | "print(classification_report(y_test.argmax(axis=1), \n",
298 | " predictions.argmax(axis=1), \n",
299 | " target_names=[str(x) for x in lb.classes_]))"
300 | ]
301 | },
302 | {
303 | "attachments": {},
304 | "cell_type": "markdown",
305 | "metadata": {},
306 | "source": [
307 | "### Task\n",
308 | "\n",
309 | "1. Turn the above into a ```.py``` script which can be run from the command line. \n",
310 | " - Use some of the things we've seen last week today - ```argparse```, ```venv```, etc.\n",
311 | "2. Use this notebook as a template to train a neural network on the ```Cifar-10``` dataset instead of ```MNIST```.\n",
312 | "3. Turn *that* notebook into a ```.py``` script, too"
313 | ]
314 | }
315 | ],
316 | "metadata": {
317 | "kernelspec": {
318 | "display_name": "Python 3",
319 | "language": "python",
320 | "name": "python3"
321 | },
322 | "language_info": {
323 | "codemirror_mode": {
324 | "name": "ipython",
325 | "version": 3
326 | },
327 | "file_extension": ".py",
328 | "mimetype": "text/x-python",
329 | "name": "python",
330 | "nbconvert_exporter": "python",
331 | "pygments_lexer": "ipython3",
332 | "version": "3.11.8"
333 | },
334 | "orig_nbformat": 4,
335 | "vscode": {
336 | "interpreter": {
337 | "hash": "767d51c1340bd893661ea55ea3124f6de3c7a262a8b4abca0554b478b1e2ff90"
338 | }
339 | }
340 | },
341 | "nbformat": 4,
342 | "nbformat_minor": 2
343 | }
344 |
--------------------------------------------------------------------------------
/nbs/session10_inclass_rdkm.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | "## Session 10 - Image search with VGG16 and K-Nearest Neighbours"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "# base tools\n",
18 | "import os, sys\n",
19 | "sys.path.append(os.path.join(\"..\"))\n",
20 | "\n",
21 | "# data analysis\n",
22 | "import numpy as np\n",
23 | "from numpy.linalg import norm\n",
24 | "from tqdm import notebook\n",
25 | "\n",
26 | "# tensorflow\n",
27 | "import tensorflow_hub as hub\n",
28 | "from tensorflow.keras.preprocessing.image import (load_img, \n",
29 | " img_to_array)\n",
30 | "from tensorflow.keras.applications.vgg16 import (VGG16, \n",
31 | " preprocess_input)\n",
32 | "# from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input\n",
33 | "\n",
34 | "# matplotlib\n",
35 | "import matplotlib.pyplot as plt\n",
36 | "import matplotlib.image as mpimg"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "## Helper functions"
44 | ]
45 | },
46 | {
47 | "attachments": {},
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "Q: What kind of preprocessing am I doing here? Why do you think I'm doing it?"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {},
58 | "outputs": [],
59 | "source": [
60 | "def extract_features(img_path, model):\n",
61 | " \"\"\"\n",
62 | " Extract features from image data using pretrained model (e.g. VGG16)\n",
63 | " \"\"\"\n",
64 | " # Define input image shape - remember we need to reshape\n",
65 | " input_shape = (224, 224, 3)\n",
66 | " # load image from file path\n",
67 | " img = load_img(img_path, target_size=(input_shape[0], \n",
68 | " input_shape[1]))\n",
69 | " # convert to array\n",
70 | " img_array = img_to_array(img)\n",
71 | " # expand to fit dimensions\n",
72 | " expanded_img_array = np.expand_dims(img_array, axis=0)\n",
73 | " # preprocess image - see last week's notebook\n",
74 | " preprocessed_img = preprocess_input(expanded_img_array)\n",
75 | " # use the predict function to create feature representation\n",
76 | " features = model.predict(preprocessed_img, verbose=False)\n",
77 | " # flatten\n",
78 | " flattened_features = features.flatten()\n",
79 | " # normalise features\n",
80 | " normalized_features = flattened_features / norm(features)\n",
81 | " return flattened_features"
82 | ]
83 | },
84 | {
85 | "cell_type": "markdown",
86 | "metadata": {},
87 | "source": [
88 | "# Image search"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "## Load VGG16"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "model = VGG16(weights='imagenet', \n",
105 | " include_top=False,\n",
106 | " pooling='avg',\n",
107 | " input_shape=(224, 224, 3))"
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {},
113 | "source": [
114 | "## Extract features from single image"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": null,
120 | "metadata": {},
121 | "outputs": [],
122 | "source": [
123 | "target_image = os.path.join(\"..\", \"..\", \"cds-vis-data\", \"data\", \"img\", \"florence.jpg\")\n",
124 | "features = extract_features(target_image, model)"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": null,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "features"
134 | ]
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "metadata": {},
139 | "source": [
140 | "## Iterate over folder"
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": null,
146 | "metadata": {},
147 | "outputs": [],
148 | "source": [
149 | "# path to the datasets\n",
150 | "root_dir = os.path.join(\"..\", \"..\", \"cds-vis-data\", \"flowers\")\n",
151 | "filenames = [root_dir + \"/\" + name for name in sorted(os.listdir(root_dir))]"
152 | ]
153 | },
154 | {
155 | "cell_type": "markdown",
156 | "metadata": {},
157 | "source": [
158 | "__Extract features for each image__"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": null,
164 | "metadata": {},
165 | "outputs": [],
166 | "source": [
167 | "feature_list = []\n",
168 | "# iterate over all files with a progress bar\n",
169 | "for i in notebook.tqdm(range(len(filenames))):\n",
170 | " feature_list.append(extract_features(filenames[i], model))"
171 | ]
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {},
176 | "source": [
177 | "## Nearest neighbours"
178 | ]
179 | },
180 | {
181 | "attachments": {},
182 | "cell_type": "markdown",
183 | "metadata": {},
184 | "source": [
185 | "Once we have our *database* of extracted embeddings, we can then use K-Nearest Neighbours to find similar images."
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "metadata": {},
192 | "outputs": [],
193 | "source": [
194 | "from sklearn.neighbors import NearestNeighbors\n",
195 | "neighbors = NearestNeighbors(n_neighbors=10, \n",
196 | " algorithm='brute',\n",
197 | " metric='cosine').fit(feature_list)"
198 | ]
199 | },
200 | {
201 | "cell_type": "markdown",
202 | "metadata": {},
203 | "source": [
204 | "__Calculate nearest neighbours for target__"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": null,
210 | "metadata": {},
211 | "outputs": [],
212 | "source": [
213 | "distances, indices = neighbors.kneighbors([feature_list[250]])"
214 | ]
215 | },
216 | {
217 | "cell_type": "markdown",
218 | "metadata": {},
219 | "source": [
220 | "__Save indices, print data__"
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": null,
226 | "metadata": {},
227 | "outputs": [],
228 | "source": [
229 | "idxs = []\n",
230 | "for i in range(1,6):\n",
231 | " print(distances[0][i], indices[0][i])\n",
232 | " idxs.append(indices[0][i])"
233 | ]
234 | },
235 | {
236 | "cell_type": "markdown",
237 | "metadata": {},
238 | "source": [
239 | "__Plot target image__"
240 | ]
241 | },
242 | {
243 | "cell_type": "code",
244 | "execution_count": null,
245 | "metadata": {},
246 | "outputs": [],
247 | "source": [
248 | "plt.imshow(mpimg.imread(filenames[250]))"
249 | ]
250 | },
251 | {
252 | "cell_type": "markdown",
253 | "metadata": {},
254 | "source": [
255 | "__Plot close images__"
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": null,
261 | "metadata": {},
262 | "outputs": [],
263 | "source": [
264 | "plt.imshow(mpimg.imread(filenames[249]))"
265 | ]
266 | },
267 | {
268 | "cell_type": "markdown",
269 | "metadata": {},
270 | "source": [
271 | "__Plot target and top 3 closest together__"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": null,
277 | "metadata": {},
278 | "outputs": [],
279 | "source": [
280 | "# plt target\n",
281 | "plt.imshow(mpimg.imread(filenames[250]))\n",
282 | "\n",
283 | "# plot 3 most similar\n",
284 | "f, axarr = plt.subplots(1,3)\n",
285 | "axarr[0].imshow(mpimg.imread(filenames[idxs[0]]))\n",
286 | "axarr[1].imshow(mpimg.imread(filenames[idxs[1]]))\n",
287 | "axarr[2].imshow(mpimg.imread(filenames[idxs[2]]))"
288 | ]
289 | },
290 | {
291 | "attachments": {},
292 | "cell_type": "markdown",
293 | "metadata": {},
294 | "source": [
295 | "## Simple style transfer"
296 | ]
297 | },
298 | {
299 | "attachments": {},
300 | "cell_type": "markdown",
301 | "metadata": {},
302 | "source": [
303 | "__Load a quick style transfer model from TF Hub__\n",
304 | "\n",
305 | "You can find more details [here](https://www.tensorflow.org/hub/tutorials/tf2_arbitrary_image_stylization).\n",
306 | "\n",
307 | "You can also play around with an interactive version with the same model at [this website](https://reiinakano.com/arbitrary-image-stylization-tfjs/)."
308 | ]
309 | },
310 | {
311 | "cell_type": "code",
312 | "execution_count": null,
313 | "metadata": {},
314 | "outputs": [],
315 | "source": [
316 | "from utils.styletransfer import st_load, show_n\n",
317 | "# Load TF-Hub module.\n",
318 | "hub_handle = 'https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2'\n",
319 | "hub_module = hub.load(hub_handle)"
320 | ]
321 | },
322 | {
323 | "attachments": {},
324 | "cell_type": "markdown",
325 | "metadata": {},
326 | "source": [
327 | "__Load the content image and the style image__"
328 | ]
329 | },
330 | {
331 | "cell_type": "code",
332 | "execution_count": null,
333 | "metadata": {},
334 | "outputs": [],
335 | "source": [
336 | "content_image = st_load(os.path.join(\"..\", \"..\", \"cds-vis-data\", \"data\", \"img\", \"florence.jpg\"))\n",
337 | "style_image = st_load(os.path.join(\"..\", \"..\", \"cds-vis-data\", \"data\", \"img\", \"starry_night.jpg\"))"
338 | ]
339 | },
340 | {
341 | "attachments": {},
342 | "cell_type": "markdown",
343 | "metadata": {},
344 | "source": [
345 | "__Process using the model__"
346 | ]
347 | },
348 | {
349 | "cell_type": "code",
350 | "execution_count": null,
351 | "metadata": {},
352 | "outputs": [],
353 | "source": [
354 | "outputs = hub_module(content_image, style_image)\n",
355 | "stylized_image = outputs[0]"
356 | ]
357 | },
358 | {
359 | "attachments": {},
360 | "cell_type": "markdown",
361 | "metadata": {},
362 | "source": [
363 | "__Show content, style, and stylized image__"
364 | ]
365 | },
366 | {
367 | "cell_type": "code",
368 | "execution_count": null,
369 | "metadata": {},
370 | "outputs": [],
371 | "source": [
372 | "show_n([content_image, style_image, stylized_image], \n",
373 | " titles=['Original content image', 'Style image', 'Stylized image'])"
374 | ]
375 | },
376 | {
377 | "attachments": {},
378 | "cell_type": "markdown",
379 | "metadata": {},
380 | "source": [
381 | "### Tasks\n",
382 | "\n",
383 | "- Take the code in this notebook and turn it into a Python script. You can then add this to the repo for your Assignment 1 solution for doing image search\n",
384 | " - I.e. your Assignment 1 repo would contain both code for image search using colour histograms *and* for image search using a pretrained CNN."
385 | ]
386 | }
387 | ],
388 | "metadata": {
389 | "kernelspec": {
390 | "display_name": "Python 3",
391 | "language": "python",
392 | "name": "python3"
393 | },
394 | "language_info": {
395 | "codemirror_mode": {
396 | "name": "ipython",
397 | "version": 3
398 | },
399 | "file_extension": ".py",
400 | "mimetype": "text/x-python",
401 | "name": "python",
402 | "nbconvert_exporter": "python",
403 | "pygments_lexer": "ipython3",
404 | "version": "3.10.12"
405 | }
406 | },
407 | "nbformat": 4,
408 | "nbformat_minor": 4
409 | }
410 |
--------------------------------------------------------------------------------
/nbs/session2_inclass_rdkm.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Basic Image Processing with ```OpenCV```"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "In this session, we'll be looking at how to explore the different colour channels that comprise an image.\n",
15 | "\n",
16 | "By the end of this class, you'll be able to extract colour histograms from images and qualitatively compare the results."
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "metadata": {
23 | "tags": []
24 | },
25 | "outputs": [],
26 | "source": [
27 | "# We need to include the home directory in our path, so we can read in our own module.\n",
28 | "import os\n",
29 | "\n",
30 | "# image processing tools\n",
31 | "import cv2\n",
32 | "import numpy as np\n",
33 | "\n",
34 | "# utility functions for this course\n",
35 | "import sys\n",
36 | "sys.path.append(os.path.join(\"..\"))\n",
37 | "from utils.imutils import jimshow\n",
38 | "from utils.imutils import jimshow_channel\n",
39 | "\n",
40 | "# plotting tool\n",
41 | "import matplotlib.pyplot as plt"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "## Loading an image\n",
49 | "\n",
50 | "We'll start by loading an image to work with - we're going to use the image in [data/img](../data/img/) called ```terasse.jpeg```."
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {
57 | "tags": []
58 | },
59 | "outputs": [],
60 | "source": [
61 | "# define filepath\n",
62 | "filename = os.path.join(\"..\", \"data\", \"img\", \"terasse.jpeg\")"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": null,
68 | "metadata": {
69 | "tags": []
70 | },
71 | "outputs": [],
72 | "source": [
73 | "# load image\n",
74 | "image = cv2.imread(filename)"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": null,
80 | "metadata": {
81 | "tags": []
82 | },
83 | "outputs": [],
84 | "source": [
85 | "# print the shape of this image\n",
86 | "image.shape"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "metadata": {
93 | "tags": []
94 | },
95 | "outputs": [],
96 | "source": [
97 | "# display the image\n",
98 | "jimshow(image)"
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "## Splitting channels"
106 | ]
107 | },
108 | {
109 | "cell_type": "markdown",
110 | "metadata": {},
111 | "source": [
112 |     "We saw last week and in the lecture today how images are represented digitally as 3-dimensional ```numpy``` arrays. These three dimensions are ```height```, ```width```, and ```colour channels```.\n",
113 | "\n",
114 |     "We're working primarily with RGB colour models in the course. But remember that ```OpenCV``` represents these as BGR!"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": null,
120 | "metadata": {
121 | "tags": []
122 | },
123 | "outputs": [],
124 | "source": [
125 |     "# split the image into separate colour channels called B, G, R\n",
126 | "(B, G, R) = cv2.split(image)"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": null,
132 | "metadata": {
133 | "tags": []
134 | },
135 | "outputs": [],
136 | "source": [
137 | "# use the jimshow_channel function to display individual channels\n",
138 | "jimshow_channel(R, \"Red\")"
139 | ]
140 | },
141 | {
142 | "cell_type": "markdown",
143 | "metadata": {},
144 | "source": [
145 |     "You'll notice that the displayed image is entirely greyscale here. \n",
146 | "\n",
147 | "**Question:** In your groups, can you explain why this is the case?"
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {},
153 | "source": [
154 | "## Showing channels in colour"
155 | ]
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "metadata": {},
160 | "source": [
161 | "If we want to display the individual colour channels using their \"real\" colours, we need to make a little bit of a workaround. \n",
162 | "\n",
163 | "Essentially, we have to manually set all of the pixels on the other channels to zero."
164 | ]
165 | },
166 | {
167 | "cell_type": "markdown",
168 | "metadata": {},
169 | "source": [
170 | "__Creating an empty ```numpy``` array__"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": null,
176 | "metadata": {
177 | "tags": []
178 | },
179 | "outputs": [],
180 | "source": [
181 | "# create an array of zeroes\n",
182 | "zeros = np.zeros(image.shape[:2], dtype = \"uint8\")"
183 | ]
184 | },
185 | {
186 | "cell_type": "markdown",
187 | "metadata": {},
188 | "source": [
189 | "We then use this array to create an artificial image comprising the colour channel we want to see, with all zeroes on the other channels."
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": null,
195 | "metadata": {
196 | "tags": []
197 | },
198 | "outputs": [],
199 | "source": [
200 | "# show only blue\n",
201 | "jimshow(cv2.merge([zeros, zeros, B]))"
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": null,
207 | "metadata": {
208 | "tags": []
209 | },
210 | "outputs": [],
211 | "source": [
212 | "# show only green\n",
213 | "jimshow(cv2.merge([zeros, G, zeros]))"
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "execution_count": null,
219 | "metadata": {
220 | "tags": []
221 | },
222 | "outputs": [],
223 | "source": [
224 | "# show only red\n",
225 | "jimshow(cv2.merge([zeros, zeros, R]))"
226 | ]
227 | },
228 | {
229 | "cell_type": "markdown",
230 | "metadata": {},
231 | "source": [
232 | "## Histograms"
233 | ]
234 | },
235 | {
236 | "cell_type": "markdown",
237 | "metadata": {},
238 | "source": [
239 | "In order to create a 1D (greyscale) histogram, we first need to manually convert the image to greyscale.\n",
240 | "\n",
241 |     "We can do this in ```OpenCV``` using the function ```cv2.cvtColor()```."
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": null,
247 | "metadata": {
248 | "tags": []
249 | },
250 | "outputs": [],
251 | "source": [
252 | "# converting to greyscale\n",
253 | "greyed_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)"
254 | ]
255 | },
256 | {
257 | "cell_type": "markdown",
258 | "metadata": {},
259 | "source": [
260 |     "__A note on ```COLOR_BGR2GRAY```__"
261 | ]
262 | },
263 | {
264 | "cell_type": "code",
265 | "execution_count": null,
266 | "metadata": {},
267 | "outputs": [],
268 | "source": [
269 | "jimshow_channel(greyed_image, \"Greyscale\")"
270 | ]
271 | },
272 | {
273 | "cell_type": "markdown",
274 | "metadata": {},
275 | "source": [
276 | "## A quick greyscale histogram using ```matplotlib```\n",
277 | "\n",
278 | "We're going to learn two things here: 1) how to use the Python plotting library ```matplotlib```; 2) how to create quick greyscale histograms."
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": null,
284 | "metadata": {
285 | "tags": []
286 | },
287 | "outputs": [],
288 | "source": [
289 | "# plotting with matplotlib\n",
290 | "# Create figure\n",
291 | "plt.figure()\n",
292 | "# Add histogram\n",
293 | "plt.hist(image.flatten(), 256, [0,256])\n",
294 | "# Plot title\n",
295 | "plt.title(\"Greyscale histogram\")\n",
296 | "plt.xlabel(\"Bins\")\n",
297 | "plt.ylabel(\"# of Pixels\")\n",
298 | "plt.show()"
299 | ]
300 | },
301 | {
302 | "cell_type": "markdown",
303 | "metadata": {},
304 | "source": [
305 | "## Plotting color histograms"
306 | ]
307 | },
308 | {
309 | "cell_type": "markdown",
310 | "metadata": {},
311 | "source": [
312 |     "```OpenCV``` has a built-in tool for extracting histograms for specific colour channels. Unfortunately, the syntax is quite confusing! If you look at the documentation, you'll see something like this:"
313 | ]
314 | },
315 | {
316 | "cell_type": "markdown",
317 | "metadata": {},
318 | "source": [
319 | "```cv2.calcHist(images, channels, mask, histSize, ranges[, hist[, accumulate]])```\n",
320 | "\n",
321 | "But we can unpack this a little in the following way:\n",
322 | "\n",
323 | "- images : it is the source image of type uint8 or float32 represented as “[img]”.\n",
324 | "- channels : it is the index of channel for which we calculate histogram. \n",
325 | " - For grayscale image, its value is [0] and\n",
326 | " - color image, you can pass [0], [1] or [2] to calculate histogram of blue, green or red channel respectively.\n",
327 | "- mask : mask image. To find histogram of full image, it is given as “None”.\n",
328 | "- histSize : this represents our BIN count. For full scale, we pass [256].\n",
329 | "- ranges : this is our RANGE. Normally, it is [0,256]."
330 | ]
331 | },
332 | {
333 | "cell_type": "markdown",
334 | "metadata": {},
335 | "source": [
336 | "__Plot only blue__"
337 | ]
338 | },
339 | {
340 | "cell_type": "code",
341 | "execution_count": null,
342 | "metadata": {},
343 | "outputs": [],
344 | "source": [
345 | "hist = cv2.calcHist([image], [0], None, [256], [0,256])\n",
346 | "plt.plot(hist, color=\"Blue\")"
347 | ]
348 | },
349 | {
350 | "cell_type": "markdown",
351 | "metadata": {},
352 | "source": [
353 | "__Plot only green__"
354 | ]
355 | },
356 | {
357 | "cell_type": "code",
358 | "execution_count": null,
359 | "metadata": {},
360 | "outputs": [],
361 | "source": [
362 | "hist = cv2.calcHist([image], [1], None, [256], [0,256])\n",
363 | "plt.plot(hist, color=\"Green\")"
364 | ]
365 | },
366 | {
367 | "cell_type": "markdown",
368 | "metadata": {},
369 | "source": [
370 | "__Plot only red__"
371 | ]
372 | },
373 | {
374 | "cell_type": "code",
375 | "execution_count": null,
376 | "metadata": {},
377 | "outputs": [],
378 | "source": [
379 | "hist = cv2.calcHist([image], [2], None, [256], [0,256])\n",
380 | "plt.plot(hist, color=\"Red\")"
381 | ]
382 | },
383 | {
384 | "cell_type": "markdown",
385 | "metadata": {},
386 | "source": [
387 | "__An alternative approach__"
388 | ]
389 | },
390 | {
391 | "cell_type": "code",
392 | "execution_count": null,
393 | "metadata": {},
394 | "outputs": [],
395 | "source": [
396 | "(BLUE, GREEN, RED) = cv2.split(image)"
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": null,
402 | "metadata": {},
403 | "outputs": [],
404 | "source": [
405 | "hist = cv2.calcHist([RED], [0], None, [256], [0,256])\n",
406 | "plt.plot(hist, color=\"r\")"
407 | ]
408 | },
409 | {
410 | "cell_type": "code",
411 | "execution_count": null,
412 | "metadata": {
413 | "tags": []
414 | },
415 | "outputs": [],
416 | "source": [
417 | "# split channels\n",
418 | "channels = cv2.split(image)\n",
419 | "# names of colours\n",
420 | "colors = (\"b\", \"g\", \"r\")\n",
421 | "# create plot\n",
422 | "plt.figure()\n",
423 | "# add title\n",
424 | "plt.title(\"Histogram\")\n",
425 | "# Add xlabel\n",
426 | "plt.xlabel(\"Bins\")\n",
427 | "# Add ylabel\n",
428 | "plt.ylabel(\"# of Pixels\")\n",
429 | "\n",
430 | "# for every tuple of channel, colour\n",
431 | "for (channel, color) in zip(channels, colors):\n",
432 | " # Create a histogram\n",
433 | " hist = cv2.calcHist([channel], [0], None, [256], [0, 256])\n",
434 | " # Plot histogram\n",
435 | " plt.plot(hist, color=color)\n",
436 | " # Set limits of x-axis\n",
437 | " plt.xlim([0, 256])\n",
438 | "# Show plot\n",
439 | "plt.show()"
440 | ]
441 | },
442 | {
443 | "cell_type": "markdown",
444 | "metadata": {},
445 | "source": [
446 | "## Exercise\n",
447 | "\n",
448 | "We've now seen how to load images and split them into their separate colour channels. We've also seen how we can use this to extract colour histograms showing distributions of pixel intensities across each channel. Lastly we've seen how we can use this to plot colour histograms for any given image.\n",
449 | "\n",
450 | "- In your groups, rework the code from today to create and save colour histograms for each of the images in the data folder."
451 | ]
452 | }
453 | ],
454 | "metadata": {
455 | "kernelspec": {
456 | "display_name": "Python 3 (ipykernel)",
457 | "language": "python",
458 | "name": "python3"
459 | },
460 | "language_info": {
461 | "codemirror_mode": {
462 | "name": "ipython",
463 | "version": 3
464 | },
465 | "file_extension": ".py",
466 | "mimetype": "text/x-python",
467 | "name": "python",
468 | "nbconvert_exporter": "python",
469 | "pygments_lexer": "ipython3",
470 | "version": "3.9.13"
471 | },
472 | "vscode": {
473 | "interpreter": {
474 | "hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
475 | }
476 | }
477 | },
478 | "nbformat": 4,
479 | "nbformat_minor": 4
480 | }
481 |
--------------------------------------------------------------------------------
/nbs/session5_inclass_rdkm.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | "# Image Classification with ```scikit-learn```"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "metadata": {},
14 | "source": [
15 | "## Import packages"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": null,
21 | "metadata": {},
22 | "outputs": [],
23 | "source": [
24 | "import os\n",
25 | "import sys\n",
26 | "sys.path.append(\"..\")\n",
27 | "\n",
28 | "# Import teaching utils\n",
29 | "import numpy as np\n",
30 | "import utils.classifier_utils as clf_util\n",
31 | "\n",
32 | "# Import sklearn metrics\n",
33 | "from sklearn import metrics\n",
34 | "from sklearn.datasets import fetch_openml\n",
35 | "from sklearn.model_selection import train_test_split\n",
36 | "from sklearn.linear_model import LogisticRegression\n",
37 | "from sklearn.metrics import accuracy_score"
38 | ]
39 | },
40 | {
41 | "attachments": {},
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "## Fetch data\n",
46 | "\n",
47 |     "We're using something called the MNIST dataset, a very famous dataset in image classification. It even has its own [Wikipedia page](https://en.wikipedia.org/wiki/MNIST_database)!\n",
48 | "\n",
49 | "For this session, we're just fetching the data from a website called [OpenML](https://www.openml.org/), an open collection of datasets and experiments. I'd recommend digging around in there and seeing what kinds of datasets are available for experiments - there might be something for an exam project.\n",
50 | "\n",
51 | "We load the data here using a function from ```scikit-learn``` which takes the data from OpenML and assigns it directly to two variables."
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {},
58 | "outputs": [],
59 | "source": [
60 | "X, y = fetch_openml('mnist_784', \n",
61 | " version=1, \n",
62 | " parser=\"auto\",\n",
63 | " return_X_y=True)"
64 | ]
65 | },
66 | {
67 | "attachments": {},
68 | "cell_type": "markdown",
69 | "metadata": {},
70 | "source": [
71 | "Let's first check what the labels are in our data. This is what we're trying to predict."
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": null,
77 | "metadata": {},
78 | "outputs": [],
79 | "source": [
80 | "sorted(set(y))"
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {},
86 | "source": [
87 | "Depending on the versions of ```pandas``` and ```sklearn``` that you're using, you may need to expressly convert these to ```numpy``` arrays."
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": null,
93 | "metadata": {},
94 | "outputs": [],
95 | "source": [
96 | "X = np.array(X)\n",
97 | "y = np.array(y)"
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {},
103 | "source": [
104 |     "__Predefine classes and number of classes__"
105 | ]
106 | },
107 | {
108 | "attachments": {},
109 | "cell_type": "markdown",
110 | "metadata": {},
111 | "source": [
112 | "We're going to start by making variables which contain the list of classes, and the total number of classes. This isn't strictly necessary, but we'll be using them later on in our notebook."
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": null,
118 | "metadata": {},
119 | "outputs": [],
120 | "source": [
121 | "classes = sorted(set(y))\n",
122 | "nclasses = len(classes)"
123 | ]
124 | },
125 | {
126 | "attachments": {},
127 | "cell_type": "markdown",
128 | "metadata": {},
129 | "source": [
130 | "**Question!** \n",
131 | "\n",
132 | "What does the data look like? What can you tell me about each individual image?"
133 | ]
134 | },
135 | {
136 | "attachments": {},
137 | "cell_type": "markdown",
138 | "metadata": {},
139 | "source": [
140 | "__Plot random sample__\n",
141 | "\n",
142 | "I've created a util function called ```plot_sample()``` which simply plots a random sample of the data.\n",
143 | "\n",
144 | "**Group task:** Go inspect the ```plot_sample()``` function - can you explain what it does and how it works?"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": null,
150 | "metadata": {},
151 | "outputs": [],
152 | "source": [
153 | "clf_util.plot_sample(X, y, classes, 5)"
154 | ]
155 | },
156 | {
157 | "cell_type": "markdown",
158 | "metadata": {},
159 | "source": [
160 | "## Create training and test dataset"
161 | ]
162 | },
163 | {
164 | "attachments": {},
165 | "cell_type": "markdown",
166 | "metadata": {},
167 | "source": [
168 | "We then use our ```train_test_split()``` function to split the data into a training and a test set.\n",
169 | "\n",
170 | "Notice that we're not defining this split in terms of percentages, but instead using whole numbers. We have 7500 training examples, and 2500 test examples.\n",
171 | "\n",
172 | "**Group task:** What effect does it have if you make the training split larger or smaller? Test it!"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": null,
178 | "metadata": {},
179 | "outputs": [],
180 | "source": [
181 | "X_train, X_test, y_train, y_test = train_test_split(X, \n",
182 | " y, \n",
183 | " random_state=42,\n",
184 | " train_size=7500, \n",
185 | " test_size=2500)"
186 | ]
187 | },
188 | {
189 | "attachments": {},
190 | "cell_type": "markdown",
191 | "metadata": {},
192 | "source": [
193 | "As with our simple image search algorithm, it's a good idea to scale our data when working with images.\n",
194 | "\n",
195 |     "Previously, we saw how to do what was called ```MinMax``` scaling. A simpler approach - and often just as effective - is simply to divide by the maximum possible value."
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": null,
201 | "metadata": {},
202 | "outputs": [],
203 | "source": [
204 | "#scaling the features\n",
205 | "X_train_scaled = X_train/255.0\n",
206 | "X_test_scaled = X_test/255.0"
207 | ]
208 | },
209 | {
210 | "cell_type": "markdown",
211 | "metadata": {},
212 | "source": [
213 | "## Train a logistic regression model"
214 | ]
215 | },
216 | {
217 | "attachments": {},
218 | "cell_type": "markdown",
219 | "metadata": {},
220 | "source": [
221 | "Next, I'm defining my Logistic Regression classifier. I've included a number of parameters here which we haven't yet looked at.\n",
222 | "\n",
223 | "**Group task:** Check out the ```scikit-learn``` documentation for ```LogisticRegression()``` and read how each of the parameters here are defined. Can you understand what each of them do? Try changing some of them to see how it affects performance!"
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": null,
229 | "metadata": {},
230 | "outputs": [],
231 | "source": [
232 | "clf = LogisticRegression(tol=0.1, \n",
233 | " solver='saga',\n",
234 | " multi_class='multinomial').fit(X_train_scaled, y_train)"
235 | ]
236 | },
237 | {
238 | "attachments": {},
239 | "cell_type": "markdown",
240 | "metadata": {},
241 | "source": [
242 | "When our model is finished learning, we have a collection of weights that the model has learned.\n",
243 | "\n",
244 | "**Group task:** Before running the following cell, try to figure out what shape this matrix will be."
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "execution_count": null,
250 | "metadata": {},
251 | "outputs": [],
252 | "source": [
253 | "#to check the shape of the coefficient (weights) matrix\n",
254 | "clf.coef_.shape"
255 | ]
256 | },
257 | {
258 | "cell_type": "markdown",
259 | "metadata": {},
260 | "source": [
261 | "## Interpreting the coefficients"
262 | ]
263 | },
264 | {
265 | "cell_type": "markdown",
266 | "metadata": {},
267 | "source": [
268 | "We can plot the coefficients for our model, showing which 'features' are most informative when it comes to predicting each class"
269 | ]
270 | },
271 | {
272 | "cell_type": "code",
273 | "execution_count": null,
274 | "metadata": {},
275 | "outputs": [],
276 | "source": [
277 | "clf_util.plot_coefs(clf.coef_, nclasses)"
278 | ]
279 | },
280 | {
281 | "cell_type": "markdown",
282 | "metadata": {},
283 | "source": [
284 | "## Individual predictions"
285 | ]
286 | },
287 | {
288 | "cell_type": "markdown",
289 | "metadata": {},
290 | "source": [
291 | "We can then use this model to predict the class of an individual data point."
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": null,
297 | "metadata": {},
298 | "outputs": [],
299 | "source": [
300 | "clf_util.plot_individual(X_test_scaled, \n",
301 | " y_test, \n",
302 | " 50)"
303 | ]
304 | },
305 | {
306 | "cell_type": "markdown",
307 | "metadata": {},
308 | "source": [
309 | "We can compute the conditional probability for each class and then plot the probability distribution."
310 | ]
311 | },
312 | {
313 | "cell_type": "markdown",
314 | "metadata": {},
315 | "source": [
316 | "## Show the distribution of probabilities"
317 | ]
318 | },
319 | {
320 | "cell_type": "code",
321 | "execution_count": null,
322 | "metadata": {},
323 | "outputs": [],
324 | "source": [
325 | "clf_util.plot_probs(X_test_scaled, 50, clf, classes)"
326 | ]
327 | },
328 | {
329 | "cell_type": "markdown",
330 | "metadata": {},
331 | "source": [
332 | "We can find the class with the highest probability. \n",
333 | "\n",
334 | "If this matches the actual label for the first test sample, then our prediction is correct."
335 | ]
336 | },
337 | {
338 | "cell_type": "markdown",
339 | "metadata": {},
340 | "source": [
341 | "## Accuracy"
342 | ]
343 | },
344 | {
345 | "cell_type": "markdown",
346 | "metadata": {},
347 | "source": [
348 |     "We can calculate predictions for all of the scaled test data."
349 | ]
350 | },
351 | {
352 | "cell_type": "code",
353 | "execution_count": null,
354 | "metadata": {},
355 | "outputs": [],
356 | "source": [
357 | "y_pred = clf.predict(X_test_scaled)"
358 | ]
359 | },
360 | {
361 | "cell_type": "markdown",
362 | "metadata": {},
363 | "source": [
364 | "We then calculate metrics for accuracy based on comparing the predicted labels with the actual labels."
365 | ]
366 | },
367 | {
368 | "cell_type": "code",
369 | "execution_count": null,
370 | "metadata": {},
371 | "outputs": [],
372 | "source": [
373 | "#method 2\n",
374 | "accuracy = accuracy_score(y_test, y_pred)\n",
375 | "print(accuracy)"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": null,
381 | "metadata": {},
382 | "outputs": [],
383 | "source": [
384 | "cm = metrics.classification_report(y_test, y_pred)\n",
385 | "print(cm)"
386 | ]
387 | },
388 | {
389 | "cell_type": "markdown",
390 | "metadata": {},
391 | "source": [
392 | "This can be visualised using a confusion matrix"
393 | ]
394 | },
395 | {
396 | "cell_type": "code",
397 | "execution_count": null,
398 | "metadata": {},
399 | "outputs": [],
400 | "source": [
401 | "clf_util.plot_cm(y_test, y_pred, normalized=False)"
402 | ]
403 | },
404 | {
405 | "cell_type": "markdown",
406 | "metadata": {},
407 | "source": [
408 | "We can also normalize the confusion matrix to show percentages instead of raw values"
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": null,
414 | "metadata": {},
415 | "outputs": [],
416 | "source": [
417 | "clf_util.plot_cm(y_test, y_pred, normalized=True)"
418 | ]
419 | },
420 | {
421 | "cell_type": "markdown",
422 | "metadata": {},
423 | "source": [
424 | "## Test on new data\n",
425 | "\n",
426 | "We want to know how the model performs now on a completely unseen data point.\n",
427 | "\n",
428 | "Find another image of a hand-drawn number via internet searches, or draw one yourself using a tool on your own laptop (Preview on MacOS; Paint on MS; etc)."
429 | ]
430 | },
431 | {
432 | "cell_type": "code",
433 | "execution_count": null,
434 | "metadata": {},
435 | "outputs": [],
436 | "source": [
437 | "import cv2\n",
438 | "import matplotlib.pyplot as plt\n",
439 | "from utils.imutils import jimshow as show\n",
440 | "from utils.imutils import jimshow_channel as show_channel"
441 | ]
442 | },
443 | {
444 | "cell_type": "markdown",
445 | "metadata": {},
446 | "source": [
447 | "Question: Where should scaling come in this pipeline? Does it matter at which point you do the scaling?"
448 | ]
449 | },
450 | {
451 | "cell_type": "code",
452 | "execution_count": null,
453 | "metadata": {},
454 | "outputs": [],
455 | "source": [
456 | "test_image = cv2.imread(#load image)"
457 | ]
458 | },
459 | {
460 | "cell_type": "markdown",
461 | "metadata": {},
462 | "source": [
463 | "Use bitwise not to invert regular grayscale"
464 | ]
465 | },
466 | {
467 | "cell_type": "code",
468 | "execution_count": null,
469 | "metadata": {},
470 | "outputs": [],
471 | "source": [
472 | "gray = cv2.bitwise_not(cv2.cvtColor(test_image, cv2.COLOR_BGR2GRAY))\n",
473 | "plt.imshow(gray, cmap=plt.cm.gray)"
474 | ]
475 | },
476 | {
477 | "cell_type": "markdown",
478 | "metadata": {},
479 | "source": [
480 | "Resize down to 28x28 image"
481 | ]
482 | },
483 | {
484 | "cell_type": "code",
485 | "execution_count": null,
486 | "metadata": {},
487 | "outputs": [],
488 | "source": [
489 | "compressed = cv2.resize(gray, (28, 28), interpolation=cv2.INTER_AREA)\n",
490 | "plt.imshow(compressed, cmap=plt.cm.gray)"
491 | ]
492 | },
493 | {
494 | "cell_type": "markdown",
495 | "metadata": {},
496 | "source": [
497 | "### Qualitative evaluation"
498 | ]
499 | },
500 | {
501 | "cell_type": "code",
502 | "execution_count": null,
503 | "metadata": {},
504 | "outputs": [],
505 | "source": [
506 | "clf_util.predict_unseen(compressed, clf, classes)"
507 | ]
508 | },
509 | {
510 | "cell_type": "code",
511 | "execution_count": null,
512 | "metadata": {},
513 | "outputs": [],
514 | "source": [
515 | "clf_util.prediction_coefficients(compressed, clf, classes)"
516 | ]
517 | },
518 | {
519 | "attachments": {},
520 | "cell_type": "markdown",
521 | "metadata": {},
522 | "source": [
523 | "## Tasks\n",
524 | "\n",
525 | "- Check out the interactive visualization at the following website. How does it help with interpreting and understanding how the images are being classified?\n",
526 | " - https://adamharley.com/nn_vis/mlp/3d.html\n",
527 | "- Go back to the start of this notebook and work through again in groups. Stop at the point where it says \"Group Tasks\" and work on this in groups of 3 or 4. \n",
528 | " - If you get stuck, you can skip ahead to the next one, but try to cover all of them.\n",
529 |     "- Try the same thing using the ```MLPClassifier()``` from ```scikit-learn```\n",
530 | " - If someone from your group wasn't there, explain to them how it works!"
531 | ]
532 | },
533 | {
534 | "cell_type": "markdown",
535 | "metadata": {},
536 | "source": []
537 | }
538 | ],
539 | "metadata": {
540 | "kernelspec": {
541 | "display_name": "Python 3",
542 | "language": "python",
543 | "name": "python3"
544 | },
545 | "language_info": {
546 | "codemirror_mode": {
547 | "name": "ipython",
548 | "version": 3
549 | },
550 | "file_extension": ".py",
551 | "mimetype": "text/x-python",
552 | "name": "python",
553 | "nbconvert_exporter": "python",
554 | "pygments_lexer": "ipython3",
555 | "version": "3.9.13"
556 | },
557 | "vscode": {
558 | "interpreter": {
559 | "hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
560 | }
561 | }
562 | },
563 | "nbformat": 4,
564 | "nbformat_minor": 4
565 | }
566 |
--------------------------------------------------------------------------------
/nbs/session11_inclass_rdkm.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "49582d16-8369-4de0-ae31-e9d7b1a39213",
6 | "metadata": {},
7 | "source": [
8 | "# OCR - From images to text"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "fa01cbea",
14 | "metadata": {},
15 | "source": [
16 | "In this notebook, we're going to see how we can extract text from images using the ```pytesseract``` library. However, we're going to touch on a lot of different skills we've learned this semester - including drawing on ideas from Language Analytics, too!"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "id": "0ed64493-bba7-491b-8b68-d87c65eaebf5",
23 | "metadata": {
24 | "execution": {
25 | "iopub.execute_input": "2022-05-02T08:19:58.666303Z",
26 | "iopub.status.busy": "2022-05-02T08:19:58.665804Z",
27 | "iopub.status.idle": "2022-05-02T08:19:58.675693Z",
28 | "shell.execute_reply": "2022-05-02T08:19:58.674344Z",
29 | "shell.execute_reply.started": "2022-05-02T08:19:58.666249Z"
30 | },
31 | "tags": []
32 | },
33 | "outputs": [],
34 | "source": [
35 | "# basic python tools\n",
36 | "import re, os, sys\n",
37 | "sys.path.append(\"..\")\n",
38 | "\n",
39 | "# OCR tools\n",
40 | "import cv2\n",
41 | "import pytesseract\n",
42 | "\n",
43 | "# util functions\n",
44 | "from utils.imutils import jimshow as show\n",
45 | "from utils.imutils import jimshow_channel as show_channel\n",
46 | "\n",
47 | "# data processing tools\n",
48 | "import numpy as np \n",
49 | "import pandas as pd \n",
50 | "\n",
51 | "# readymade spellchecker\n",
52 | "from autocorrect import Speller\n",
53 | "\n",
54 | "def clean_string(string):\n",
55 | " \"\"\"Removes punctuation to assist in OCR correction\"\"\"\n",
56 | " processed = string.replace(\"\\n\",\" \")\\\n",
57 | " .replace(\"\\n\\n\",\" \")\\\n",
58 | " .replace(\"__\",\" \")\\\n",
59 | " .replace(\" - \",\" \")\\\n",
60 | " .replace('-\"\"' ,\" \")\\\n",
61 | " .replace(\"|\", \"\")\\\n",
62 | " .replace(\"!\", \"\")\\\n",
63 | " .replace(\"\\s\\s\",\" \")\\\n",
64 | " .lstrip()\n",
65 | " return \" \".join(processed.split())"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "id": "b580d31b-1048-42ff-8117-efae16b4734c",
71 | "metadata": {},
72 | "source": [
73 | "## OCR using ```Tesseract```\n",
74 | "\n",
75 | "Tesseract/pytesseract is quite a rich library with lots of different functionality and small tweaks and tricks that can improve your OCR. Check out the documentation for more info:\n",
76 | "\n",
77 |     "**Pytesseract:** [Github](https://github.com/madmaze/pytesseract)\n",
78 | "**Tesseract:** [Github](https://github.com/tesseract-ocr/tesseract); [Documentation](https://tesseract-ocr.github.io/)"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": null,
84 | "id": "2dbd3e08-0480-4ee3-acfb-90fa95a6d70c",
85 | "metadata": {
86 | "execution": {
87 | "iopub.execute_input": "2022-05-02T08:15:15.673722Z",
88 | "iopub.status.busy": "2022-05-02T08:15:15.673218Z",
89 | "iopub.status.idle": "2022-05-02T08:15:15.680060Z",
90 | "shell.execute_reply": "2022-05-02T08:15:15.679055Z",
91 | "shell.execute_reply.started": "2022-05-02T08:15:15.673668Z"
92 | },
93 | "tags": []
94 | },
95 | "outputs": [],
96 | "source": [
97 | "filepath = os.path.join(\"..\", \n",
98 | " \"..\",\n",
99 | " \"cds-viz-data\",\n",
100 | " \"data\", \n",
101 | " \"img\", \n",
102 | " \"jefferson.jpg\")"
103 | ]
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "id": "72312f24",
108 | "metadata": {},
109 | "source": [
110 | "The simplest way of using ```pytesseract``` is simply to call the ```.image_to_string()```. As the name suggests, this produces a single string with all of the text content found in the image:"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": null,
116 | "id": "ee09bc55-a216-40b9-aa60-5e6f9787c64c",
117 | "metadata": {
118 | "execution": {
119 | "iopub.execute_input": "2022-05-02T08:15:16.264825Z",
120 | "iopub.status.busy": "2022-05-02T08:15:16.263609Z",
121 | "iopub.status.idle": "2022-05-02T08:15:18.378799Z",
122 | "shell.execute_reply": "2022-05-02T08:15:18.377146Z",
123 | "shell.execute_reply.started": "2022-05-02T08:15:16.264743Z"
124 | },
125 | "tags": []
126 | },
127 | "outputs": [],
128 | "source": [
129 | "text = pytesseract.image_to_string(filepath)\n",
130 | "print(text)"
131 | ]
132 | },
133 | {
134 | "cell_type": "markdown",
135 | "id": "72f117bf",
136 | "metadata": {},
137 | "source": [
138 | "The library also has a method for returning the information as a dataframe which contains a detailed collection of information about its predictions:"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": null,
144 | "id": "0349b7da-5c32-4d32-b73a-ef264e8dad08",
145 | "metadata": {
146 | "execution": {
147 | "iopub.execute_input": "2022-05-02T08:15:40.014500Z",
148 | "iopub.status.busy": "2022-05-02T08:15:40.014007Z",
149 | "iopub.status.idle": "2022-05-02T08:15:42.005145Z",
150 | "shell.execute_reply": "2022-05-02T08:15:42.004540Z",
151 | "shell.execute_reply.started": "2022-05-02T08:15:40.014444Z"
152 | },
153 | "tags": []
154 | },
155 | "outputs": [],
156 | "source": [
157 | "df = pytesseract.image_to_data(filepath, \n",
158 | " output_type='data.frame')"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": null,
164 | "id": "62794e2e",
165 | "metadata": {},
166 | "outputs": [],
167 | "source": [
168 | "df"
169 | ]
170 | },
171 | {
172 | "cell_type": "markdown",
173 | "id": "60caa12a-b4c1-4c8b-a355-3cba7566e9d3",
174 | "metadata": {},
175 | "source": [
176 | "## Preprocess with Open-CV"
177 | ]
178 | },
179 | {
180 | "cell_type": "markdown",
181 | "id": "0eb400fb-9209-4933-8227-2ced3534ed2b",
182 | "metadata": {},
183 | "source": [
184 | "Note that Tesseract on Github give a bunch of tips for how best to preprocess images to improve performance. \n",
185 | "\n",
186 | "You should have the skills to actually do all of these things using OpenCV: https://github.com/tesseract-ocr/tessdoc/blob/main/ImproveQuality.md#rescaling"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": null,
192 | "id": "388e4cd5-244a-48ef-b326-2747cb5bf7f2",
193 | "metadata": {
194 | "execution": {
195 | "iopub.execute_input": "2022-05-02T08:17:37.184050Z",
196 | "iopub.status.busy": "2022-05-02T08:17:37.183533Z",
197 | "iopub.status.idle": "2022-05-02T08:17:37.381681Z",
198 | "shell.execute_reply": "2022-05-02T08:17:37.380964Z",
199 | "shell.execute_reply.started": "2022-05-02T08:17:37.183994Z"
200 | },
201 | "tags": []
202 | },
203 | "outputs": [],
204 | "source": [
205 | "image = cv2.imread(filepath)"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": null,
211 | "id": "568ffa91",
212 | "metadata": {},
213 | "outputs": [],
214 | "source": [
215 | "show(image)"
216 | ]
217 | },
218 | {
219 | "cell_type": "markdown",
220 | "id": "9e0ec601-241f-4ebf-afeb-e000ce06171a",
221 | "metadata": {},
222 | "source": [
223 | "__Crop__"
224 | ]
225 | },
226 | {
227 | "cell_type": "markdown",
228 | "id": "419b6b82",
229 | "metadata": {},
230 | "source": [
231 | "The first thing we want to do is to crop this around the center of the image to keep only the main text."
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": null,
237 | "id": "5a9b2f8e-f43f-4aba-a45a-216382e8c49e",
238 | "metadata": {
239 | "execution": {
240 | "iopub.execute_input": "2022-05-02T08:18:12.701700Z",
241 | "iopub.status.busy": "2022-05-02T08:18:12.701170Z",
242 | "iopub.status.idle": "2022-05-02T08:18:12.709571Z",
243 | "shell.execute_reply": "2022-05-02T08:18:12.708373Z",
244 | "shell.execute_reply.started": "2022-05-02T08:18:12.701645Z"
245 | }
246 | },
247 | "outputs": [],
248 | "source": [
249 | "(cX, cY) = (image.shape[1]//2, image.shape[0]//2)\n",
250 | "cropped = image[cY-750:cY+1150, cX-750:cX+700]"
251 | ]
252 | },
253 | {
254 | "cell_type": "markdown",
255 | "id": "8ea3d924-628a-44fb-a1f8-c735c1b782d7",
256 | "metadata": {},
257 | "source": [
258 | "__Greyscale__"
259 | ]
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "id": "9cde247e",
264 | "metadata": {},
265 | "source": [
266 | "Next, we greyscale the image to attempt to remove extra noise."
267 | ]
268 | },
269 | {
270 | "cell_type": "code",
271 | "execution_count": null,
272 | "id": "25c7d07b-497b-4001-b4e1-86b42451daa9",
273 | "metadata": {
274 | "execution": {
275 | "iopub.execute_input": "2022-05-02T08:18:13.485604Z",
276 | "iopub.status.busy": "2022-05-02T08:18:13.485134Z",
277 | "iopub.status.idle": "2022-05-02T08:18:13.501766Z",
278 | "shell.execute_reply": "2022-05-02T08:18:13.500319Z",
279 | "shell.execute_reply.started": "2022-05-02T08:18:13.485550Z"
280 | }
281 | },
282 | "outputs": [],
283 | "source": [
284 | "grey = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)"
285 | ]
286 | },
287 | {
288 | "cell_type": "code",
289 | "execution_count": null,
290 | "id": "ec1ea391-b6f4-40c4-b7d6-0252c89c2e39",
291 | "metadata": {
292 | "execution": {
293 | "iopub.execute_input": "2022-05-02T08:20:03.459148Z",
294 | "iopub.status.busy": "2022-05-02T08:20:03.458653Z",
295 | "iopub.status.idle": "2022-05-02T08:20:04.357845Z",
296 | "shell.execute_reply": "2022-05-02T08:20:04.357240Z",
297 | "shell.execute_reply.started": "2022-05-02T08:20:03.459091Z"
298 | },
299 | "tags": []
300 | },
301 | "outputs": [],
302 | "source": [
303 | "show_channel(grey)"
304 | ]
305 | },
306 | {
307 | "cell_type": "markdown",
308 | "id": "543e2951-8ac4-494e-b644-88c90123d26c",
309 | "metadata": {},
310 | "source": [
311 | "__OCR again__"
312 | ]
313 | },
314 | {
315 | "cell_type": "markdown",
316 | "id": "54ab8173",
317 | "metadata": {},
318 | "source": [
319 | "Let's see how these simple steps improve performance of the OCR model."
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": null,
325 | "id": "79fec800-321d-494c-a164-bb5beba433f6",
326 | "metadata": {
327 | "execution": {
328 | "iopub.execute_input": "2022-05-02T08:20:30.150830Z",
329 | "iopub.status.busy": "2022-05-02T08:20:30.150308Z",
330 | "iopub.status.idle": "2022-05-02T08:20:31.987429Z",
331 | "shell.execute_reply": "2022-05-02T08:20:31.986620Z",
332 | "shell.execute_reply.started": "2022-05-02T08:20:30.150775Z"
333 | }
334 | },
335 | "outputs": [],
336 | "source": [
337 | "text = pytesseract.image_to_string(grey)"
338 | ]
339 | },
340 | {
341 | "cell_type": "code",
342 | "execution_count": null,
343 | "id": "4478cad9-3ac3-475e-9c13-c9cc6d9349dc",
344 | "metadata": {
345 | "execution": {
346 | "iopub.execute_input": "2022-05-02T08:20:36.698236Z",
347 | "iopub.status.busy": "2022-05-02T08:20:36.697584Z",
348 | "iopub.status.idle": "2022-05-02T08:20:36.705029Z",
349 | "shell.execute_reply": "2022-05-02T08:20:36.703979Z",
350 | "shell.execute_reply.started": "2022-05-02T08:20:36.698177Z"
351 | }
352 | },
353 | "outputs": [],
354 | "source": [
355 | "print(text)"
356 | ]
357 | },
358 | {
359 | "cell_type": "markdown",
360 | "id": "20274cbe-87f8-4087-9fd1-ebb7d0ad8840",
361 | "metadata": {},
362 | "source": [
363 | "__Thresholding__"
364 | ]
365 | },
366 | {
367 | "cell_type": "markdown",
368 | "id": "339132b0",
369 | "metadata": {},
370 | "source": [
371 | "Way back when we worked more with OpenCV, we learned that we could also *binarize* images using thresholding to make everything black or white (like when we created *masks*)."
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": null,
377 | "id": "ce83891b-8214-4e54-9a73-6243f1f73465",
378 | "metadata": {
379 | "execution": {
380 | "iopub.execute_input": "2022-05-02T08:20:53.550897Z",
381 | "iopub.status.busy": "2022-05-02T08:20:53.550370Z",
382 | "iopub.status.idle": "2022-05-02T08:20:53.563566Z",
383 | "shell.execute_reply": "2022-05-02T08:20:53.561336Z",
384 | "shell.execute_reply.started": "2022-05-02T08:20:53.550841Z"
385 | }
386 | },
387 | "outputs": [],
388 | "source": [
389 | "# threshold\n",
390 | "(T, thres) = cv2.threshold(grey, 110, 255, cv2.THRESH_BINARY)"
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "execution_count": null,
396 | "id": "e9e676de-af70-483e-9188-b7984b538755",
397 | "metadata": {
398 | "execution": {
399 | "iopub.execute_input": "2022-05-02T08:21:00.687166Z",
400 | "iopub.status.busy": "2022-05-02T08:21:00.686655Z",
401 | "iopub.status.idle": "2022-05-02T08:21:01.222623Z",
402 | "shell.execute_reply": "2022-05-02T08:21:01.221271Z",
403 | "shell.execute_reply.started": "2022-05-02T08:21:00.687111Z"
404 | }
405 | },
406 | "outputs": [],
407 | "source": [
408 | "show_channel(thres)"
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": null,
414 | "id": "a77ccba6-8a8f-4680-8869-98887b105a28",
415 | "metadata": {
416 | "execution": {
417 | "iopub.execute_input": "2022-05-02T08:24:58.775628Z",
418 | "iopub.status.busy": "2022-05-02T08:24:58.775058Z",
419 | "iopub.status.idle": "2022-05-02T08:24:59.690604Z",
420 | "shell.execute_reply": "2022-05-02T08:24:59.688867Z",
421 | "shell.execute_reply.started": "2022-05-02T08:24:58.775561Z"
422 | },
423 | "tags": []
424 | },
425 | "outputs": [],
426 | "source": [
427 | "text = pytesseract.image_to_string(thres)"
428 | ]
429 | },
430 | {
431 | "cell_type": "code",
432 | "execution_count": null,
433 | "id": "e0b3d1d9-5369-4a9a-8bae-43bcae4f7aa5",
434 | "metadata": {
435 | "execution": {
436 | "iopub.execute_input": "2022-05-02T08:25:00.533583Z",
437 | "iopub.status.busy": "2022-05-02T08:25:00.533131Z",
438 | "iopub.status.idle": "2022-05-02T08:25:00.540113Z",
439 | "shell.execute_reply": "2022-05-02T08:25:00.539063Z",
440 | "shell.execute_reply.started": "2022-05-02T08:25:00.533530Z"
441 | },
442 | "tags": []
443 | },
444 | "outputs": [],
445 | "source": [
446 | "print(text)"
447 | ]
448 | },
449 | {
450 | "cell_type": "markdown",
451 | "id": "8f805614-fa67-41ed-ba19-95614e259523",
452 | "metadata": {
453 | "tags": []
454 | },
455 | "source": [
456 | "## Quick and cheap spell checking"
457 | ]
458 | },
459 | {
460 | "cell_type": "markdown",
461 | "id": "a2b68dc3",
462 | "metadata": {},
463 | "source": [
464 | "One of the main issues we seem to see is single-character errors which give misspelled words. So, let's see how far we can get by doing some simple spell checking and correction with the ```autocorrect``` library:"
465 | ]
466 | },
467 | {
468 | "cell_type": "markdown",
469 | "id": "90c013d9-ceee-4ffc-bcbe-944876444990",
470 | "metadata": {},
471 | "source": [
472 | "__Initialize speller__"
473 | ]
474 | },
475 | {
476 | "cell_type": "code",
477 | "execution_count": null,
478 | "id": "39a7b07a-4325-44d0-9582-454122b808e6",
479 | "metadata": {
480 | "execution": {
481 | "iopub.execute_input": "2022-05-02T08:26:24.740086Z",
482 | "iopub.status.busy": "2022-05-02T08:26:24.739578Z",
483 | "iopub.status.idle": "2022-05-02T08:26:24.819909Z",
484 | "shell.execute_reply": "2022-05-02T08:26:24.818802Z",
485 | "shell.execute_reply.started": "2022-05-02T08:26:24.740030Z"
486 | },
487 | "tags": []
488 | },
489 | "outputs": [],
490 | "source": [
491 | "spell = Speller(only_replacements=True)"
492 | ]
493 | },
494 | {
495 | "cell_type": "code",
496 | "execution_count": null,
497 | "id": "6b42f9cb",
498 | "metadata": {},
499 | "outputs": [],
500 | "source": [
501 | "cleaned = clean_string(text)"
502 | ]
503 | },
504 | {
505 | "cell_type": "code",
506 | "execution_count": null,
507 | "id": "5bb26f03-ddff-4359-902c-2f6e9d0c818e",
508 | "metadata": {
509 | "execution": {
510 | "iopub.execute_input": "2022-05-02T08:26:41.998158Z",
511 | "iopub.status.busy": "2022-05-02T08:26:41.997503Z",
512 | "iopub.status.idle": "2022-05-02T08:26:42.042238Z",
513 | "shell.execute_reply": "2022-05-02T08:26:42.041154Z",
514 | "shell.execute_reply.started": "2022-05-02T08:26:41.998100Z"
515 | },
516 | "tags": []
517 | },
518 | "outputs": [],
519 | "source": [
520 | "spell(cleaned.lower())"
521 | ]
522 | },
523 | {
524 | "cell_type": "markdown",
525 | "id": "d3490f64",
526 | "metadata": {},
527 | "source": [
528 | "## Tasks\n",
529 | "\n",
530 | "__Spell checking with generative LLMs__\n",
531 | "- Head over to HuggingChat and check out how some of the newest LLMs perform on this task. Test all of the available models and ask the following questions\n",
532 | "\n",
533 | "__Some test images__\n",
534 | "\n",
535 | "- I've attached some links to culturally significant images below. How well does the OCR pipeline work on these images? What do you need to do to get it to work? What does this suggest about the challenges or limitations of OCR?\n",
536 | " - [Image 1](https://www.techsmith.com/blog/wp-content/uploads/2021/09/Make-a-meme-butterfly.png)\n",
537 | " - [Image 2](https://datasciencedojo.com/wp-content/uploads/52.jpg)\n",
538 | " - [Image 3](https://datasciencedojo.com/wp-content/uploads/36.png)\n",
539 | " - [Image 4 (an actually serious example)](https://upload.wikimedia.org/wikipedia/commons/7/7e/King_James_Bible-Isaiah_26.jpg)\n",
540 | " - [Image 5](https://imgs.xkcd.com/comics/git.png)"
541 | ]
542 | }
543 | ],
544 | "metadata": {
545 | "kernelspec": {
546 | "display_name": "Python 3 ",
547 | "language": "python",
548 | "name": "python3"
549 | },
550 | "language_info": {
551 | "codemirror_mode": {
552 | "name": "ipython",
553 | "version": 3
554 | },
555 | "file_extension": ".py",
556 | "mimetype": "text/x-python",
557 | "name": "python",
558 | "nbconvert_exporter": "python",
559 | "pygments_lexer": "ipython3",
560 | "version": "3.11.9"
561 | }
562 | },
563 | "nbformat": 4,
564 | "nbformat_minor": 5
565 | }
566 |
--------------------------------------------------------------------------------
/nbs/session12_inclass_rdkm.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 1. Grid search - iterating over hyperparameters"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Load libraries"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {
21 | "execution": {
22 | "iopub.execute_input": "2022-05-04T07:30:49.911274Z",
23 | "iopub.status.busy": "2022-05-04T07:30:49.910756Z",
24 | "iopub.status.idle": "2022-05-04T07:30:49.942230Z",
25 | "shell.execute_reply": "2022-05-04T07:30:49.941282Z",
26 | "shell.execute_reply.started": "2022-05-04T07:30:49.911221Z"
27 | },
28 | "tags": []
29 | },
30 | "outputs": [],
31 | "source": [
32 | "# numpy\n",
33 | "import numpy as np\n",
34 | "\n",
35 | "# from scikit learn\n",
36 | "from sklearn import datasets\n",
37 | "from sklearn.preprocessing import LabelBinarizer\n",
38 | "from sklearn.model_selection import train_test_split, GridSearchCV\n",
39 | "from sklearn.metrics import classification_report\n",
40 | "from sklearn.linear_model import LogisticRegression\n",
41 | "from sklearn.pipeline import Pipeline\n",
42 | "\n",
43 | "# TensorFlow\n",
44 | "from tensorflow.keras import Sequential\n",
45 | "from tensorflow.keras.layers import InputLayer, Dense\n",
46 | "from tensorflow.keras.optimizers import SGD, Adam\n",
47 | "\n",
48 | "# scikeras wrapper\n",
49 | "from scikeras.wrappers import KerasClassifier"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {},
55 | "source": [
56 | "__Load and preprocess MNIST data__\n",
57 | "\n",
58 | "See Session 5 notebook for more details."
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {
65 | "execution": {
66 | "iopub.execute_input": "2022-05-04T07:25:46.040606Z",
67 | "iopub.status.busy": "2022-05-04T07:25:46.040082Z",
68 | "iopub.status.idle": "2022-05-04T07:25:46.090351Z",
69 | "shell.execute_reply": "2022-05-04T07:25:46.089364Z",
70 | "shell.execute_reply.started": "2022-05-04T07:25:46.040550Z"
71 | }
72 | },
73 | "outputs": [],
74 | "source": [
75 | "# Loading the Digits dataset\n",
76 | "digits = datasets.load_digits()\n",
77 | "\n",
78 | "# To apply an classifier on this data, we need to flatten the image, to\n",
79 | "# turn the data in a (samples, feature) matrix:\n",
80 | "n_samples = len(digits.images)\n",
81 | "X = digits.images.reshape((n_samples, -1))\n",
82 | "y = digits.target\n",
83 | "\n",
84 | "# Split the dataset in two equal parts\n",
85 | "X_train, X_test, y_train, y_test = train_test_split(X, y, \n",
86 | " test_size=0.2, \n",
87 | " random_state=0)"
88 | ]
89 | },
90 | {
91 | "cell_type": "markdown",
92 | "metadata": {},
93 | "source": [
94 | "__Initialise Gridsearch parameters__"
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "metadata": {},
100 | "source": [
101 | "In the following cell, we initialise our Logistic Regression model as ```pipe```.\n",
102 | "\n",
103 | "We then make lists of possible values for each hyperparameter - for this, you should check the documentation over at sklearn."
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": null,
109 | "metadata": {
110 | "execution": {
111 | "iopub.execute_input": "2022-05-04T07:25:47.359743Z",
112 | "iopub.status.busy": "2022-05-04T07:25:47.359038Z",
113 | "iopub.status.idle": "2022-05-04T07:25:47.370202Z",
114 | "shell.execute_reply": "2022-05-04T07:25:47.368726Z",
115 | "shell.execute_reply.started": "2022-05-04T07:25:47.359683Z"
116 | }
117 | },
118 | "outputs": [],
119 | "source": [
120 | "# Initialise the default model, here given the name 'classifier'\n",
121 | "pipe = Pipeline([('classifier' , LogisticRegression())])\n",
122 | "\n",
123 | "\n",
124 | "# Set tunable parameters for grid search\n",
125 | "penalties = ['l1', 'l2'] # different regularization parameters\n",
126 | "C = [1.0, 0.1, 0.01] # different regularization 'strengths'\n",
127 | "solvers = ['liblinear'] # different solvers - check all of the sklearn docs\n",
128 | "\n",
129 | "# Create parameter grid (a Python dictionary)\n",
130 | "parameters = dict(classifier__penalty = penalties, # notice how we use the name 'classifier'\n",
131 | " classifier__C = C,\n",
132 | " classifier__solver = solvers)\n",
133 | "\n",
134 | "# Choose which metrics on which we want to optimise\n",
135 | "scores = ['precision', 'recall', 'f1']"
136 | ]
137 | },
138 | {
139 | "cell_type": "markdown",
140 | "metadata": {},
141 | "source": [
142 | "__Iterate over scoring types__\n",
143 | "\n",
144 | "For example, we first optimise for the parameters which result in the best weighted precision score; next we optimise for weighted recall; and lastly for weighted-F1. \n",
145 | "\n",
146 | "This allows us to inspect the model in a more nuanced way, seeing how different parameters affect performance across different metrics."
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": null,
152 | "metadata": {
153 | "execution": {
154 | "iopub.execute_input": "2022-05-04T07:25:59.086486Z",
155 | "iopub.status.busy": "2022-05-04T07:25:59.085717Z",
156 | "iopub.status.idle": "2022-05-04T07:26:19.609086Z",
157 | "shell.execute_reply": "2022-05-04T07:26:19.607983Z",
158 | "shell.execute_reply.started": "2022-05-04T07:25:59.086419Z"
159 | }
160 | },
161 | "outputs": [],
162 | "source": [
163 | "for score in scores:\n",
164 | " print(f\"# Tuning hyper-parameters for {score}\")\n",
165 | " print()\n",
166 | " \n",
167 | " # Initialise Gridsearch with predefined parameters\n",
168 | " clf = GridSearchCV(pipe, \n",
169 | " parameters, \n",
170 | " scoring= f\"{score}_weighted\",\n",
171 | " cv=10) # use 10-fold cross-validation\n",
172 | " # Fit\n",
173 | " clf.fit(X_train, y_train)\n",
174 | " \n",
175 | " # Print best results on training data\n",
176 | " print(\"Best parameters set found on training data:\")\n",
177 | " # add new lines to separate rows\n",
178 | " print()\n",
179 | " print(clf.best_params_)\n",
180 | " print()\n",
181 | " print(\"Grid scores on training data:\")\n",
182 | " print()\n",
183 | " # get all means\n",
184 | " means = clf.cv_results_['mean_test_score']\n",
185 | " # get all standard deviations\n",
186 | " stds = clf.cv_results_['std_test_score']\n",
187 | " # get parameter combinations\n",
188 | " params = clf.cv_results_['params']\n",
189 | "\n",
190 | " # print means, standard deviations , and parameters for all runs\n",
191 | " i = 0\n",
192 | " for mean, stdev, param in zip(means, stds, params):\n",
193 | " # 2*standard deviation covers 95% of the spread - check out the 68–95–99.7 rule\n",
194 | " print(f\"Run {i}: {round(mean,3)} (SD=±{round(stdev*2, 3)}), using {param}\")\n",
195 | " i += 1\n",
196 | " print()\n",
197 | " \n",
198 | " # Print details classification report\n",
199 | " print(\"Detailed classification report:\")\n",
200 | " print()\n",
201 | " print(\"The model is trained on the full training set.\")\n",
202 | " print(\"The scores are computed on the full test set.\")\n",
203 | " print()\n",
204 | " y_true, y_pred = y_test, clf.predict(X_test)\n",
205 | " print(classification_report(y_true, y_pred))\n",
206 | " print()"
207 | ]
208 | },
209 | {
210 | "cell_type": "markdown",
211 | "metadata": {},
212 | "source": [
213 | "## More complex DL models"
214 | ]
215 | },
216 | {
217 | "cell_type": "markdown",
218 | "metadata": {},
219 | "source": [
220 | "The same general pipeline can be applied to more complex deep learning models, such as those using CNNs or similar.\n",
221 | "\n",
222 | "To do this, we have to define our model using ```tf.keras``` in a slightly different way from what we are used to."
223 | ]
224 | },
225 | {
226 | "cell_type": "markdown",
227 | "metadata": {},
228 | "source": [
229 | "__Inspect image shapes for input layer size__"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": null,
235 | "metadata": {
236 | "execution": {
237 | "iopub.execute_input": "2022-05-04T07:27:16.494482Z",
238 | "iopub.status.busy": "2022-05-04T07:27:16.493941Z",
239 | "iopub.status.idle": "2022-05-04T07:27:16.503693Z",
240 | "shell.execute_reply": "2022-05-04T07:27:16.502499Z",
241 | "shell.execute_reply.started": "2022-05-04T07:27:16.494424Z"
242 | }
243 | },
244 | "outputs": [],
245 | "source": [
246 | "X[0].shape"
247 | ]
248 | },
249 | {
250 | "cell_type": "markdown",
251 | "metadata": {},
252 | "source": [
253 | "__Define model__\n",
254 | "\n",
255 | "We begin by defining our model, almost the same as we normally would. The main difference is that we wrap the whole thing in a function definition, which we've here called ```nn_model```."
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": null,
261 | "metadata": {
262 | "execution": {
263 | "iopub.execute_input": "2022-05-04T07:37:00.550732Z",
264 | "iopub.status.busy": "2022-05-04T07:37:00.550211Z",
265 | "iopub.status.idle": "2022-05-04T07:37:00.561353Z",
266 | "shell.execute_reply": "2022-05-04T07:37:00.560316Z",
267 | "shell.execute_reply.started": "2022-05-04T07:37:00.550677Z"
268 | },
269 | "tags": []
270 | },
271 | "outputs": [],
272 | "source": [
273 | "def nn_model(optimizer='adam'):\n",
274 | " # create a sequential model\n",
275 | " model = Sequential()\n",
276 | " # add input layer of 64 nodes and hidden layer of 32, ReLU activation\n",
277 | " model.add(InputLayer(shape=(64,)))\n",
278 | " model.add(Dense(32, \n",
279 | " activation=\"relu\"))\n",
280 | " # hidden layer of 16 nodes, ReLU activation\n",
281 | " model.add(Dense(16, \n",
282 | " activation=\"relu\"))\n",
283 | "    # classification layer, 10 classes with softmax activation\n",
284 | " model.add(Dense(10, \n",
285 | " activation=\"softmax\")) \n",
286 | " # categorical cross-entropy, optimizer defined in function call\n",
287 | " model.compile(loss=\"categorical_crossentropy\", \n",
288 | " optimizer=optimizer, \n",
289 | " metrics=[\"accuracy\"])\n",
290 | "\n",
291 | " # return the compiled model\n",
292 | " return model"
293 | ]
294 | },
295 | {
296 | "cell_type": "markdown",
297 | "metadata": {},
298 | "source": [
299 | "__Create model for ```sklearn```__\n",
300 | "\n",
301 | "We take the predefined neural network model above and run it through ```KerasClassifier```. \n",
302 | "\n",
303 | "This returns an object that can be used in the ```sklearn``` pipeline, just like a ```LogisticRegression()``` classifier, for example."
304 | ]
305 | },
306 | {
307 | "cell_type": "code",
308 | "execution_count": null,
309 | "metadata": {
310 | "execution": {
311 | "iopub.execute_input": "2022-05-04T07:37:01.152919Z",
312 | "iopub.status.busy": "2022-05-04T07:37:01.152445Z",
313 | "iopub.status.idle": "2022-05-04T07:37:01.159678Z",
314 | "shell.execute_reply": "2022-05-04T07:37:01.158390Z",
315 | "shell.execute_reply.started": "2022-05-04T07:37:01.152851Z"
316 | },
317 | "tags": []
318 | },
319 | "outputs": [],
320 | "source": [
321 | "model = KerasClassifier(model=nn_model, # build the model defined in nn_model\n",
322 | " verbose=0) # set to 1 for verbose output during training"
323 | ]
324 | },
325 | {
326 | "cell_type": "markdown",
327 | "metadata": {},
328 | "source": [
329 | "__Define grid search parameters__"
330 | ]
331 | },
332 | {
333 | "cell_type": "markdown",
334 | "metadata": {},
335 | "source": [
336 | "We now define our grid search parameters in exactly the same manner as we did above."
337 | ]
338 | },
339 | {
340 | "cell_type": "code",
341 | "execution_count": null,
342 | "metadata": {
343 | "execution": {
344 | "iopub.execute_input": "2022-05-04T07:37:02.161634Z",
345 | "iopub.status.busy": "2022-05-04T07:37:02.161161Z",
346 | "iopub.status.idle": "2022-05-04T07:37:02.169925Z",
347 | "shell.execute_reply": "2022-05-04T07:37:02.168589Z",
348 | "shell.execute_reply.started": "2022-05-04T07:37:02.161581Z"
349 | },
350 | "tags": []
351 | },
352 | "outputs": [],
353 | "source": [
354 | "# grid search epochs, batch size and optimizer\n",
355 | "optimizers = ['sgd', 'adam']\n",
356 | "# range of epochs to run\n",
357 | "epochs = [5, 10]\n",
358 | "# variable batch sizes\n",
359 | "batches = [5, 10]\n",
360 | "\n",
361 | "# create search grid\n",
362 | "param_grid = dict(optimizer=optimizers, \n",
363 | " epochs=epochs, \n",
364 | " batch_size=batches)"
365 | ]
366 | },
367 | {
368 | "cell_type": "markdown",
369 | "metadata": {},
370 | "source": [
371 | "__Initialise Gridsearch with model and parameter grid__"
372 | ]
373 | },
374 | {
375 | "cell_type": "markdown",
376 | "metadata": {},
377 | "source": [
378 | "We then run grid search using these parameters and the neural network model that we defined."
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "execution_count": null,
384 | "metadata": {
385 | "execution": {
386 | "iopub.execute_input": "2022-05-04T07:37:03.207961Z",
387 | "iopub.status.busy": "2022-05-04T07:37:03.207472Z",
388 | "iopub.status.idle": "2022-05-04T07:37:03.215053Z",
389 | "shell.execute_reply": "2022-05-04T07:37:03.213846Z",
390 | "shell.execute_reply.started": "2022-05-04T07:37:03.207907Z"
391 | },
392 | "tags": []
393 | },
394 | "outputs": [],
395 | "source": [
396 | "grid = GridSearchCV(estimator=model, \n",
397 | " param_grid=param_grid, \n",
398 | " n_jobs=-1, # number of CPU cores to use: -1 means use all available\n",
399 | " cv=5, # 5-fold cross validation\n",
400 | " scoring='accuracy',\n",
401 | " verbose=3)"
402 | ]
403 | },
404 | {
405 | "cell_type": "markdown",
406 | "metadata": {},
407 | "source": [
408 | "__Fit to the data and labels__"
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": null,
414 | "metadata": {},
415 | "outputs": [],
416 | "source": [
417 | "# create one-hot encodings\n",
418 | "lb = LabelBinarizer()\n",
419 | "y = lb.fit_transform(y)"
420 | ]
421 | },
422 | {
423 | "cell_type": "code",
424 | "execution_count": null,
425 | "metadata": {
426 | "execution": {
427 | "iopub.execute_input": "2022-05-04T07:37:07.974515Z",
428 | "iopub.status.busy": "2022-05-04T07:37:07.973986Z",
429 | "iopub.status.idle": "2022-05-04T07:37:22.069819Z",
430 | "shell.execute_reply": "2022-05-04T07:37:22.068679Z",
431 | "shell.execute_reply.started": "2022-05-04T07:37:07.974460Z"
432 | },
433 | "tags": []
434 | },
435 | "outputs": [],
436 | "source": [
437 | "grid_result = grid.fit(X, y)"
438 | ]
439 | },
440 | {
441 | "cell_type": "markdown",
442 | "metadata": {},
443 | "source": [
444 | "__Print best results__"
445 | ]
446 | },
447 | {
448 | "cell_type": "markdown",
449 | "metadata": {},
450 | "source": [
451 | "The following cell prints the parameters which return the best possible model from all combinations."
452 | ]
453 | },
454 | {
455 | "cell_type": "code",
456 | "execution_count": null,
457 | "metadata": {
458 | "execution": {
459 | "iopub.execute_input": "2022-05-04T07:37:28.170707Z",
460 | "iopub.status.busy": "2022-05-04T07:37:28.170184Z",
461 | "iopub.status.idle": "2022-05-04T07:37:28.178289Z",
462 | "shell.execute_reply": "2022-05-04T07:37:28.177311Z",
463 | "shell.execute_reply.started": "2022-05-04T07:37:28.170652Z"
464 | }
465 | },
466 | "outputs": [],
467 | "source": [
468 | "# print best results, rounding values to 3 decimal places\n",
469 | "print(f\"Best run: {round(grid_result.best_score_,3)} using {grid_result.best_params_}\")"
470 | ]
471 | },
472 | {
473 | "cell_type": "markdown",
474 | "metadata": {},
475 | "source": [
476 | "__Show all runs__"
477 | ]
478 | },
479 | {
480 | "cell_type": "markdown",
481 | "metadata": {},
482 | "source": [
483 | "We can also inspect all runs, in order to see if there are general tendencies or patterns."
484 | ]
485 | },
486 | {
487 | "cell_type": "code",
488 | "execution_count": null,
489 | "metadata": {
490 | "execution": {
491 | "iopub.execute_input": "2022-05-04T07:37:30.648754Z",
492 | "iopub.status.busy": "2022-05-04T07:37:30.648236Z",
493 | "iopub.status.idle": "2022-05-04T07:37:30.659933Z",
494 | "shell.execute_reply": "2022-05-04T07:37:30.658840Z",
495 | "shell.execute_reply.started": "2022-05-04T07:37:30.648698Z"
496 | }
497 | },
498 | "outputs": [],
499 | "source": [
500 | "# get all means\n",
501 | "means = grid_result.cv_results_['mean_test_score']\n",
502 | "# get all standard deviations\n",
503 | "stds = grid_result.cv_results_['std_test_score']\n",
504 | "# get parameter combinations\n",
505 | "params = grid_result.cv_results_['params']\n",
506 | "\n",
507 | "# print means, standard deviations, and parameters for all runs\n",
508 | "i = 0\n",
509 | "for mean, stdev, param in zip(means, stds, params):\n",
510 | " print(f\"Run {i}: {round(mean,3)} (SD=±{round(2*stdev, 3)}), using {param}\")\n",
511 | " i += 1"
512 | ]
513 | },
514 | {
515 | "cell_type": "markdown",
516 | "metadata": {},
517 | "source": [
518 | "# 2. Visualise feature maps"
519 | ]
520 | },
521 | {
522 | "cell_type": "markdown",
523 | "metadata": {},
524 | "source": [
525 | "A useful task in the context of visual analytics and Convolutional Neural Networks is the idea of visualising feature maps. In other words, can we see what a model is actually 'looking' at?\n",
526 | "\n",
527 | "One way to do this is to use an activation heatmap, to show which areas draw the most 'focus' from a model when classifying."
528 | ]
529 | },
530 | {
531 | "cell_type": "code",
532 | "execution_count": null,
533 | "metadata": {
534 | "execution": {
535 | "iopub.execute_input": "2022-05-04T07:38:23.367733Z",
536 | "iopub.status.busy": "2022-05-04T07:38:23.367212Z",
537 | "iopub.status.idle": "2022-05-04T07:38:23.866554Z",
538 | "shell.execute_reply": "2022-05-04T07:38:23.865924Z",
539 | "shell.execute_reply.started": "2022-05-04T07:38:23.367677Z"
540 | }
541 | },
542 | "outputs": [],
543 | "source": [
544 | "# path tools\n",
545 | "import sys,os\n",
546 | "sys.path.append(\"..\")\n",
547 | "# neural networks with numpy\n",
548 | "from utils.imutils import jimshow as show\n",
549 | "\n",
550 | "# image processing\n",
551 | "import numpy as np\n",
552 | "import matplotlib.pyplot as plt\n",
553 | "import cv2\n",
554 | "\n",
555 | "# tensorflow\n",
556 | "import tensorflow as tf\n",
557 | "import tensorflow.keras.backend as K\n",
558 | "from tensorflow.keras.preprocessing import image\n",
559 | "from tensorflow.keras.applications.vgg16 import (preprocess_input,\n",
560 | " decode_predictions,\n",
561 | " VGG16)"
562 | ]
563 | },
564 | {
565 | "cell_type": "markdown",
566 | "metadata": {},
567 | "source": [
568 | "__Import VGG16 model__"
569 | ]
570 | },
571 | {
572 | "cell_type": "markdown",
573 | "metadata": {},
574 | "source": [
575 | "We're choosing here to work with VGG16 but the same principle can be applied to other pretrained models, or your own models. The only thing that will need to be changed is the name of the final convolutional layer."
576 | ]
577 | },
578 | {
579 | "cell_type": "code",
580 | "execution_count": null,
581 | "metadata": {
582 | "execution": {
583 | "iopub.execute_input": "2022-05-04T07:38:27.500519Z",
584 | "iopub.status.busy": "2022-05-04T07:38:27.500001Z",
585 | "iopub.status.idle": "2022-05-04T07:38:38.604692Z",
586 | "shell.execute_reply": "2022-05-04T07:38:38.604022Z",
587 | "shell.execute_reply.started": "2022-05-04T07:38:27.500463Z"
588 | }
589 | },
590 | "outputs": [],
591 | "source": [
592 | "model = VGG16()\n",
593 | "\n",
594 | "model.summary()"
595 | ]
596 | },
597 | {
598 | "cell_type": "markdown",
599 | "metadata": {},
600 | "source": [
601 | "__Load image__"
602 | ]
603 | },
604 | {
605 | "cell_type": "markdown",
606 | "metadata": {},
607 | "source": [
608 | "We can choose to load an image - look back to the last few weeks to check up on how to load images with ```tf.keras```. Remember that your image needs to be the same dimensions as the pretrained model!"
609 | ]
610 | },
611 | {
612 | "cell_type": "code",
613 | "execution_count": null,
614 | "metadata": {
615 | "execution": {
616 | "iopub.execute_input": "2022-05-04T07:40:04.278636Z",
617 | "iopub.status.busy": "2022-05-04T07:40:04.277971Z",
618 | "iopub.status.idle": "2022-05-04T07:40:04.317805Z",
619 | "shell.execute_reply": "2022-05-04T07:40:04.317186Z",
620 | "shell.execute_reply.started": "2022-05-04T07:40:04.278577Z"
621 | },
622 | "tags": []
623 | },
624 | "outputs": [],
625 | "source": [
626 | "# file path - hard coded paths are bad practice!\n",
627 | "filepath = os.path.join(\"..\", \"..\", \"CDS-VIS\", \"img\", \"florence.jpg\")\n",
628 | "\n",
629 | "# load image using tf.keras\n",
630 | "img = image.load_img(filepath, target_size=(224, 224))\n",
631 | "\n",
632 | "# display image\n",
633 | "img"
634 | ]
635 | },
636 | {
637 | "cell_type": "markdown",
638 | "metadata": {},
639 | "source": [
640 | "__Preprocess and predict__\n",
641 | "\n",
642 | "See Session 10 notebook for more information."
643 | ]
644 | },
645 | {
646 | "cell_type": "code",
647 | "execution_count": null,
648 | "metadata": {
649 | "execution": {
650 | "iopub.execute_input": "2022-05-04T07:40:08.824616Z",
651 | "iopub.status.busy": "2022-05-04T07:40:08.824094Z",
652 | "iopub.status.idle": "2022-05-04T07:40:09.541205Z",
653 | "shell.execute_reply": "2022-05-04T07:40:09.540198Z",
654 | "shell.execute_reply.started": "2022-05-04T07:40:08.824559Z"
655 | }
656 | },
657 | "outputs": [],
658 | "source": [
659 | "# convert image to array\n",
660 | "x = image.img_to_array(img)\n",
661 | "# convert to rank 4 tensor\n",
662 | "x = np.expand_dims(x, axis=0)\n",
663 | "# preprocess to be in line with VGG16 data \n",
664 | "x = preprocess_input(x)\n",
665 | "\n",
666 | "# make predictions\n",
667 | "preds = model.predict(x)\n",
668 | "decode_predictions(preds)"
669 | ]
670 | },
671 | {
672 | "cell_type": "markdown",
673 | "metadata": {},
674 | "source": [
675 | "__Create activation heatmap for final layer__\n",
676 | "\n",
677 | "In order to visualise the layers, we use something called **[Gradient-weighted Class Activation Mapping (Grad-CAM)](https://arxiv.org/pdf/1610.02391.pdf)**. \n",
678 | "\n",
679 | "Essentially, we make use of the gradients in the final layer to highlight which regions are particularly informative when predicting a given class.\n",
680 | "\n",
681 | "The code here is a little complicated to follow - you can find more info [here](https://www.pyimagesearch.com/2020/03/09/grad-cam-visualize-class-activation-maps-with-keras-tensorflow-and-deep-learning/) and [here](https://www.tensorflow.org/guide/autodiff).\n",
682 | "\n",
683 | "The code below has been modified from Keras documentation, found [here](https://keras.io/examples/vision/grad_cam/)."
684 | ]
685 | },
686 | {
687 | "cell_type": "code",
688 | "execution_count": null,
689 | "metadata": {
690 | "execution": {
691 | "iopub.execute_input": "2022-05-04T07:40:16.363836Z",
692 | "iopub.status.busy": "2022-05-04T07:40:16.363321Z",
693 | "iopub.status.idle": "2022-05-04T07:40:17.061688Z",
694 | "shell.execute_reply": "2022-05-04T07:40:17.060939Z",
695 | "shell.execute_reply.started": "2022-05-04T07:40:16.363781Z"
696 | }
697 | },
698 | "outputs": [],
699 | "source": [
700 | "with tf.GradientTape() as tape:\n",
701 | " # make sure the name here corresponds to the final conv layer in your network\n",
702 | " last_conv_layer = model.get_layer('block5_conv3')\n",
703 | " \n",
704 | " # First, we create a model that maps the input image to the activations\n",
705 | " # of the last conv layer as well as the output predictions \n",
706 | " iterate = tf.keras.models.Model(model.inputs, \n",
707 | " [model.output, last_conv_layer.output])\n",
708 | " \n",
709 | " # Then, we compute the gradient of the top predicted class for our input image\n",
710 | " # with respect to the activations of the last conv layer\n",
711 | " model_out, last_conv_layer = iterate(x)\n",
712 | " class_out = model_out[:, np.argmax(model_out[0])]\n",
713 | " \n",
714 | " # This is the gradient of the output neuron of the last conv layer\n",
715 | " grads = tape.gradient(class_out, \n",
716 | " last_conv_layer)\n",
717 | " # Vector of mean intensity of the gradient over a specific feature map channel\n",
718 | " pooled_grads = K.mean(grads, \n",
719 | " axis=(0, 1, 2))"
720 | ]
721 | },
722 | {
723 | "cell_type": "code",
724 | "execution_count": null,
725 | "metadata": {
726 | "execution": {
727 | "iopub.execute_input": "2022-05-04T07:40:23.802519Z",
728 | "iopub.status.busy": "2022-05-04T07:40:23.801820Z",
729 | "iopub.status.idle": "2022-05-04T07:40:23.809881Z",
730 | "shell.execute_reply": "2022-05-04T07:40:23.808822Z",
731 | "shell.execute_reply.started": "2022-05-04T07:40:23.802462Z"
732 | }
733 | },
734 | "outputs": [],
735 | "source": [
736 | "# We multiply each channel in the feature map array\n",
737 | "# by \"how important this channel is\" with regard to the top predicted class\n",
738 | "# then sum all the channels to obtain the heatmap class activation\n",
739 | "heatmap = tf.reduce_mean(tf.multiply(pooled_grads, last_conv_layer), \n",
740 | " axis=-1)"
741 | ]
742 | },
743 | {
744 | "cell_type": "markdown",
745 | "metadata": {},
746 | "source": [
747 | "__Simple heatmap using matplotlib__"
748 | ]
749 | },
750 | {
751 | "cell_type": "markdown",
752 | "metadata": {},
753 | "source": [
754 | "We can then create a very simple heatmap showing where 'attention' is focused in the final layer when predicting our input image."
755 | ]
756 | },
757 | {
758 | "cell_type": "code",
759 | "execution_count": null,
760 | "metadata": {
761 | "execution": {
762 | "iopub.execute_input": "2022-05-04T07:40:27.719653Z",
763 | "iopub.status.busy": "2022-05-04T07:40:27.719128Z",
764 | "iopub.status.idle": "2022-05-04T07:40:27.847575Z",
765 | "shell.execute_reply": "2022-05-04T07:40:27.846917Z",
766 | "shell.execute_reply.started": "2022-05-04T07:40:27.719597Z"
767 | }
768 | },
769 | "outputs": [],
770 | "source": [
771 | "heatmap = np.maximum(heatmap, 0)\n",
772 | "heatmap /= np.max(heatmap)\n",
773 | "heatmap = heatmap.reshape((14, 14))\n",
774 | "plt.matshow(heatmap)\n",
775 | "plt.show()"
776 | ]
777 | },
778 | {
779 | "cell_type": "markdown",
780 | "metadata": {},
781 | "source": [
782 | "__Better visualisation with ```OpenCV```__"
783 | ]
784 | },
785 | {
786 | "cell_type": "code",
787 | "execution_count": null,
788 | "metadata": {
789 | "execution": {
790 | "iopub.execute_input": "2022-05-04T07:40:39.659719Z",
791 | "iopub.status.busy": "2022-05-04T07:40:39.659040Z",
792 | "iopub.status.idle": "2022-05-04T07:40:39.701801Z",
793 | "shell.execute_reply": "2022-05-04T07:40:39.701101Z",
794 | "shell.execute_reply.started": "2022-05-04T07:40:39.659661Z"
795 | },
796 | "tags": []
797 | },
798 | "outputs": [],
799 | "source": [
800 | "# Load the original image\n",
801 | "img = cv2.imread(filepath)\n",
802 | "\n",
803 | "# heatmap should be semi transparent\n",
804 | "intensity = 0.5\n",
805 | "\n",
806 | "# resize the heatmap to be the original dimensions of the input \n",
807 | "heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))\n",
808 | "\n",
809 | "# https://docs.opencv.org/master/d3/d50/group__imgproc__colormap.html\n",
810 | "heatmap = cv2.applyColorMap(np.uint8(255*heatmap), cv2.COLORMAP_JET)\n",
811 | "\n",
812 | "# multiply heatmap by intensity and 'add' this on top of the original image\n",
813 | "superimposed = (heatmap * intensity) + img"
814 | ]
815 | },
816 | {
817 | "cell_type": "markdown",
818 | "metadata": {},
819 | "source": [
820 | "__Write file to output__"
821 | ]
822 | },
823 | {
824 | "cell_type": "markdown",
825 | "metadata": {},
826 | "source": [
827 | "I've created a new folder called maps_out where I save the image."
828 | ]
829 | },
830 | {
831 | "cell_type": "code",
832 | "execution_count": null,
833 | "metadata": {
834 | "execution": {
835 | "iopub.execute_input": "2022-05-04T07:40:51.362071Z",
836 | "iopub.status.busy": "2022-05-04T07:40:51.361535Z",
837 | "iopub.status.idle": "2022-05-04T07:40:51.389279Z",
838 | "shell.execute_reply": "2022-05-04T07:40:51.388403Z",
839 | "shell.execute_reply.started": "2022-05-04T07:40:51.362014Z"
840 | }
841 | },
842 | "outputs": [],
843 | "source": [
844 | "cv2.imwrite(\"florence.jpg\", superimposed)"
845 | ]
846 | }
847 | ],
848 | "metadata": {
849 | "kernelspec": {
850 | "display_name": "Python 3 ",
851 | "language": "python",
852 | "name": "python3"
853 | },
854 | "language_info": {
855 | "codemirror_mode": {
856 | "name": "ipython",
857 | "version": 3
858 | },
859 | "file_extension": ".py",
860 | "mimetype": "text/x-python",
861 | "name": "python",
862 | "nbconvert_exporter": "python",
863 | "pygments_lexer": "ipython3",
864 | "version": "3.11.9"
865 | }
866 | },
867 | "nbformat": 4,
868 | "nbformat_minor": 4
869 | }
870 |
--------------------------------------------------------------------------------