├── .gitignore ├── images └── ministry_of_predictors.png ├── kaggle_arc ├── .project-root ├── README.md ├── base │ ├── __init__.py │ ├── field.py │ ├── iodata.py │ ├── transformers.py │ └── utils.py ├── constants.py ├── operations │ ├── basic.py │ ├── field2point.py │ ├── resizing.py │ ├── reversible.py │ └── subpatterns.py ├── predictors │ ├── __init__.py │ ├── availability_mixins.py │ ├── basic.py │ ├── boosting_tree.py │ ├── color_counting.py │ ├── complex.py │ ├── connector.py │ ├── convolution.py │ ├── decision_tree.py │ ├── draft_predictors │ │ ├── cam_predictor.py │ │ ├── cf_combinator.py │ │ ├── cf_filler.py │ │ ├── cf_selector.py │ │ └── cf_sorter.py │ ├── field2point.py │ ├── graph_boosting_tree.py │ ├── shapes.py │ └── subpattern.py ├── scripts │ ├── 0_idpredictor_on_test_script.py │ ├── 1_complexpredictor_on_test_script.py │ ├── 2_complexpredictor_coloring.py │ ├── convert2ipynb.py │ └── predictor_validator.py └── utils.py ├── pyproject.toml └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | input 2 | temp 3 | working 4 | 5 | 6 | **/__pycache__ 7 | **/.ipynb_checkpoints 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | cover/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | .pybuilder/ 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | # For a library or package, you might want to ignore these files since the code is 95 | # intended to run in multiple environments; otherwise, check them in: 96 | # .python-version 97 | 98 | # pipenv 99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 102 | # install all needed dependencies. 103 | #Pipfile.lock 104 | 105 | # poetry 106 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 107 | # This is especially recommended for binary packages to ensure reproducibility, and is more 108 | # commonly ignored for libraries. 
109 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
110 | #poetry.lock
111 | 
112 | # pdm
113 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
114 | #pdm.lock
115 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
116 | # in version control.
117 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
118 | .pdm.toml
119 | .pdm-python
120 | .pdm-build/
121 | 
122 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
123 | __pypackages__/
124 | 
125 | # Celery stuff
126 | celerybeat-schedule
127 | celerybeat.pid
128 | 
129 | # SageMath parsed files
130 | *.sage.py
131 | 
132 | # Environments
133 | .env
134 | .venv
135 | env/
136 | venv/
137 | ENV/
138 | env.bak/
139 | venv.bak/
140 | 
141 | # Spyder project settings
142 | .spyderproject
143 | .spyproject
144 | 
145 | # Rope project settings
146 | .ropeproject
147 | 
148 | # mkdocs documentation
149 | /site
150 | 
151 | # mypy
152 | .mypy_cache/
153 | .dmypy.json
154 | dmypy.json
155 | 
156 | # Pyre type checker
157 | .pyre/
158 | 
159 | # pytype static type analyzer
160 | .pytype/
161 | 
162 | # Cython debug symbols
163 | cython_debug/
164 | 
165 | # PyCharm
166 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
167 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
168 | # and can be added to the global gitignore or merged into this file. For a more nuclear
169 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
170 | #.idea/
171 | 
172 | **/.DS_Store
--------------------------------------------------------------------------------
/images/ministry_of_predictors.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/latticetower/kaggle-arc/e29bb298e68245048ffcc7aaa392d858e061adf2/images/ministry_of_predictors.png
--------------------------------------------------------------------------------
/kaggle_arc/.project-root:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/latticetower/kaggle-arc/e29bb298e68245048ffcc7aaa392d858e061adf2/kaggle_arc/.project-root
--------------------------------------------------------------------------------
/kaggle_arc/README.md:
--------------------------------------------------------------------------------
1 | # kaggle-arc
2 | https://www.kaggle.com/c/abstraction-and-reasoning-challenge
3 | 
4 | # Usage:
5 | 
6 | Direct script calls look like this:
7 | ```
8 | cd scripts
9 | 
10 | PYTHONPATH=$(pwd)/..:$PYTHONPATH python 1_complexpredictor_on_test_script.py 1>out.txt
11 | ```
12 | 
13 | To convert a script to a single ipynb notebook, run this command from the repo root folder:
14 | ```
15 | python scripts/convert2ipynb.py scripts/runner.py temp/sample1.ipynb
16 | ```
17 | The first parameter is the path to the main script; the second is the path where the notebook will be saved.
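
Predictors can also be driven from Python directly. Below is a minimal, hedged sketch (the puzzle path is a placeholder, the `kaggle_arc` folder is assumed to be on `PYTHONPATH`, and the `is_available`/`train`/`predict` calls follow the `Predictor` API in this repo):
```
from base.iodata import Sample
from predictors.basic import IdPredictor

# Load a single puzzle; the json path below is hypothetical.
sample = Sample("demo", "../input/abstraction-and-reasoning-challenge/training/demo_task.json")

predictor = IdPredictor()
if predictor.is_available(sample.train):   # predictors first check applicability
    predictor.train(sample.train)          # fit on the train input/output pairs
    # predict() yields one or more candidate Field objects
    for prediction in predictor.predict(sample.test[0].input_field):
        print(prediction)                  # prints the grid in its "|...|" string form
        break
```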
18 | 
19 | # Evaluation
20 | 
21 | To evaluate predictors on the train and evaluation datasets, the following command was used (run from the `scripts` folder):
22 | ```
23 | PYTHONPATH=$(pwd)/..:$PYTHONPATH python predictor_validator.py Id Zeros ColorCounting Repeating Fractal
24 | ```
25 | Each table cell shows: correct predictions on train / correct predictions on test / number of samples for which the predictor is available.
26 | Class name | Train | Evaluation
27 | -----------|-------|-----------
28 | IdPredictor | 4 / 0 / 400 | 2 / 0 / 400
29 | ZerosPredictor | 0 / 0 / 400 | 0 / 0 / 400
30 | ColorCountingPredictor | 16 / 5 / 262 | 2 / 1 / 270
31 | RepeatingPredictor | 0 / 0 / 17 | 0 / 0 / 23
32 | FractalPredictor | 1 / 1 / 17 | 2 / 1 / 23
33 | ResizingPredictor | 2 / 2 / 17 | 1 / 1 / 23
34 | ConstantShaper | 4 / 4 / 15 | 3 / 3 / 10
35 | BoostingTreePredictor | 136 / 24 / 262 | 134 / 8 / 270
36 | No augmentation, with painter | 217 / 31 / 262 | 188 / 8 / 270
37 | with square features | 214 / 31 / 262 | 191 / 9 / 270
38 | with new features | 161 / 32 / 262 | 135 / 7 / 270
39 | Augmentation + repainter: | 79 / 8 / 262 | 43 / 2 / 270
40 | Augmentation, w/o repainter: | 83 / 7 / 262 | 37 / 1 / 270
41 | BoostingTreePredictor2 | 2 / 1 / 31 | 3 / 3 / 27
42 | BoostingTreePredictor3 | 218 / 31 / 262 | 198 / 12 / 270
43 | SubpatternMatcherPredictor | 2 / 2 / 10 | 0 / 0 / 2
44 | SimpleSummarizePredictor | 1 / 1 / 6 | 0 / 2 / 2
45 | GraphBoostingTreePredictor | 17 / 13 / 43 | 9 / 6 / 33
46 | GraphBoostingTreePredictor3 | 155 / 28 / 262 | 143 / 6 / 270
47 | PointConnectorPredictor | 24 / 3 / 90 | 11 / 2 / 68
48 | ConvolutionPredictor | 11 / 1 / 262 | 3 / 0 / 270
--------------------------------------------------------------------------------
/kaggle_arc/base/__init__.py:
--------------------------------------------------------------------------------
1 | import rootutils
2 | 
3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
4 | 
5 | from base.field import Field
6 | from base.iodata import IOData, Sample
--------------------------------------------------------------------------------
/kaggle_arc/base/field.py:
--------------------------------------------------------------------------------
1 | import rootutils
2 | 
3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
4 | 
5 | import os
6 | import numpy as np
7 | import matplotlib
8 | 
9 | # try:
10 | #     if not matplotlib.is_interactive():
11 | #         matplotlib.use("svg")
12 | # except:
13 | #     pass
14 | 
15 | import matplotlib.pyplot as plt
16 | import seaborn as sns
17 | 
18 | import torch
19 | from itertools import product
20 | from collections import OrderedDict
21 | import networkx as nx
22 | 
23 | from typing import NamedTuple
24 | 
25 | from constants import *
26 | from base.utils import *
27 | 
28 | 
29 | def binary_dice(a, b):
30 |     s = np.sum(a) + np.sum(b)
31 |     if s != 0:
32 |         return 2 * np.sum(a * b) / s
33 |     return None
34 | 
35 | 
36 | def multiclass_dice(a, b, c):
37 |     return binary_dice(1 * (a == c), 1 * (b == c))
38 | 
39 | 
40 | def build_colormap(i, o=None, bg=0):
41 |     colormap = {bg: 0}
42 |     current_id = 1
43 |     for line in i:
44 |         for x in line:
45 |             if x in colormap:
46 |                 continue
47 |             colormap[x] = current_id
48 |             current_id += 1
49 |     if o is not None:
50 |         for line in o:
51 |             for x in line:
52 |                 if x in colormap:
53 |                     continue
54 |                 colormap[x] = current_id
55 |                 current_id += 1
56 |     return colormap
57 | 
58 | 
59 | class Field:
60 |     """Represents a matrix of values 0-9 corresponding to a single input or output grid of an ARC puzzle.
    Stores additional properties (colorization schemes, drawing helpers, etc.).
61 |     """
62 |     __slots__ = ["data", "multiplier", "colormap", "prop_names"]
63 | 
64 |     def __init__(self, data):
65 |         if isinstance(data, list):
66 |             self.data = np.asarray(
67 |                 [[(x if x >= 0 else 10 - x) for x in line] for line in data],
68 |                 dtype=np.uint8,
69 |             )
70 |         else:
71 |             self.data = data.copy()
72 |         self.multiplier = 0.5
73 |         self.colormap = None
74 |         self.prop_names = (
75 |             "h w xmin ymin xmax ymax xmean ymean is_convex holes contour_size interior_size".split()
76 |             + "is_rectangular is_square".split()
77 |             + [f"flip_{i}" for i in range(10)]
78 |             + [f"flip_conv_{i}" for i in range(10)]
79 |         )
80 | 
81 |     def get(self, i, j, default_color=0):
82 |         if i < 0 or j < 0:
83 |             return default_color
84 |         if i >= self.data.shape[0] or j >= self.data.shape[1]:
85 |             return default_color
86 |         return self.data[i, j]
87 | 
88 |     @property
89 |     def processed(self):
90 |         if self.colormap is None:
91 |             self.colormap = build_colormap(self.data, o=None, bg=0)
92 |         new_data = [[self.colormap.get(x, x) for x in line] for line in self.data]
93 |         return Field(new_data)
94 | 
95 |     def reconstruct(self, field):
96 |         if self.colormap is None:
97 |             return field
98 |         rev = {v: k for k, v in self.colormap.items()}
99 |         rev = {k: v for k, v in rev.items() if k != v}
100 |         if len(rev) < 1:
101 |             return field
102 |         new_data = [[rev.get(x, x) for x in line] for line in field.data]
103 |         return Field(new_data)
104 | 
105 |     @property
106 |     def to_array(self):
107 |         return [[int(x) for x in line] for line in self.data]
108 | 
109 |     @property
110 |     def height(self):
111 |         return self.data.shape[0]
112 | 
113 |     @property
114 |     def width(self):
115 |         return self.data.shape[1]
116 | 
117 |     @property
118 |     def shape(self):
119 |         return self.data.shape
120 | 
121 |     @property
122 |     def dtype(self):
123 |         return self.data.dtype
124 | 
125 |     @property
126 |     def data_splitted(self):
127 |         return np.stack([1.0 * (self.data == i) for i in range(10)])
128 | 
129 |     def t(self):
130 |         return torch.tensor(self.data)
131 | 
132 |     def t_splitted(self):
133 |         return torch.tensor(self.data_splitted)
134 | 
135 |     @staticmethod
136 |     def from_splitted(data):
137 |         return Field(np.argmax(data, 0))
138 | 
139 |     def __eq__(self, b):
140 |         if not isinstance(b, Field):
141 |             return self.data == b  # this does all conversion magic
142 |         if not (self.height == b.height and self.width == b.width):
143 |             return False
144 |         return np.all(self.data == b.data)
145 | 
146 |     def __ne__(self, b):
147 |         # if not isinstance(b, Field):
148 |         #     return self.data != b
149 |         return ~(self == b)
150 | 
151 |     # def __repr__(self):              # dead code: shadowed by the string-based
152 |     #     return repr(self.data)       # __repr__ defined further below
153 | 
154 |     def show(self, ax=None, label=None):
155 |         if ax is None:
156 |             plt.figure(
157 |                 figsize=(self.width * self.multiplier, self.height * self.multiplier)
158 |             )
159 |             ax = plt.gca()
160 |         ax.imshow(self.data, cmap=COLORMAP, norm=NORM)
161 |         for edge, spine in ax.spines.items():
162 |             spine.set_visible(False)
163 |         ax.set_xticks(np.arange(self.data.shape[1]) + 0.5, minor=True)
164 |         ax.set_yticks(np.arange(self.data.shape[0]) + 0.5, minor=True)
165 |         ax.set_xticklabels([])
166 |         ax.set_yticklabels([])
167 |         ax.grid(which="minor", color="black", linestyle="-", linewidth=1)
168 |         ax.tick_params(which="minor", bottom=False, left=False)
169 |         ax.set_aspect("equal")
170 |         if label is not None:
171 |             ax.set_title(label)
172 | 
173 |     @staticmethod
174 |     def compare_length(a, b):
175 |         return a.width == b.width and a.height == b.height
176 | 
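    # Illustrative sketch (editorial example, not from the original source): the
    # scoring helpers below average per-color binary dice over the 10 ARC colors,
    # skipping colors absent from both grids.
    #   a = Field([[1, 1], [0, 0]]); b = Field([[1, 0], [0, 0]])
    #   Field.dice(a, b)        # (0.8 + 2/3) / 2 ~= 0.733
    #   Field.sized_dice(a, b)  # equals dice() here; shrinks when shapes differ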
177 | @staticmethod 178 | def dice(a, b): 179 | dist = [multiclass_dice(a, b, i) for i in range(10)] 180 | dist = [d for d in dist if d is not None] 181 | return np.mean(dist) 182 | 183 | @staticmethod 184 | def sized_dice(a, b): 185 | if Field.compare_length(a, b): 186 | return Field.dice(a, b) 187 | h = min(a.height, b.height) 188 | w = min(a.width, b.width) 189 | a_ = Field(a.data[:h, :w]) 190 | b_ = Field(b.data[:h, :w]) 191 | d = Field.dice(a_, b_) 192 | size_coef = 2 * w * h / (a.width * a.height + b.width * b.height) 193 | return size_coef * d 194 | 195 | @classmethod 196 | def distance(cls, a, b): 197 | return 1 - cls.dice(a, b) 198 | 199 | @classmethod 200 | def score(cls, a, b): 201 | return cls.sized_dice(a, b) 202 | 203 | def str_iter(self): 204 | yield "|" 205 | for line in self.data: 206 | for x in line: 207 | yield str(x) 208 | yield "|" 209 | 210 | def __repr__(self): 211 | return "".join(self.str_iter()) 212 | 213 | def zeros(self, multiplier=1): 214 | return self.consts(value=0, multiplier=multiplier) 215 | 216 | def consts(self, value=1, multiplier=1): 217 | new_shape = tuple([x * multiplier for x in self.data.shape]) 218 | return Field(value * np.ones(new_shape, dtype=self.data.dtype)) 219 | 220 | @staticmethod 221 | def fromstring(s): 222 | assert s[0] == "|" 223 | data = [[int(x) for x in line] for line in s[1:-1].split("|")] 224 | return Field(data) 225 | 226 | def build_nxgraph(self, connectivity={0: 4}, properties=None): 227 | def get_features(data): 228 | return np.stack([(data == i) * 1.0 for i in range(10)], 0) 229 | 230 | graph_nx = nx.Graph() 231 | graph_nx.graph["global_features"] = np.asarray( 232 | [[(np.sum(self.data == i) > 0) * 1.0 for i in range(10)]] 233 | ).astype(np.float64) 234 | all_features = get_features(self.data) 235 | node_ids = OrderedDict() # node id -> (i, j) pair 236 | node_coords = OrderedDict() # node (i, j) pair -> id 237 | 238 | regions0 = get_data_regions(self.data) 239 | params0, maps0 = get_region_params(regions0) 240 | 241 | regions1 = get_data_regions(self.data, connectivity=1) 242 | params1, maps1 = get_region_params(regions1, connectivity=1) 243 | 244 | for i in range(self.data.shape[0]): 245 | for j in range(self.data.shape[1]): 246 | new_id = len(node_ids) 247 | node_ids[new_id] = (i, j) 248 | node_coords[(i, j)] = new_id 249 | 250 | color = self.data[i, j] 251 | 252 | left = i == 0 # left 253 | top = j == 0 # top 254 | right = i == self.data.shape[0] - 1 # right 255 | bottom = j == self.data.shape[1] - 1 # bottom 256 | features = [left, right, top, bottom] 257 | neighbours = [ 258 | (i1, j1) 259 | for i1, j1 in product([i - 1, i, i + 1], [j - 1, j, j + 1]) 260 | if (i1 != i or j1 != j) 261 | and i1 >= 0 262 | and j1 >= 0 263 | and i1 < self.data.shape[0] 264 | and j1 < self.data.shape[1] 265 | ] 266 | if connectivity.get(color, 4) == 4: 267 | neighbours = [ 268 | (i1, j1) for i1, j1 in neighbours if (i1 == i or j1 == j) 269 | ] 270 | # angle 90 271 | 272 | angle_props = [] 273 | for d1, d2 in [(-1, -1), (-1, +1), (+1, -1), (+1, +1)]: 274 | angle_270 = False 275 | left_shift = False 276 | top_shift = False 277 | if self.get(i + d1, j + d2) != color: 278 | angle_270 = ( 279 | self.get(i, j + d2) == color 280 | and self.get(i + d1, j) == color 281 | ) 282 | left_shift = self.get(i + d1, j) != color 283 | top_shift = self.get(i, j + d2) != color 284 | angle_props.extend([angle_270, left_shift, top_shift]) 285 | features.extend(angle_props) 286 | # for c in self.get(i - 1, j), self.get(i, j - 1), self.get(i+1, j), self.get 287 | # 
for i1 in (i - 1, i + 1) if i1 >=0 and i1 < self.data.shape[0]
288 |                 # if not left
289 |                 ncolors = set([self.data[i1, j1] for i1, j1 in neighbours])
290 |                 ncolors = [(i in ncolors) * 1 for i in range(10)]
291 |                 props = {
292 |                     "features": np.asarray(features).astype(np.float64),  # np.float is removed in modern NumPy
293 |                     "neighbours": neighbours,
294 |                     "neighbour_colors": np.asarray(ncolors),
295 |                     "color": self.data[i, j],
296 |                     "x": all_features[:, i, j].astype(np.float64),
297 |                     "pos": (i, j),
298 |                 }
299 |                 rid0 = regions0[i, j]
300 |                 p = [params0[rid0][k] for k in self.prop_names]
301 |                 rid1 = regions1[i, j]
302 |                 p += [params1[rid1][k] for k in self.prop_names]
303 | 
304 |                 if properties is not None:
305 |                     props["properties"] = properties[i, j]
306 |                 props["component_params"] = np.asarray(p)
307 | 
308 |                 graph_nx.add_node(
309 |                     new_id,
310 |                     # features=np.asarray(features).astype(np.float),
311 |                     # neighbours=neighbours,
312 |                     # neighbour_colors = np.asarray(ncolors),
313 |                     # color=self.data[i, j],
314 |                     # x=all_features[:, i, j].astype(np.float64),
315 |                     # pos=(i, j)
316 |                     **props,
317 |                 )
318 | 
319 |         for i in range(self.data.shape[0]):
320 |             for j in range(self.data.shape[1]):
321 |                 # neighbours = [
322 |                 #     (i1, j1)
323 |                 #     for i1, j1 in product([i - 1, i, i + 1], [j - 1, j, j + 1])
324 |                 #     if (i1 != i or j1 != j) and i1 >= 0 and j1 >= 0
325 |                 #     and i1 < self.data.shape[0] and j1 < self.data.shape[1]
326 |                 # ]
327 |                 id0 = node_coords[(i, j)]
328 |                 color0 = self.data[i, j]
329 | 
330 |                 neighbours = graph_nx.nodes[id0]["neighbours"]
331 | 
332 |                 for i1, j1 in neighbours:
333 |                     id1 = node_coords[(i1, j1)]
334 |                     color1 = self.data[i1, j1]
335 |                     if color0 == color1:
336 |                         graph_nx.add_edge(
337 |                             id0,
338 |                             id1,
339 |                             features=np.asarray(
340 |                                 [(color0 == x) * 1.0 for x in range(10)]
341 |                             ).astype(np.float64),
342 |                         )
343 |                     # graph_nx.add_edge(id1, id0, features=[color0])
344 | 
345 |         # graph_nx.add_node()
346 |         return graph_nx
347 | 
348 | 
349 | class ComplexField:
350 |     def __init__(self, data, **params):
351 |         self.data = data
352 |         self.params = params
353 |         self.multiplier = 0.5
354 | 
355 |     @property
356 |     def shape(self):
357 |         if len(self.data) > 0:
358 |             if isinstance(self.data[0], list):
359 |                 return (len(self.data), len(self.data[0]))
360 |             return (len(self.data),)
361 | 
362 |     @property
363 |     def width(self):
364 |         if len(self.shape) == 1:
365 |             return 1
366 |         return self.shape[1]
367 | 
368 |     @property
369 |     def height(self):
370 |         return self.shape[0]
371 | 
372 |     def flat_iter(self):
373 |         for line in self.data:
374 |             if isinstance(line, list):
375 |                 for x in line:
376 |                     yield x
377 |             else:
378 |                 yield line
379 | 
380 |     def map(self, func):
381 |         new_data = [[func(x) for x in line] for line in self.data]
382 |         return ComplexField(new_data, **self.params)
383 | 
384 |     def show(self, ax=None, label=None):
385 |         if ax is None:
386 |             plt.figure(
387 |                 figsize=(self.width * self.multiplier, self.height * self.multiplier)
388 |             )
389 |             ax = plt.gca()
390 |         pass
391 | 
392 |     def __str__(self):
393 |         return f"ComplexField({self.shape}, {self.params})"
394 | 
--------------------------------------------------------------------------------
/kaggle_arc/base/iodata.py:
--------------------------------------------------------------------------------
1 | import rootutils
2 | 
3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
4 | 
5 | import json
6 | import matplotlib
7 | from matplotlib import cm
8 | import matplotlib.gridspec as gridspec
9 | import seaborn as sns
10 | from itertools import islice
11 | 
12 | from base.field import *
13 | 
14 | 
15 | class IOData:
16 |     """Represents a single example of an ARC puzzle. An example may contain an input/output pair or, in the case of test data, only an input.
17 |     """
18 |     __slots__ = ["input_field", "output_field", "colormap"]
19 | 
20 |     def __init__(self, data=None, input_field=None, output_field=None):
21 |         # data['train'][0]['input']
22 |         self.input_field = input_field
23 |         self.output_field = output_field
24 |         if data is not None:
25 |             if "input" in data:
26 |                 self.input_field = Field(data["input"])
27 |             if "output" in data:
28 |                 self.output_field = Field(data["output"])
29 |         self.colormap = None
30 | 
31 |     @property
32 |     def input_processed(self):
33 |         i = self.input_field.data
34 |         o = self.output_field
35 |         if o is not None:
36 |             o = o.data
37 |         if self.colormap is None:
38 |             self.colormap = build_colormap(i, o)
39 |         data = [[self.colormap.get(x, x) for x in line] for line in i]
40 |         return Field(data)
41 | 
42 |     @property
43 |     def output_processed(self):
44 |         if self.colormap is None:
45 |             self.colormap = build_colormap(
46 |                 self.input_field.data, self.output_field.data
47 |             )
48 |         data = [
49 |             [self.colormap.get(x, x) for x in line] for line in self.output_field.data
50 |         ]
51 |         return Field(data)
52 | 
53 |     def reconstruct(self, field):
54 |         if self.colormap is None:
55 |             return field
56 |         new_data = [[self.colormap.get(x, x) for x in line] for line in field.data]
57 |         return Field(new_data)
58 | 
59 |     def show(self, fig=None, axes=None, predictor=None, npredictions=1):
60 |         if fig is None:
61 |             if predictor is not None:
62 |                 fig, axes = plt.subplots(nrows=1, ncols=2 + npredictions)
63 |             else:
64 |                 fig, axes = plt.subplots(nrows=1, ncols=2)
65 |         ax0, ax1 = axes[:2]
66 |         ax0.set_xticks([])
67 |         ax0.set_yticks([])
68 |         ax1.set_xticks([])
69 |         ax1.set_yticks([])
70 |         if self.input_field is not None:
71 |             self.input_field.show(ax0, label="input")
72 |             # ax0.axis("off")
73 |         if self.output_field is not None:
74 |             self.output_field.show(ax1, label="output")
75 |             # ax1.axis("off")
76 |         if predictor is not None:
77 |             for i, prediction in enumerate(
78 |                 islice(predictor.predict(self.input_field), npredictions)
79 |             ):
80 |                 ax = axes[2 + i]
81 |                 ax.set_xticks([])
82 |                 ax.set_yticks([])
83 |                 prediction.show(ax)
84 |                 # ax.axis("off")
85 | 
86 |     def t(self):
87 |         result = [self.input_field.t()]
88 |         if self.output_field is not None:
89 |             result.append(self.output_field.t())
90 |         return tuple(result)
91 | 
92 |     def t_splitted(self):
93 |         result = [self.input_field.t_splitted()]
94 |         if self.output_field is not None:
95 |             result.append(self.output_field.t_splitted())
96 |         return tuple(result)
97 | 
98 | 
99 | class Sample:
100 |     """Represents a single ARC puzzle.
101 | """ 102 | __slots__ = ["name", "train", "test"] 103 | 104 | def __init__(self, name, path): 105 | self.name = name 106 | 107 | if isinstance(path, str): 108 | with open(path) as f: 109 | puzzle_data = json.load(f) 110 | solutions = None 111 | # self.test = [ IOData(sample) for sample in puzzle_data.get('test', []) ] 112 | else: 113 | (puzzle_data, solutions) = path 114 | 115 | self.train = [IOData(sample) for sample in puzzle_data.get("train", [])] 116 | 117 | if solutions is None or len(solutions) == 0: 118 | self.test = [IOData(sample) for sample in puzzle_data.get("test", [])] 119 | else: 120 | self.test = [ 121 | IOData(sample, output_field=Field(solution)) 122 | for sample, solution in zip(puzzle_data.get("test", []), solutions) 123 | ] 124 | 125 | def predict(self, predictors): 126 | predictions = [] 127 | for sample in self.iterate_test(): 128 | pred = [predictor.predict(sample) for predictor in predictors] 129 | predictions.append(pred) 130 | return predictions 131 | 132 | def show( 133 | self, 134 | fig=None, 135 | grids=[None, None, None], 136 | w=2, 137 | h=2, 138 | ncols=2, 139 | predictor=None, 140 | npredictions=3, 141 | title="", 142 | ): 143 | ntrain = len(self.train) 144 | ntest = len(self.test) 145 | ncols += npredictions 146 | if predictor is not None: 147 | if not predictor.is_available(self.train): 148 | predictor = None 149 | else: 150 | predictor.train(self.train) 151 | gs, train_gs, test_gs = grids 152 | if fig is None: 153 | fig = plt.figure(figsize=(ncols * w, (ntrain + ntest) * h)) 154 | plt.title(title) 155 | ax = plt.gca() 156 | for edge, spine in ax.spines.items(): 157 | spine.set_visible(False) 158 | ax.set_xticklabels([]) 159 | ax.set_yticklabels([]) 160 | ax.set_xticks([]) 161 | ax.set_yticks([]) 162 | 163 | if gs is None: 164 | gs = gridspec.GridSpec( 165 | 2, 1, figure=fig, height_ratios=[ntrain, ntest], hspace=0.1 166 | ) 167 | if train_gs is None: 168 | train_gs = gridspec.GridSpecFromSubplotSpec( 169 | ntrain, ncols, subplot_spec=gs[0] 170 | ) 171 | if test_gs is None: 172 | test_gs = gridspec.GridSpecFromSubplotSpec(ntest, ncols, subplot_spec=gs[1]) 173 | 174 | if train_gs is not None: 175 | train_ax = fig.add_subplot(gs[0]) 176 | train_ax.set_xticks([]) 177 | train_ax.set_yticks([]) 178 | train_ax.set_ylabel("Train samples") 179 | for i in range(ntrain): 180 | ax0 = fig.add_subplot(train_gs[i, 0]) 181 | ax1 = fig.add_subplot(train_gs[i, 1]) 182 | self.train[i].show(fig=fig, axes=[ax0, ax1]) 183 | if predictor is not None: 184 | preds = islice( 185 | predictor.predict(self.train[i].input_field), npredictions 186 | ) 187 | for k, prediction in enumerate(preds): 188 | ax = fig.add_subplot(train_gs[i, k + 2]) 189 | ax.set_xticks([]) 190 | ax.set_yticks([]) 191 | dice = Field.score(prediction, self.train[i].output_field) 192 | prediction.show(ax, label=f"{dice:1.4f}") 193 | 194 | if test_gs is not None: 195 | test_ax = fig.add_subplot(gs[1]) 196 | test_ax.set_xticks([]) 197 | test_ax.set_yticks([]) 198 | test_ax.set_ylabel("Test samples") 199 | for i in range(ntest): 200 | ax0 = fig.add_subplot(test_gs[i, 0]) 201 | ax1 = fig.add_subplot(test_gs[i, 1]) 202 | # npredictions=1 203 | # pred_ax = [fig.add_subplot(test_gs[i, 2+k]) for k in range(npredictions)] 204 | self.test[i].show(fig=fig, axes=[ax0, ax1]) 205 | # predictor=predictor, npredictions=npredictions) 206 | if predictor is not None: 207 | preds = islice( 208 | predictor.predict(self.test[i].input_field), npredictions 209 | ) 210 | for k, prediction in enumerate(preds): 211 | ax = 
fig.add_subplot(test_gs[i, k + 2])
212 |                     ax.set_xticks([])
213 |                     ax.set_yticks([])
214 |                     if self.test[i].output_field is not None:
215 |                         dice = Field.score(prediction, self.test[i].output_field)
216 |                         dice = f"{dice:1.4f}"
217 |                     else:
218 |                         dice = "-"
219 |                     prediction.show(ax, label=dice)
--------------------------------------------------------------------------------
/kaggle_arc/base/transformers.py:
--------------------------------------------------------------------------------
1 | import rootutils
2 | 
3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
4 | 
5 | import numpy as np
6 | from base.field import Field
7 | from base.iodata import IOData
8 | 
9 | 
10 | def resize_output(iodata):
11 |     if isinstance(iodata, list):
12 |         return [resize_output(data) for data in iodata]
13 |     h, w = iodata.input_field.shape
14 |     if iodata.output_field is not None:
15 |         output = iodata.output_field.data[:h, :w]
16 |         output = Field(output)
17 |     else:
18 |         output = None
19 |     return IOData(input_field=iodata.input_field, output_field=output)
20 | 
21 | 
22 | def crop_data(data):
23 |     cols = np.argwhere(data.std(0) > 0).flatten()  # columns with variation (std over axis 0)
24 |     rows = np.argwhere(data.std(1) > 0).flatten()  # rows with variation (std over axis 1)
25 |     if len(cols) < 1 or len(rows) < 1:
26 |         return data
27 |     return data[min(rows) : max(rows) + 1, min(cols) : max(cols) + 1]
--------------------------------------------------------------------------------
/kaggle_arc/base/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Common helper functions
3 | """
4 | 
5 | import rootutils
6 | 
7 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
8 | 
9 | import skimage.measure as sk_measure
10 | from itertools import product
11 | import numpy as np
12 | 
13 | 
14 | def get_data_regions(data, connectivity=None):
15 |     "returns distinct regions for colors 0-9"
16 |     l = sk_measure.label(data, connectivity=connectivity)
17 |     lz = sk_measure.label(data == 0, connectivity=connectivity)
18 |     m = np.max(l)
19 |     lz += m
20 |     ids = np.where(data == 0)
21 |     l[ids] = lz[ids]
22 |     return l
23 | 
24 | 
25 | def make_convex(r):
26 |     mask = np.zeros(r.shape)
27 |     mask[np.where(r)] = 1
28 |     coords = np.argwhere(r)
29 |     for xcoord in np.unique(coords[:, 0]):
30 |         y = coords[np.argwhere(coords[:, 0] == xcoord), 1]
31 |         mask[xcoord, np.min(y) : np.max(y)] = 1
32 |     for ycoord in np.unique(coords[:, 1]):
33 |         x = coords[np.argwhere(coords[:, 1] == ycoord), 0]
34 |         mask[np.min(x) : np.max(x), ycoord] = 1
35 |     # print(np.unique(coords[:, 0]))
36 |     return mask == 1
37 | 
38 | 
39 | def fill_region_holes(r):
40 |     mask = np.zeros(r.shape)
41 |     mask[np.where(r)] = 1
42 |     coords = np.argwhere(r)
43 |     xmin, ymin = np.min(coords, 0)
44 |     xmax, ymax = np.max(coords, 0)
45 |     for i in range(xmin, xmax + 1):
46 |         for j in range(ymin, ymax + 1):
47 |             x = coords[np.argwhere(coords[:, 1] == j), 0]
48 |             if not (np.min(x) <= i and i <= np.max(x)):
49 |                 continue
50 |             y = coords[np.argwhere(coords[:, 0] == i), 1]
51 |             if not (np.min(y) <= j and j <= np.max(y)):
52 |                 continue
53 |             mask[i, j] = 1
54 |     return mask == 1
55 | 
56 | 
57 | def split_interior(r, connectivity=None):
58 |     mask = np.zeros(r.shape)
59 |     shifts = [
60 |         (i, j) for i, j in product([-1, 0, 1], [-1, 0, 1]) if not (i == 0 and j == 0)
61 |     ]
62 |     if connectivity is not None:
63 |         shifts = [(i, j) for (i, j) in shifts if i == 0 or j == 0]
64 |     for x, y in np.argwhere(r):
65 |         neighbours = [(x + i, y + j) for i, j in shifts]
66 |         neighbours = [
67 |             (i, j)
68 |             for i, j in 
neighbours
69 |             if i >= 0 and j >= 0 and i < r.shape[0] and j < r.shape[1]
70 |         ]
71 |         if np.any([r[i, j] != 1 for i, j in neighbours]):
72 |             mask[x, y] = 1
73 |     mask = mask == 1
74 |     return mask, (~mask) * r
75 | 
76 | 
77 | def get_region_params(r, connectivity=None):
78 |     params = dict()
79 |     maps = dict()
80 |     for rid in np.unique(r):
81 |         params[rid] = dict()
82 |         maps[rid] = dict()
83 |         region = r == rid
84 |         m = np.argwhere(region)
85 |         xmin, ymin = np.min(m, 0)
86 |         xmax, ymax = np.max(m, 0)
87 |         xmean, ymean = np.mean(m, 0)
88 |         # print(xmin)
89 |         params[rid]["h"] = xmax - xmin + 1  # height spans rows (first index)
90 |         params[rid]["w"] = ymax - ymin + 1  # width spans columns (second index)
91 |         params[rid]["xmin"] = xmin
92 |         params[rid]["xmax"] = xmax
93 |         params[rid]["ymin"] = ymin
94 |         params[rid]["ymax"] = ymax
95 |         params[rid]["xmean"] = int(xmean)
96 |         params[rid]["ymean"] = int(ymean)
97 |         conv = make_convex(region)
98 | 
99 |         maps[rid]["convex"] = conv
100 | 
101 |         params[rid]["is_convex"] = np.all(conv == region)
102 |         no_holes = fill_region_holes(region)
103 | 
104 |         maps[rid]["no_holes"] = no_holes
105 | 
106 |         is_rectangular = no_holes[xmin : xmax + 1, ymin : ymax + 1].mean() == 1
107 |         params[rid]["is_rectangular"] = is_rectangular
108 |         params[rid]["is_square"] = is_rectangular and xmax - xmin + 1 == ymax - ymin + 1
109 |         area = region[xmin : xmax + 1, ymin : ymax + 1]
110 |         convex_area = conv[xmin : xmax + 1, ymin : ymax + 1]
111 | 
112 |         operations = [
113 |             lambda inp: np.fliplr(inp),
114 |             lambda inp: np.rot90(np.fliplr(inp), 1),
115 |             lambda inp: np.rot90(np.fliplr(inp), 2),
116 |             lambda inp: np.rot90(np.fliplr(inp), 3),
117 |             lambda inp: np.flipud(inp),
118 |             lambda inp: np.rot90(np.flipud(inp), 1),
119 |             lambda inp: np.rot90(np.flipud(inp), 2),
120 |             lambda inp: np.rot90(np.flipud(inp), 3),
121 |             lambda inp: np.fliplr(np.flipud(inp)),
122 |             lambda inp: np.flipud(np.fliplr(inp)),
123 |         ]
124 |         for i, op in enumerate(operations):
125 |             transformed_area = op(area)
126 |             params[rid][f"flip_{i}"] = (transformed_area.shape == area.shape) and np.all(transformed_area == area)
127 |             transformed_convex_area = op(convex_area)
128 |             params[rid][f"flip_conv_{i}"] = (transformed_convex_area.shape == convex_area.shape) and np.all(transformed_convex_area == convex_area)
129 | 
130 |         inner_regions = [
131 |             x for x in np.unique(r[np.where(no_holes)]) if x != rid and x != 0
132 |         ]
133 |         params[rid]["inner_regions"] = inner_regions
134 |         params[rid]["holes"] = len(inner_regions)
135 |         contour, interior = split_interior(region, connectivity=connectivity)
136 | 
137 |         maps[rid]["contour"] = contour
138 |         maps[rid]["interior"] = interior
139 | 
140 |         params[rid]["contour_size"] = np.sum(contour)
141 |         params[rid]["interior_size"] = np.sum(interior)
142 | 
143 |     return params, maps
--------------------------------------------------------------------------------
/kaggle_arc/constants.py:
--------------------------------------------------------------------------------
1 | import matplotlib
2 | import seaborn as sns
3 | 
4 | DATADIR = "../input/abstraction-and-reasoning-challenge"
5 | 
6 | WORKDIR = "../working"
7 | TEST_SAVEPATH = "../working/submission.csv"
8 | 
9 | PALETTE = sns.crayon_palette(
10 |     (
11 |         "Eggplant,Aquamarine,Jungle Green,Atomic Tangerine,Blue Bell,Wisteria,"
12 |         + "Banana Mania,Blue Violet,Carnation Pink,Cerise"
13 |     ).split(",")
14 | )  # list(sns.crayons)[:10])
15 | COLORMAP = matplotlib.colors.ListedColormap(PALETTE)
16 | NORM = matplotlib.colors.Normalize(vmin=0, vmax=9)
17 | 
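# Hedged usage sketch (editorial, not part of the original file): these
# constants are consumed together by matplotlib, exactly as in Field.show:
#
#   import matplotlib.pyplot as plt
#   from constants import COLORMAP, NORM
#   plt.imshow(field.data, cmap=COLORMAP, norm=NORM)  # maps colors 0-9 to the fixed palette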
--------------------------------------------------------------------------------
/kaggle_arc/operations/basic.py:
--------------------------------------------------------------------------------
1 | import rootutils
2 | 
3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
4 | 
5 | import numpy as np
6 | 
7 | from base.field import Field
8 | 
9 | 
10 | def func_identity(x):
11 |     return x
12 | 
13 | 
14 | def const_func_wrapper(i):
15 |     return lambda x: i
16 | 
17 | 
18 | candidate_functions = [
19 |     func_identity,
20 | ] + [const_func_wrapper(i) for i in range(10)]
21 | 
22 | 
23 | class Operation:
24 |     def __call__(self, data):
25 |         pass
26 | 
27 | 
28 | class Replace(Operation):
29 |     def __init__(self, replacements):
30 |         # replacements is an array with 10 elements - some permutation of numbers 0..9
31 |         self.replacements = replacements
32 |         self.repl_func = np.vectorize(lambda x: self.replacements[x])
33 | 
34 |     def __call__(self, data):
35 |         c = data.copy()
36 |         c = self.repl_func(c)
37 |         return c  # Field(c)
38 | 
39 | 
40 | class Repaint(Operation):
41 |     def __init__(self, input_data):
42 |         data = np.unique(input_data, return_counts=True)
43 |         s = sorted(zip(*data), key=lambda x: x[1], reverse=True)
44 |         self.replacements = [x for x, y in s]
45 | 
46 |     def build_replacements_dict(self, data, filter_zero=False):
47 |         replacements = self.replacements
48 |         if filter_zero:
49 |             replacements = [k for k in self.replacements if k != 0]
50 |             data = [d for d in data if d != 0]
51 |         repl_dict = dict(list(zip(data, replacements)))
52 |         return repl_dict
53 | 
54 |     def __call__(self, input_data):
55 |         data = np.unique(input_data, return_counts=True)
56 |         data = sorted(zip(*data), key=lambda x: x[1], reverse=True)
57 |         data = [x for x, y in data]
58 |         replacements = self.build_replacements_dict(data, filter_zero=True)
59 |         # print(replacements)
60 |         if len(replacements) < 1:
61 |             return input_data
62 |         repl_coords = {k: np.where(input_data == k) for k in replacements}
63 |         result = input_data.copy()
64 |         for k, (x, y) in repl_coords.items():
65 |             c = replacements[k]
66 |             result[x, y] = c
67 |         return result
--------------------------------------------------------------------------------
/kaggle_arc/operations/field2point.py:
--------------------------------------------------------------------------------
1 | """
2 | First, we define methods for various field-to-color conversion operations.
3 | """ 4 | 5 | import rootutils 6 | 7 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 8 | 9 | 10 | import numpy as np 11 | 12 | from skimage.measure import label 13 | 14 | from operations.basic import Operation 15 | from base.field import * 16 | from base.iodata import * 17 | 18 | 19 | class IrreversibleOperation: 20 | def __init__(self): 21 | pass 22 | 23 | def do(self): 24 | pass 25 | 26 | 27 | def most_frequent_color(data, bg=None): 28 | # print("most frequent color") 29 | if bg is None: 30 | return np.argmax([np.sum(data == i) for i in range(10)]) 31 | s = [np.sum(data == i) for i in range(10) if i != bg] 32 | if np.sum(s) > 0: 33 | return np.argmax(s) 34 | return bg 35 | 36 | 37 | def least_frequent_color(data, bg=None): 38 | # print("least frequent color") 39 | if bg is None: 40 | s = {i: np.sum(data == i) for i in range(10)} 41 | s = [(k, v) for k, v in s.items() if v > 0] 42 | s = sorted(s, key=lambda x: x[1]) 43 | if len(s) > 0: 44 | # print(s) 45 | return s[0][0] 46 | return 0 47 | s = {i: np.sum(data == i) for i in range(10) if i != bg} 48 | s = [(k, v) for k, v in s.items() if v > 0] 49 | s = sorted(s, key=lambda x: x[1]) 50 | if len(s) > 0: 51 | return s[0][0] 52 | return bg 53 | 54 | 55 | def count_color_area(data, bg=0): 56 | # print("color area") 57 | return np.max(label(data != bg)) 58 | 59 | 60 | def count_color_area_bg(data, bg=0): 61 | # print("color area bg") 62 | return np.max(label(data == bg)) 63 | 64 | 65 | def compute_color_gradient(data): 66 | cg0 = [np.sum(data[i]) for i in range(data.shape[0])] 67 | cg1 = [np.sum(data[:, i]) for i in range(data.shape[1])] 68 | return tuple(cg0), tuple(cg1) 69 | 70 | 71 | def compute_weight_gradient(data, bg=0): 72 | return compute_color_gradient(data != bg) 73 | 74 | 75 | def count_colors(data, bg=None): 76 | # print("count colors") 77 | # print(len(np.unique(data))) 78 | return len(np.unique(data)) 79 | 80 | 81 | def make_positional_color_selector(x, y): 82 | def pos_color_selector(data, bg=None): 83 | if x >= data.shape[0] or x < -data.shape[0]: 84 | x0 = x % data.shape[0] 85 | else: 86 | x0 = x 87 | if y >= data.shape[1] or y < -data.shape[1]: 88 | y0 = y % data.shape[1] 89 | else: 90 | y0 = y 91 | return data.data[x0, y0] 92 | 93 | return pos_color_selector 94 | 95 | 96 | class SimpleSummarizeOperation(IrreversibleOperation): 97 | def __init__(self): 98 | self.bg = None 99 | self.func = None # lambda x, bg=0: x 100 | 101 | def train(self, iodata_list): 102 | if isinstance(iodata_list[0], IOData): 103 | iodata_list = [(x.input_field, x.output_field) for x in iodata_list] 104 | # elif isinstance(complex_iodata_list[0][0], ComplexField): 105 | # complex_iodata_list = [ 106 | # (xi, xo) for i, o in complex_iodata_list 107 | # for xi, xo in zip(i.flat_iter(), o.flat_iter())] 108 | candidates = [ 109 | most_frequent_color, 110 | least_frequent_color, 111 | count_color_area, 112 | count_color_area_bg, 113 | count_colors, 114 | ] 115 | color_dict = { 116 | tuple((i.data == i.data[0, 0]).flatten()): o.data[0, 0] 117 | for i, o in iodata_list 118 | } 119 | # candidates.append( 120 | # lambda x, bg: color_dict.get(tuple((x.data == x.data[0, 0]).flatten()), 0) 121 | # ) 122 | # candidates.extend([]) 123 | 124 | h, w = list(zip(*[i.shape for i, o in iodata_list])) 125 | hmin = np.min(h) 126 | wmin = np.min(w) 127 | for i in range(hmin): 128 | for j in range(wmin): 129 | func = [ 130 | make_positional_color_selector(i, j), 131 | make_positional_color_selector(i, -j), 132 | 
make_positional_color_selector(-i, j), 133 | make_positional_color_selector(-i, -j), 134 | ] 135 | candidates.extend(func) 136 | 137 | best_candidate = candidates[0] 138 | best_bg = dict() 139 | best_score = 0 140 | 141 | for candidate in candidates: 142 | best_bg[candidate] = None 143 | candidate_score = 0 144 | # candidate_bg = None 145 | scores = [] 146 | best_sample_score = 0 147 | for bg in list(range(10)) + [None]: 148 | score = [ 149 | Field.score(Field([[candidate(i.data, bg=bg)]]), o) 150 | for i, o in iodata_list 151 | ] 152 | mean_score = np.mean(score) 153 | if mean_score > best_sample_score: 154 | best_sample_score = mean_score 155 | best_bg[candidate] = [bg] 156 | candidate_score = best_sample_score 157 | # print(candidate_score, best_bg) 158 | # print(candidate_score) 159 | # best_bg[candidate_score] = (candidatecandidate_bg 160 | if candidate_score > best_score: 161 | best_score = candidate_score 162 | best_candidate = candidate 163 | 164 | self.func = best_candidate 165 | self.bg = best_bg[best_candidate] 166 | # self.bg = [self.bg[k] for k in sorted(self.bg)] 167 | # print(self.bg) 168 | # most_frequent_color 169 | pass 170 | 171 | def do(self, field, bg=None): 172 | if len(self.bg) != 1: 173 | # print("use bg from param", self.bg, bg) 174 | pixel = self.func(field, bg=bg) 175 | else: 176 | # print(self.bg) 177 | pixel = self.func(field, bg=self.bg[0]) 178 | # print(pixel, self.func) 179 | # print(np.asarray(pixel)) 180 | return Field([[pixel]]) 181 | 182 | 183 | class ComplexSummarizeOperation(IrreversibleOperation): 184 | def __init__(self): 185 | self.bg = None 186 | self.func = None # lambda x, bg=0: x 187 | 188 | def train(self, complex_iodata_list): 189 | if isinstance(complex_iodata_list[0], IOData): 190 | complex_iodata_list = [ 191 | (x.input_field, x.output_field) for x in complex_iodata_list 192 | ] 193 | # elif isinstance(complex_iodata_list[0][0], ComplexField): 194 | # complex_iodata_list = [ 195 | # (xi, xo) for i, o in complex_iodata_list 196 | # for xi, xo in zip(i.flat_iter(), o.flat_iter())] 197 | candidates = [ 198 | most_frequent_color, 199 | least_frequent_color, 200 | count_color_area, 201 | count_color_area_bg, 202 | count_colors, 203 | ] 204 | best_candidate = candidates[0] 205 | best_bg = dict() 206 | best_score = 0 207 | 208 | for candidate in candidates: 209 | best_bg[candidate] = dict() 210 | candidate_score = 0 211 | # candidate_bg = None 212 | scores = [] 213 | for k, (inp, out) in enumerate(complex_iodata_list): 214 | 215 | iodata_list = list(zip(inp.flat_iter(), out.flat_iter())) 216 | # iodata_list = list(zip([x for xs in inp for x in xs], [x for xs in out for x in xs])) 217 | best_sample_score = 0 218 | for bg in list(range(10)) + [None]: 219 | score = [ 220 | Field.score(Field([[candidate(i.data, bg=bg)]]), o) 221 | for i, o in iodata_list 222 | ] 223 | mean_score = np.mean(score) 224 | if mean_score > best_sample_score: 225 | best_sample_score = mean_score 226 | best_bg[candidate][k] = bg 227 | scores.append(best_sample_score) 228 | candidate_score = np.mean(scores) 229 | # print(candidate_score, best_bg) 230 | # print(candidate_score) 231 | # best_bg[candidate_score] = (candidatecandidate_bg 232 | if candidate_score > best_score: 233 | best_score = candidate_score 234 | best_candidate = candidate 235 | 236 | self.func = best_candidate 237 | self.bg = best_bg[best_candidate] 238 | self.bg = [self.bg[k] for k in sorted(self.bg)] 239 | bg = set() 240 | for k in self.bg: 241 | bg.add(k) 242 | self.bg = list(bg) 243 | # print(self.bg) 244 
| # most_frequent_color 245 | pass 246 | 247 | def do(self, field, bg=None): 248 | if len(self.bg) != 1: 249 | # print("use bg from param", self.bg, bg) 250 | pixel = self.func(field, bg=bg) 251 | else: 252 | # print(self.bg) 253 | pixel = self.func(field, bg=self.bg[0]) 254 | # print(pixel, self.func) 255 | # print(np.asarray(pixel)) 256 | return Field([[pixel]]) 257 | -------------------------------------------------------------------------------- /kaggle_arc/operations/resizing.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | import numpy as np 6 | from operations.basic import Operation 7 | 8 | 9 | class Repeater(Operation): 10 | def __init__(self, m1, m2): 11 | self.m1 = m1 12 | self.m2 = m2 13 | 14 | def __call__(self, data): 15 | height, width = data.shape 16 | result = np.zeros((height * self.m1, width * self.m2), dtype=data.dtype) 17 | # for offset1 in range(self.m1): 18 | # for offset2 in range(self.m2): 19 | for i in range(height): 20 | for j in range(width): 21 | result[i::height, j::width] = data[i, j] 22 | return result 23 | 24 | 25 | class Mirror(Operation): 26 | def __init__(self, m1, m2, horizontal=True, vertical=True): 27 | self.m1 = m1 28 | self.m2 = m2 29 | self.horizontal = horizontal 30 | self.vertical = vertical 31 | 32 | def __call__(self, data): 33 | height, width = data.shape 34 | result = np.zeros((height * self.m1, width * self.m2), dtype=data.dtype) 35 | # for offset1 in range(self.m1): 36 | # for offset2 in range(self.m2): 37 | for i in range(height): 38 | for j in range(width): 39 | result[i::height, j::width] = data[i, j] 40 | if self.vertical: 41 | result[height + i :: 2 * height, j::width] = data[height - 1 - i, j] 42 | if self.horizontal: 43 | result[i::height, width + j :: 2 * width] = data[i, width - 1 - j] 44 | if self.horizontal and self.vertical: 45 | result[height + i :: 2 * height, width + j :: 2 * width] = data[ 46 | height - 1 - i, width - 1 - j 47 | ] 48 | return result 49 | 50 | 51 | class Resizer(Operation): 52 | def __init__(self, m1, m2): 53 | self.m1 = m1 54 | self.m2 = m2 55 | 56 | def __call__(self, data): 57 | height, width = data.shape 58 | result = np.zeros((height * self.m1, width * self.m2), dtype=data.dtype) 59 | for i in range(height): 60 | for j in range(width): 61 | result[ 62 | i * self.m1 : (i + 1) * self.m1, j * self.m2 : (j + 1) * self.m2 63 | ] = data[i, j] 64 | return result 65 | 66 | 67 | class Fractal(Operation): 68 | def __init__(self, m1, m2): 69 | self.m1 = m1 70 | self.m2 = m2 71 | 72 | def __call__(self, data): 73 | height, width = data.shape 74 | result = np.zeros((height * self.m1, width * self.m2), dtype=data.dtype) 75 | for i in range(height): 76 | for j in range(width): 77 | result[i::height, j::width] = data[i, j] 78 | for i in range(height): 79 | for j in range(width): 80 | if data[i, j] == 0: 81 | result[ 82 | i * self.m1 : (i + 1) * self.m1, j * self.m2 : (j + 1) * self.m2 83 | ] = 0 # field.data[i, j] 84 | return result 85 | -------------------------------------------------------------------------------- /kaggle_arc/operations/reversible.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | import numpy as np 6 | 7 | from base.field import * 8 | from base.iodata import * 9 | from operations.basic import 
candidate_functions 10 | 11 | 12 | class ReversibleOperation: 13 | def __init__(self): 14 | pass 15 | 16 | def do(self, field): 17 | pass 18 | 19 | def od(self, field): 20 | pass 21 | 22 | 23 | def split2shape(field, target_shape, hsep=0, wsep=0, outer_sep=False): 24 | h, w = target_shape 25 | fh, fw = field.shape 26 | if outer_sep: 27 | hpart = (fh - hsep) // h - hsep 28 | wpart = (fw - wsep) // w - wsep 29 | else: 30 | hpart = (fh + hsep) // h - hsep 31 | wpart = (fw + wsep) // w - wsep 32 | 33 | splitted = [] 34 | splitters = np.ones(field.data.shape, dtype=field.dtype) 35 | for i in range(h): 36 | line = [] 37 | hstart = outer_sep * hsep + i * (hpart + hsep) 38 | for j in range(w): 39 | wstart = outer_sep * wsep + j * (wpart + wsep) 40 | subfield = Field( 41 | field.data[hstart : hstart + hpart, wstart : wstart + wpart] 42 | ) 43 | line.append(subfield) 44 | splitters[hstart : hstart + hpart, wstart : wstart + wpart] = 0 45 | splitted.append(line) 46 | cf = ComplexField(splitted, separator=splitters * field.data, splitter=splitters) 47 | return cf 48 | 49 | 50 | def split_by_shape(field, subshape, hsep=0, wsep=0, outer_sep=False): 51 | h, w = subshape 52 | fh, fw = field.shape 53 | # hpart = (fh - outer_sep*hsep) // h - hsep 54 | # wpart = (fw - outer_sep*wsep) // w - wsep 55 | 56 | splitted = [] 57 | splitters = np.ones(field.data.shape, dtype=field.dtype) 58 | 59 | for i in range(outer_sep * 1, fh, h + hsep): 60 | line = [] 61 | sep_line = [] 62 | for j in range(outer_sep * 1, fw, w + wsep): 63 | subfield = Field(field.data[i : i + h, j : j + w]) 64 | line.append(subfield) 65 | splitters[i : i + h, j : j + w] = 0 66 | # sep_line.append(Field(field.data[i + h:i+h+hsep])) 67 | splitted.append(line) 68 | sep = splitters * field.data 69 | cf = ComplexField(splitted, separator=sep, splitter=splitters) 70 | return cf 71 | 72 | 73 | def collect_field(multifield, hsep=0, wsep=0, outer_sep=False, sep_color=0): 74 | all_lines = [] 75 | for line in multifield.data: 76 | line_data = [] 77 | shape = list({x.shape for x in line})[0] 78 | sep = np.ones((shape[0], wsep), dtype=np.uint8) * sep_color 79 | if outer_sep and hsep > 0: 80 | line_data.append(sep) 81 | for x in line: 82 | line_data.append(x.data) 83 | if wsep > 0: 84 | line_data.append(sep) 85 | if not outer_sep and wsep > 0: 86 | line_data = line_data[:-1] 87 | line_data = np.concatenate( 88 | line_data, 1 89 | ) # np.concatenate([x.data for x in line], 1) 90 | all_lines.append(line_data) 91 | # collect all line parts 92 | shape = list({x.shape for x in all_lines})[0] 93 | sep = np.ones((hsep, shape[1])) * sep_color 94 | line_data = [] 95 | if outer_sep: 96 | line_data.append(sep) 97 | for l in all_lines: 98 | line_data.append(l) 99 | if hsep > 0: 100 | line_data.append(sep) 101 | if not outer_sep and hsep > 0: 102 | line_data = line_data[:-1] 103 | all_lines = np.concatenate(line_data, 0) 104 | return Field(all_lines) 105 | 106 | 107 | def increase2shape(data, target_shape): 108 | h, w = target_shape 109 | line = np.concatenate([data for i in range(w)], 1) 110 | d = np.concatenate([line for j in range(h)], 0) 111 | return d 112 | 113 | 114 | def decrease2color(data, background=0): 115 | colors, counts = np.unique(data[np.where(data != background)], return_counts=True) 116 | if len(colors) < 1: 117 | return background 118 | return colors[0] 119 | 120 | 121 | class ReversibleSplit(ReversibleOperation): 122 | def __init__( 123 | self, 124 | shape, 125 | hsep=0, 126 | wsep=0, 127 | outer_sep=False, 128 | sep_color=0, 129 | parent=None, 130 | 
splitter_func=split2shape,
131 |     ):
132 |         self.shape = shape
133 |         self.hsep = hsep
134 |         self.wsep = wsep
135 |         self.outer_sep = outer_sep
136 |         self.sep_color = sep_color
137 |         self.parent = parent
138 |         self.splitter_func = splitter_func
139 | 
140 |     def do(self, field):
141 |         splitted = self.splitter_func(
142 |             field, self.shape, hsep=self.hsep, wsep=self.wsep, outer_sep=self.outer_sep
143 |         )
144 |         return splitted
145 | 
146 |     def od(self, multifield):
147 |         field = collect_field(
148 |             multifield,
149 |             hsep=self.hsep,
150 |             wsep=self.wsep,
151 |             outer_sep=self.outer_sep,
152 |             sep_color=self.sep_color,
153 |         )
154 |         return field
155 | 
156 |     def __str__(self):
157 |         return f"ReversibleSplit({self.shape})"
158 | 
159 | 
160 | class ReversibleCombine(ReversibleOperation):
161 |     def __init__(
162 |         self,
163 |         shape,
164 |         hsep=0,
165 |         wsep=0,
166 |         outer_sep=False,
167 |         sep_color=0,
168 |         parent=None,
169 |         splitter_func=split2shape,
170 |     ):
171 |         self.shape = shape
172 |         self.hsep = hsep
173 |         self.wsep = wsep
174 |         self.outer_sep = outer_sep
175 |         self.sep_color = sep_color
176 |         self.splitter_func = splitter_func
177 |         self.color_func = None
178 |         self.parent = parent
179 | 
180 |     def train(self, io_list):
181 |         # todo: correctly process case when there is no splitter
182 |         get_color = lambda m: np.unique(
183 |             m.params["separator"][np.where(m.params["splitter"])]
184 |         )[0]
185 |         pairs = [(m, self.od(output_field)) for m, output_field in io_list]
186 |         color_pairs = [(get_color(m), get_color(o)) for m, o in pairs]
187 |         scores = []
188 |         for func in candidate_functions:
189 |             score = np.mean([func(i) == o for i, o in color_pairs])
190 |             scores.append((-score, func))
191 |         scores = sorted(scores, key=lambda x: x[0])
192 |         score, score_func = scores[0]
193 |         if score > -1:
194 |             color_dict = dict(color_pairs)
195 |             score_func = lambda x: color_dict.get(x, 0)
196 |         self.color_func = score_func
197 | 
198 |     def do(self, multifield):
199 |         if self.hsep > 0 or self.wsep > 0:
200 |             colors = np.unique(
201 |                 multifield.params["separator"][np.where(multifield.params["splitter"])]
202 |             )
203 |         else:
204 |             colors = []
205 |         sep_color = self.sep_color
206 |         if len(colors) > 0 and self.color_func is not None:
207 |             sep_color = self.color_func(colors[0])
208 | 
209 |         field = collect_field(
210 |             multifield,
211 |             hsep=self.hsep,
212 |             wsep=self.wsep,
213 |             outer_sep=self.outer_sep,
214 |             sep_color=sep_color,
215 |         )
216 |         return field
217 | 
218 |     def od(self, field):
219 |         splitted = self.splitter_func(
220 |             field, self.shape, hsep=self.hsep, wsep=self.wsep, outer_sep=self.outer_sep
221 |         )
222 |         return splitted
223 | 
224 |     def __str__(self):
225 |         return f"ReversibleCombine({self.shape})"
226 | 
227 | 
228 | class WrappedOperation:
229 |     def __init__(self, preprocess=None, postprocess=None):
230 |         self.preprocess = preprocess
231 |         self.postprocess = postprocess
232 | 
233 |     def wrap(self, iodata):
234 |         if isinstance(iodata, IOData):
235 |             i = iodata.input_field
236 |             o = iodata.output_field
237 |         else:
238 |             i, o = iodata
239 |         forward_i = self.preprocess.do(i)
240 |         if self.postprocess is None:
241 |             reverse_o = o
242 |         else:
243 |             reverse_o = self.postprocess.od(o)
244 |         return forward_i, reverse_o
245 | 
246 |     def train(self, iodata_list):
247 |         # TODO: need to implement this
248 |         data = [
249 |             (self.preprocess.do(iodata.input_field), iodata.output_field)
250 |             for iodata in iodata_list
251 |         ]
252 |         if self.postprocess is not None:
253 |             self.postprocess.train(data)
254 | 
255 |     def run(self, 
field, prev=lambda x: x):
256 |         x = self.preprocess.do(field)
257 |         if self.postprocess is None:
258 |             op = prev
259 |         else:
260 |             if prev is None:
261 |                 op = lambda t: self.postprocess.do(t)
262 |             else:
263 |                 op = lambda t: prev(self.postprocess.do(t))
264 |         return x, op
265 | 
266 | 
267 | class WrappedOperationList:
268 |     def __init__(self, operations):
269 |         self.operations = operations
270 |         pass
271 | 
272 |     def train(self, iodata_list):
273 |         il = iodata_list
274 |         for op in self.operations:
275 |             op.train(il)
276 |             il = [op.wrap(io) for io in il]
277 |         pass
278 | 
279 |     def wrap(self, iodata):
280 |         if isinstance(iodata, IOData):
281 |             i = iodata.input_field
282 |             o = iodata.output_field
283 |             x = (i, o)
284 |         else:
285 |             x = iodata
286 |         for op in self.operations:
287 |             x = op.wrap(x)
288 |         return x
289 | 
290 |     def run(self, field, prev=lambda x: x):
291 |         x = field
292 |         prev = None
293 |         for op in self.operations:
294 |             x, prev = op.run(x, prev)
295 |         return x, prev
--------------------------------------------------------------------------------
/kaggle_arc/operations/subpatterns.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for subpattern extraction
3 | """
4 | 
5 | import rootutils
6 | 
7 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
8 | 
9 | import numpy as np
10 | 
11 | 
12 | def get_suffixes(s, wildcard=0):
13 |     suffix_len = 0
14 |     m = len(s)
15 |     suffixes = [0 for i in range(m)]
16 |     i = 1
17 |     while i < m:
18 |         if s[i] == wildcard or s[suffix_len] == wildcard or s[i] == s[suffix_len]:
19 |             suffix_len += 1
20 |             suffixes[i] = suffix_len
21 |             i += 1
22 |         elif suffix_len != 0:
23 |             suffix_len = suffixes[suffix_len - 1]
24 |         else:
25 |             suffixes[i] = 0
26 |             i += 1
27 |     return suffixes
28 | 
29 | 
30 | def get_repeat_length(suffixes):
31 |     n = len(suffixes)
32 |     k = suffixes[-1]
33 |     if k < n - k:
34 |         return n
35 |     return n - k
36 | 
37 | 
38 | def check_subpattern(data, r, c, wildcard=0):
39 |     for line in data:
40 |         condition = np.all(
41 |             [x == y or x == wildcard or y == wildcard for x, y in zip(line, line[c:])]
42 |         )
43 |         if not condition:
44 |             return False
45 |     for line in data.T[:c]:
46 |         condition = np.all(
47 |             [x == y or x == wildcard or y == wildcard for x, y in zip(line, line[c:])]
48 |         )
49 |         if not condition:
50 |             return False
51 |     return True
52 | 
53 | 
54 | def get_subpattern(data, wildcard=0, check_passed=True):
55 |     repeats = []
56 |     for line in data:
57 |         s = get_suffixes(line, wildcard)
58 |         r = get_repeat_length(s)
59 |         repeats.append(r)
60 |     # print(repeats)
61 |     if check_passed:
62 |         col = int(np.median(repeats))
63 |     else:
64 |         col = np.lcm.reduce(repeats)
65 |     # print(col)
66 |     crepeats = []
67 |     if check_passed:
68 |         subset = data.T
69 |     else:
70 |         subset = data.T[:col]
71 |     for line in subset:
72 |         s = get_suffixes(line, wildcard)
73 |         r = get_repeat_length(s)
74 |         if check_passed:
75 |             if r == len(s):
76 |                 continue
77 |         crepeats.append(r)
78 |     # print(crepeats)
79 |     if check_passed:
80 |         if len(crepeats) == 0:
81 |             row = len(data)
82 |         else:
83 |             row = int(np.median(crepeats))
84 |     else:
85 |         row = np.lcm.reduce(crepeats)
86 |     return row, col
--------------------------------------------------------------------------------
/kaggle_arc/predictors/__init__.py:
--------------------------------------------------------------------------------
1 | import rootutils
2 | 
3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
4 | 
5 | from 
predictors.basic import IdPredictor, ZerosPredictor, ConstPredictor, FillPredictor 6 | from predictors.complex import ComplexPredictor 7 | from predictors.color_counting import ColorCountingPredictor 8 | from predictors.shapes import ( 9 | RepeatingPredictor, 10 | FractalPredictor, 11 | ResizingPredictor, 12 | MirrorPredictor, 13 | ConstantShaper, 14 | ) 15 | from predictors.boosting_tree import ( 16 | BoostingTreePredictor, 17 | BoostingTreePredictor2, 18 | BoostingTreePredictor3, 19 | ) 20 | from predictors.convolution import ConvolutionPredictor 21 | from predictors.graph_boosting_tree import ( 22 | GraphBoostingTreePredictor, 23 | GraphBoostingTreePredictor2, 24 | GraphBoostingTreePredictor3, 25 | ) 26 | from predictors.decision_tree import AugmentedPredictor 27 | from predictors.subpattern import SubpatternMatcherPredictor 28 | from predictors.connector import PointConnectorPredictor 29 | # from predictors.cf_combinator import WrappedCFPredictor 30 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/availability_mixins.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | import numpy as np 5 | import skimage.measure as sk_measure 6 | from fractions import Fraction 7 | 8 | from utils import check_if_can_be_mirrored 9 | 10 | class AvailableAll: 11 | def is_available(self, iodata_list): 12 | return True 13 | 14 | 15 | class AvailableEqualShape: 16 | def is_available(self, iodata_list): 17 | for iodata in iodata_list: 18 | if iodata.input_field.shape != iodata.output_field.shape: 19 | return False 20 | return True 21 | 22 | 23 | class AvailableShape2Point: 24 | def is_available(self, iodata_list): 25 | for iodata in iodata_list: 26 | if iodata.output_field.shape != (1, 1): 27 | return False 28 | return True 29 | 30 | 31 | class AvailableShape2PointOrConstColor: 32 | def is_available(self, iodata_list): 33 | for iodata in iodata_list: 34 | if iodata.output_field.shape != (1, 1): 35 | if len(np.unique(iodata.output_field.data)) != 1: 36 | return False 37 | return True 38 | 39 | 40 | class AvailableEqualShapeAndMaxNColors: 41 | def is_available(self, iodata_list, n_colors=4): 42 | for iodata in iodata_list: 43 | if iodata.input_field.shape != iodata.output_field.shape: 44 | return False 45 | if len(np.unique(iodata.input_field.data)) > n_colors: 46 | return False 47 | if len(np.unique(iodata.output_field.data)) > n_colors: 48 | return False 49 | return True 50 | 51 | 52 | class AvailableWithIntMultiplier: 53 | def is_available(self, iodata_list): 54 | all_sizes = set() 55 | for iodata in iodata_list: 56 | m1 = iodata.output_field.height // iodata.input_field.height 57 | m2 = iodata.output_field.width // iodata.input_field.width 58 | all_sizes.add((m1, m2)) 59 | if len(all_sizes) == 1: 60 | h, w = all_sizes.pop() 61 | if w > 1 and h > 1: 62 | self.m1 = h 63 | self.m2 = w 64 | return True 65 | return False 66 | 67 | 68 | class AvailableWithFractionalMultiplier: 69 | def is_available(self, iodata_list): 70 | all_sizes = set() 71 | for iodata in iodata_list: 72 | m1 = Fraction(iodata.output_field.height, iodata.input_field.height) 73 | m2 = Fraction(iodata.output_field.width, iodata.input_field.width) 74 | all_sizes.add((m1, m2)) 75 | if len(all_sizes) == 1: 76 | h, w = all_sizes.pop() 77 | self.m1 = h 78 | self.m2 = w 79 | return True 80 | return False 81 | 82 | 83 | class 
AvailableMirror(AvailableWithIntMultiplier): 84 | def is_available(self, iodata_list): 85 | availability_check = AvailableWithIntMultiplier() 86 | # print(isinstance(self, AvailableMirror)) 87 | if not availability_check.is_available(iodata_list): 88 | # print(11) 89 | return False 90 | self.m1 = availability_check.m1 91 | self.m2 = availability_check.m2 92 | results = set() 93 | for iodata in iodata_list: 94 | h, w = iodata.input_field.shape 95 | res = check_if_can_be_mirrored(iodata.output_field.data, h=h, w=w) 96 | # print(res) 97 | if res is None: 98 | return False 99 | results.add(res) 100 | (vertical, horizontal) = results.pop() 101 | if len(results) > 0: 102 | return False 103 | self.vertical = vertical 104 | self.horizontal = horizontal 105 | return True 106 | 107 | 108 | class AvailableEqualShapeAndLessThanNComponents: 109 | def is_available(self, iodata_list, n_components=10): 110 | for iodata in iodata_list: 111 | if iodata.input_field.shape != iodata.output_field.shape: 112 | return False 113 | for iodata in iodata_list: 114 | region_labels = sk_measure.label(iodata.input_field.data) 115 | max_region_id = np.max(region_labels) 116 | if max_region_id > n_components: 117 | return False 118 | return True -------------------------------------------------------------------------------- /kaggle_arc/predictors/basic.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | import numpy as np 6 | from itertools import islice 7 | # from fractions import Fraction 8 | 9 | from base.iodata import IOData 10 | from base.field import Field 11 | # from utils import check_if_can_be_mirrored 12 | from operations.subpatterns import get_subpattern 13 | from operations.subpatterns import check_subpattern 14 | import predictors.availability_mixins as mixins 15 | 16 | class Predictor: 17 | """Base class for all predictors. 18 | 19 | Methods 20 | ------- 21 | train(iodata_list) 22 | Trains the given predictor with a list of IOData objects. 23 | Each object should have both input and output data for all samples. 24 | predict(field) 25 | For the input data stored in the variable `field`, tries to predict the output transformations. 26 | validate(iodata_list, k=3) 27 | For each of the inputs in iodata_list, predicts `k` outputs. 28 | After prediction, tries to compare them with the corresponding output and returns the final score. 29 | predict_on(cls, ds, k=3, args=[], kwargs=dict(), verbose=True, group_predictions=True) 30 | Utility method to process the dataset puzzles one by one. 
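
        A minimal usage sketch (here `ds` is assumed to be an iterable of
        puzzle samples, each with `.train` and `.test` lists of IOData, as
        consumed by `predict_on`):

            sample = next(iter(ds))
            predictor = IdPredictor()
            predictor.train(sample.train)
            print(predictor.validate(sample.train, k=3))
            for name, predictions in IdPredictor.predict_on(ds, k=3):
                ...  # up to k candidate fields per test input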
31 | """ 32 | 33 | def train(self, iodata_list): 34 | pass 35 | 36 | def predict(self, field): 37 | pass 38 | 39 | def validate(self, iodata_list, k=3): 40 | if isinstance(iodata_list, IOData): 41 | ps = islice(self.predict(iodata_list.input_field), k) 42 | # print(list(ps)) 43 | scores = [Field.score(p, iodata_list.output_field) for p in ps] 44 | if len(scores) < 1: 45 | return 0.0 46 | return max(scores) 47 | 48 | scores = [] 49 | for iodata in iodata_list: 50 | score = self.validate(iodata) 51 | scores.append(score) 52 | if len(scores) < 1: 53 | return 0.0 54 | # print(scores) 55 | return np.mean(scores) 56 | 57 | def freeze_by_score(self, iodata_list, k=3): 58 | pass 59 | 60 | @classmethod 61 | def predict_on( 62 | cls, ds, k=3, args=[], kwargs=dict(), verbose=False, group_predictions=True 63 | ): 64 | for sample in ds: 65 | predictor = cls(*args, **kwargs) 66 | # if not predictor.is_available(sample): 67 | predictor.train(sample.train) 68 | predictor.freeze_by_score(sample.train) 69 | 70 | score = predictor.validate(sample.train) 71 | if score == 1 and verbose: 72 | print(predictor) 73 | 74 | predictions = [] 75 | for i, iodata in enumerate(sample.test): 76 | prediction = list(islice(predictor.predict(iodata), k)) 77 | predictions.append(prediction) 78 | if not group_predictions: 79 | yield sample.name, i, prediction 80 | if group_predictions: 81 | yield sample.name, predictions 82 | 83 | 84 | class IdPredictor(Predictor, mixins.AvailableAll): 85 | 86 | def train(self, iodata_list): 87 | pass 88 | 89 | def predict(self, field): 90 | if isinstance(field, IOData): 91 | for v in self.predict(field.input_field): 92 | yield v 93 | return 94 | # while True: 95 | yield Field(field.data) 96 | 97 | def __str__(self): 98 | return "IdPredictor()" 99 | 100 | 101 | class ZerosPredictor(Predictor, mixins.AvailableAll): 102 | def __init(self): 103 | pass 104 | 105 | def train(self, iodata_list): 106 | pass 107 | 108 | def predict(self, field): 109 | if isinstance(field, IOData): 110 | for v in self.predict(field.input_field): 111 | yield v 112 | return 113 | # while True: 114 | yield field.zeros() 115 | 116 | def __str__(self): 117 | return "ZerosPredictor()" 118 | 119 | 120 | class ConstPredictor(Predictor, mixins.AvailableAll): 121 | def __init__(self, value=1, multiplier=1): 122 | self.value = value 123 | self.multiplier = multiplier 124 | 125 | def train(self, iodata_list): 126 | pass 127 | 128 | def predict(self, field): 129 | if isinstance(field, IOData): 130 | for v in self.predict(field.input_field): 131 | yield v 132 | return 133 | # while True: 134 | yield field.consts(self.value, multiplier=self.multiplier) 135 | 136 | def __str__(self): 137 | return f"ConstPredictor(value={self.value}, multiplier={self.multiplier})" 138 | 139 | 140 | class FillPredictor(Predictor, mixins.AvailableEqualShape): 141 | def __init__(self): 142 | self.common_patch = None 143 | 144 | def train(self, iodata_list): 145 | patches = [] 146 | patch_sizes = set() 147 | 148 | for k, iodata in enumerate(iodata_list): 149 | i = iodata.input_field 150 | o = iodata.output_field 151 | (r0, c0) = get_subpattern(i.data, check_passed=False) 152 | (r1, c1) = get_subpattern(o.data, check_passed=False) 153 | # print(k, r0, c0, r1, c1) 154 | if check_subpattern(i.data, r1, c1): 155 | patch = self.get_patch(i.data, r1, c1, allow_zeros=True) 156 | # print(patch) 157 | # print(patch) 158 | patches.append(patch) 159 | patch_sizes.add((r1, c1)) 160 | # print(r1,c1) 161 | # self.common_patch = patch 162 | if len(patch_sizes) == 1: 163 
| self.common_patch = self.collect_patches(patches) 164 | 165 | def collect_patches(self, patches): 166 | # print(patches) 167 | common_patch = np.zeros(patches[0].shape, dtype=patches[0].dtype) 168 | for p in patches: 169 | for i in range(p.shape[0]): 170 | for j in range(p.shape[1]): 171 | if common_patch[i, j] == 0: 172 | common_patch[i, j] = p[i, j] 173 | elif common_patch[i, j] != p[i, j]: 174 | return None 175 | return common_patch 176 | 177 | def get_patch(self, data, r, c, allow_zeros=False): 178 | res = np.zeros((r, c), dtype=data.dtype) 179 | for i in range(r): 180 | for j in range(c): 181 | values = data[i::r, j::c] 182 | values = [v for v in np.unique(values) if v != 0] 183 | 184 | # if len(values) != 1: 185 | # return None 186 | if len(values) == 1: 187 | res[i, j] = values[0] 188 | return res 189 | 190 | def predict(self, field): 191 | if isinstance(field, IOData): 192 | for v in self.predict(field.input_field): 193 | yield v 194 | return 195 | (r, c) = get_subpattern(field.data, wildcard=0) 196 | if self.common_patch is not None: 197 | patch = self.common_patch 198 | else: 199 | patch = self.get_patch(field.data, r, c, True) 200 | # print(patch) 201 | if patch is None or np.any(patch == 0): 202 | yield Field(field.data) 203 | return 204 | result = field.data.copy() 205 | coords = np.where(result == 0) 206 | for x, y in zip(*coords): 207 | result[x, y] = patch[x % r, y % c] 208 | yield Field(result) 209 | 210 | def __str__(self): 211 | return "FillPredictor()" 212 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/color_counting.py: -------------------------------------------------------------------------------- 1 | """ 2 | Predictor based on this notebook 3 | https://www.kaggle.com/szabo7zoltan/colorandcountingmoduloq 4 | """ 5 | 6 | import rootutils 7 | 8 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 9 | 10 | import numpy as np 11 | 12 | from base.iodata import IOData 13 | from base.field import Field 14 | from predictors.basic import Predictor 15 | import predictors.availability_mixins as mixins 16 | 17 | def get_p1_p2(i, j, n, k, v, q1, q2): 18 | if v == 0 or v == 2: 19 | p1 = i % q1 20 | else: 21 | p1 = (n - 1 - i) % q1 22 | if v == 0 or v == 3: 23 | p2 = j % q2 24 | else: 25 | p2 = (k - 1 - j) % q2 26 | return p1, p2 27 | 28 | 29 | class ColorCountingPredictor(Predictor, mixins.AvailableEqualShape): 30 | def __init__(self): 31 | self.best_Dict = None 32 | self.best_Q1 = -1 33 | self.best_Q2 = -1 34 | self.best_v = -1 35 | 36 | def train(self, iodata_list): 37 | pairs = [ 38 | (Q1, Q2) 39 | for t in range(15) 40 | for Q1 in range(1, 8) 41 | for Q2 in range(1, 8) 42 | if Q1 + Q2 == t 43 | ] 44 | h, w = list(zip(*[iodata.input_field.shape for iodata in iodata_list])) 45 | hmax = max(h) 46 | wmax = max(w) 47 | pairs = [(Q1, Q2) for Q1, Q2 in pairs if Q1 < hmax and Q2 < wmax] 48 | possible = True 49 | for Q1, Q2 in pairs: 50 | for v in range(4): 51 | if self.best_Dict is not None: 52 | return 53 | possible = True 54 | Dict = {} 55 | for iodata in iodata_list: 56 | (n, k) = iodata.input_field.shape 57 | for i in range(n): 58 | for j in range(k): 59 | p1, p2 = get_p1_p2(i, j, n, k, v, Q1, Q2) 60 | color1 = iodata.input_field.data[i, j] 61 | color2 = iodata.output_field.data[i, j] 62 | if color1 != color2: 63 | rule = (p1, p2, color1) 64 | if rule not in Dict: 65 | Dict[rule] = color2 66 | elif Dict[rule] != color2: 67 | possible = False 68 | if not possible: 69 | continue 70 | for 
iodata in iodata_list: 71 | (n, k) = iodata.input_field.shape 72 | for i in range(n): 73 | for j in range(k): 74 | p1, p2 = get_p1_p2(i, j, n, k, v, Q1, Q2) 75 | color1 = iodata.input_field.data[i, j] 76 | rule = (p1, p2, color1) 77 | if rule in Dict: 78 | color2 = 0 + Dict[rule] 79 | else: 80 | color2 = 0 + iodata.output_field.data[i, j] 81 | if color2 != iodata.output_field.data[i, j]: 82 | possible = False 83 | break 84 | if not possible: 85 | break 86 | if not possible: 87 | break 88 | if possible: 89 | self.best_Dict = Dict 90 | self.best_Q1 = Q1 91 | self.best_Q2 = Q2 92 | self.best_v = v 93 | return 94 | pass 95 | 96 | def predict(self, field): 97 | if isinstance(field, IOData): 98 | for v in self.predict(field.input_field): 99 | yield v 100 | return 101 | # while True: 102 | if self.best_Dict is None: 103 | return 104 | 105 | n, k = field.shape 106 | answer = np.zeros(field.shape, dtype=field.dtype) 107 | for i in range(n): 108 | for j in range(k): 109 | p1, p2 = get_p1_p2(i, j, n, k, self.best_v, self.best_Q1, self.best_Q2) 110 | color1 = field.data[i, j] 111 | rule = (p1, p2, color1) 112 | answer[i, j] = self.best_Dict.get(rule, color1) 113 | yield Field(answer) 114 | # yield field.consts(self.value, multiplier=self.multiplier) 115 | 116 | def __str__(self): 117 | if self.best_Dict is None: 118 | return "ColorCountingPredictor(undefined)" 119 | return f"ColorCountingPredictor({self.best_Dict})" 120 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/complex.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | import numpy as np 6 | from itertools import islice 7 | 8 | from base.field import Field 9 | from predictors.basic import Predictor 10 | # from predictors.basic import AvailableAll 11 | from predictors.basic import IdPredictor, ZerosPredictor, ConstPredictor, FillPredictor 12 | 13 | from predictors.color_counting import ColorCountingPredictor 14 | from predictors.shapes import ( 15 | RepeatingPredictor, 16 | FractalPredictor, 17 | ResizingPredictor, 18 | MirrorPredictor, 19 | ConstantShaper, 20 | ) 21 | from predictors.boosting_tree import ( 22 | BoostingTreePredictor, 23 | BoostingTreePredictor2, 24 | BoostingTreePredictor3, 25 | ) 26 | from predictors.convolution import ConvolutionPredictor 27 | from predictors.graph_boosting_tree import ( 28 | GraphBoostingTreePredictor, 29 | GraphBoostingTreePredictor2, 30 | GraphBoostingTreePredictor3, 31 | ) 32 | from predictors.decision_tree import AugmentedPredictor 33 | from predictors.subpattern import SubpatternMatcherPredictor 34 | from predictors.field2point import SimpleSummarizePredictor 35 | from predictors.connector import * 36 | import predictors.availability_mixins as mixins 37 | 38 | 39 | class ComplexPredictor(Predictor, mixins.AvailableAll): 40 | def __init__(self, predictor_classes, verbose=False): 41 | self.predictors = [] 42 | for data in predictor_classes: 43 | if isinstance(data, tuple): 44 | if len(data) == 3: 45 | cls, args, kwargs = data 46 | else: 47 | cls, args = data 48 | kwargs = dict() 49 | else: 50 | cls = data 51 | args = [] 52 | kwargs = dict() 53 | self.predictors.append(cls(*args, **kwargs)) 54 | self.verbose = verbose 55 | 56 | def train(self, iodata_list): 57 | self.predictors = [p for p in self.predictors if p.is_available(iodata_list)] 58 | invalid_predictors = set() 59 | for i, p in 
enumerate(self.predictors): 60 | try: 61 | p.train(iodata_list) 62 | except Exception as e: 63 | if self.verbose: 64 | print(e) 65 | invalid_predictors.add(i) 66 | self.predictors = [ 67 | p for i, p in enumerate(self.predictors) if i not in invalid_predictors 68 | ] 69 | 70 | def validate(self, iodata_list, k=3): 71 | if len(self.predictors) == 0: 72 | return 0.0 73 | scores = [] 74 | for iodata in iodata_list: 75 | pred_scores = [] 76 | for res in islice(self.predict(iodata.input_field), k): 77 | score = Field.score(res, iodata.output_field) 78 | pred_scores.append(score) 79 | scores.append(max(pred_scores)) 80 | # for p in self.predictors[:3]: 81 | # score = p.validate(iodata_list) 82 | # scores.append(score) 83 | if len(scores) == 0: 84 | return 0.0 85 | return np.mean(scores) 86 | 87 | def freeze_by_score(self, iodata_list, k=3): 88 | scores = [] 89 | for p in self.predictors: 90 | score = 0 91 | try: 92 | p.freeze_by_score(iodata_list, k=k) 93 | score = p.validate(iodata_list, k=k) 94 | except: 95 | score = -1 96 | scores.append(score) 97 | scores = np.asarray(scores) 98 | # scores = scores[np.argwhere(scores>0)] 99 | ids = np.argsort(scores)[::-1] 100 | self.predictors = [self.predictors[i] for i in ids if scores[i] >= 0] 101 | 102 | def predict(self, field): 103 | for p in self.predictors: 104 | # if not p.is_available(sample): 105 | # continue 106 | try: 107 | for v in p.predict(field): 108 | yield v 109 | except Exception as e: 110 | if self.verbose: 111 | print(e) 112 | # continue 113 | # for p in self.predictors: 114 | # try: 115 | # v = next(p.predict(field)) 116 | # except: 117 | # continue 118 | # yield v 119 | 120 | def __str__(self): 121 | s = ";".join([str(p) for p in self.predictors]) 122 | return f"ComplexPredictor({s})" 123 | 124 | 125 | class DefaultComplexPredictor(ComplexPredictor): 126 | def __init__(self): 127 | predictor_args = [ 128 | IdPredictor, 129 | ZerosPredictor, 130 | ColorCountingPredictor, 131 | RepeatingPredictor, 132 | FractalPredictor, 133 | ResizingPredictor, 134 | GraphBoostingTreePredictor, # no impact 135 | GraphBoostingTreePredictor3, 136 | ConstantShaper, 137 | # BoostingTreePredictor, 138 | # BoostingTreePredictor2, 139 | PointConnectorPredictor, 140 | BoostingTreePredictor3, 141 | SubpatternMatcherPredictor, 142 | # AugmentedPredictor 143 | FillPredictor, 144 | MirrorPredictor, 145 | SimpleSummarizePredictor, 146 | # (ConvolutionPredictor, [], {'loss': 'mse'}), 147 | # (ConvolutionPredictor, [], {'loss': 'dice'}) 148 | ] 149 | super(DefaultComplexPredictor, self).__init__(predictor_args) 150 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/convolution.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | from itertools import product 9 | 10 | import skimage.measure as sk_measure 11 | 12 | from base.iodata import * 13 | from base.field import * 14 | from predictors.boosting_tree import BTFeatureExtractor 15 | from predictors.basic import * 16 | 17 | 18 | def DBA(inp_size, out_size, activation=torch.nn.LeakyReLU()): 19 | return nn.Sequential( 20 | nn.Conv2d(inp_size, out_size, kernel_size=3, padding=1), 21 | nn.BatchNorm2d(out_size), 22 | activation, 23 | ) 24 | 25 | 26 | class NLU(nn.Module): 27 | def __init__(self, inp_size, out_size=None, activation=torch.nn.LeakyReLU()): 
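        """DenseNet-style unit: `forward` returns torch.cat([y, self.block2(y)], 1),
        so with the default out_size = inp_size // 2 the concatenated output has
        2 * (inp_size // 2) channels, i.e. the channel count is preserved for even
        inp_size. This is what lets StackedUnit chain several NLU blocks without
        changing the feature-map width.
        """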
28 | super().__init__() 29 | if out_size is None: 30 | out_size = inp_size // 2 31 | self.block1 = DBA(inp_size, out_size, activation) 32 | self.block2 = DBA(out_size, out_size, activation) 33 | 34 | def forward(self, x): 35 | y = self.block1(x) 36 | return torch.cat([y, self.block2(y)], 1) 37 | 38 | 39 | class StackedUnit(nn.Module): 40 | def __init__( 41 | self, 42 | input_size=2, 43 | out_size=10, 44 | n=3, 45 | activation=torch.nn.LeakyReLU(), 46 | last_activation=torch.nn.Softmax(dim=1), 47 | ): 48 | super().__init__() 49 | self.blocks = nn.ModuleList([NLU(input_size) for i in range(n)]) 50 | self.last_block = DBA(input_size, out_size, activation) 51 | 52 | def forward(self, x): 53 | y = x 54 | for block in self.blocks: 55 | y = block(y) 56 | return self.last_block(y) 57 | 58 | 59 | def filter_ones(col, split_count=1): 60 | coords = np.argwhere(col > 0).flatten() 61 | if len(coords) == 0: 62 | return col, col 63 | split0 = [] 64 | split1 = [] 65 | last_seq = [] 66 | for c in coords: 67 | if len(last_seq) < 1: 68 | last_seq.append(c) 69 | continue 70 | if last_seq[-1] + 1 == c: 71 | last_seq.append(c) 72 | continue 73 | if len(last_seq) <= split_count: 74 | split0.extend(last_seq) 75 | else: 76 | split1.extend(last_seq) 77 | last_seq = [c] 78 | if len(last_seq) > 0: 79 | if len(last_seq) <= split_count: 80 | split0.extend(last_seq) 81 | else: 82 | split1.extend(last_seq) 83 | s0 = np.zeros(col.shape) 84 | s0[split0] = 1 85 | s1 = np.zeros(col.shape) 86 | s1[split1] = 1 87 | return s0 * col, s1 * col 88 | 89 | 90 | def split_coords(data, color, split_count=1): 91 | col = np.sum(d.data == color, 0) 92 | row = np.sum(d.data == color, 1) 93 | 94 | col0, col1 = filter_ones(col, split_count=split_count) 95 | row0, row1 = filter_ones(row, split_count=split_count) 96 | return col0 * row0.reshape(-1, 1), col1 * row1.reshape(-1, 1) 97 | 98 | 99 | def dice_loss(pred, gt): 100 | def binary_dice(a, b, eps=1.0): 101 | # print(a.shape) 102 | s = torch.sum(a) + torch.sum(b) + eps 103 | if s != 0: 104 | return 2 * torch.sum(a * b) / s 105 | return None # torch.tensor() 106 | 107 | # print(pred.shape, gt.shape) 108 | res = [binary_dice(pred[:, i], gt[:, i]) for i in range(10)] 109 | res = [r for r in res if r is not None] 110 | 111 | return torch.sum(torch.stack(res)) 112 | 113 | 114 | def make_conv_features(field, nfeat=13, local_neighb=5): 115 | nrows, ncols = field.shape 116 | # feat = np.zeros((nrows*ncols, nfeat)) 117 | all_features = [] 118 | cur_idx = 0 119 | for i in range(nrows): 120 | feature_list = [] 121 | for j in range(ncols): 122 | color = field.data[i, j] 123 | features = [i, j, field.data[i, j]] 124 | features.extend( 125 | BTFeatureExtractor.get_moore_neighbours(field, i, j, nrows, ncols) 126 | ) 127 | features.extend(BTFeatureExtractor.get_tl_tr(field, i, j, nrows, ncols)) 128 | features.extend( 129 | [ 130 | len(np.unique(field.data[i, :])), 131 | len(np.unique(field.data[:, j])), 132 | # next goes count of non-zero points 133 | np.sum(field.data[i, :] > 0), 134 | np.sum(field.data[:, j] > 0), 135 | (i + j), 136 | len( 137 | np.unique( 138 | field.data[ 139 | i - local_neighb : i + local_neighb, 140 | j - local_neighb : j + local_neighb, 141 | ] 142 | ) 143 | ), 144 | ] 145 | ) 146 | 147 | # feat[cur_idx,13] 148 | features.extend( 149 | [ 150 | (i + ncols - j - 1), 151 | (i + j) % 2, 152 | (i + j + 1) % 2, 153 | # (i + ncols - j - 1) % 2 154 | # (nrows - 1 - i + ncols - j - 1), 155 | # (nrows - 1 - i + j) 156 | ] 157 | ) 158 | features.extend( 159 | [field.get(i + k, j + v) for k, v 
in product([-1, 0, 1], [-1, 0, 1])] 160 | ) 161 | features.extend( 162 | [ 163 | field.data[nrows - 1 - i, j], 164 | field.data[nrows - 1 - i, ncols - 1 - j], 165 | field.data[i, ncols - 1 - j], 166 | ] 167 | ) 168 | features.extend( 169 | [ 170 | field.data[i, j] != 0, 171 | np.sum( 172 | [ 173 | field.get(i + k, j + v) == color 174 | for k, v in product([-1, 1], [-1, 1]) 175 | ] 176 | ), 177 | np.sum( 178 | [ 179 | field.get(i + 1, j) == color, 180 | field.get(i - 1, j) == color, 181 | field.get(i, j + 1) == color, 182 | field.get(i, j - 1) == color, 183 | ] 184 | ), 185 | # np.sum([ field.get(i + k, j + v) == 0 186 | # for k, v in product([-1, 1], [-1, 1])]), 187 | # np.sum([ 188 | # field.get(i + 1, j) == 0, 189 | # field.get(i - 1, j) == 0, 190 | # field.get(i, j + 1) == 0, 191 | # field.get(i, j - 1) == 0 192 | # ]) 193 | ] 194 | ) 195 | feature_list.append(features) 196 | all_features.append(feature_list) 197 | 198 | feat = np.asarray(all_features) 199 | # feat = np.concatenate([ 200 | # feat, 201 | # np.stack([label(field.data==i) for i in range(10)], -1) 202 | # ], -1) 203 | return feat 204 | 205 | 206 | def make_conv_features2(field, nfeat=13, local_neighb=5): 207 | nrows, ncols = field.shape 208 | # feat = np.zeros((nrows*ncols, nfeat)) 209 | all_features = [] 210 | cur_idx = 0 211 | for i in range(nrows): 212 | feature_list = [] 213 | for j in range(ncols): 214 | color = field.data[i, j] 215 | features = [ 216 | # i, 217 | # j, 218 | field.data[i, j] 219 | ] 220 | # features.extend(get_moore_neighbours(field, i, j, nrows, ncols)) 221 | # features.extend(get_tl_tr(field, i, j, nrows, ncols)) 222 | features.extend( 223 | [ 224 | len(np.unique(field.data[i, :])), 225 | len(np.unique(field.data[:, j])), 226 | # next goes count of non-zero points 227 | # np.sum(field.data[i, :] > 0), 228 | # np.sum(field.data[:, j] > 0), 229 | (i + j), 230 | # len(np.unique(field.data[ 231 | # i-local_neighb:i+local_neighb, 232 | # j-local_neighb:j+local_neighb])) 233 | ] 234 | ) 235 | 236 | # feat[cur_idx,13] 237 | # features.extend([ 238 | # (i + ncols - j - 1), 239 | # (i + j) % 2, 240 | # (i + j + 1) % 2, 241 | # (i + ncols - j - 1) % 2, 242 | # (nrows - 1 - i + ncols - j - 1), 243 | # (nrows - 1 - i + j) 244 | # ]) 245 | features.extend( 246 | [ 247 | field.get(i + k, j + v) 248 | for k, v in product([-1, 0, 1], [-1, 0, 1]) 249 | if k != 0 or v != 0 250 | ] 251 | ) 252 | features.extend( 253 | [ 254 | field.data[nrows - 1 - i, j], 255 | field.data[nrows - 1 - i, ncols - 1 - j], 256 | field.data[i, ncols - 1 - j], 257 | ] 258 | ) 259 | features.extend( 260 | [ 261 | field.data[i, j] != 0, 262 | np.sum( 263 | [ 264 | field.get(i + k, j + v) == color 265 | for k, v in product([-1, 1], [-1, 1]) 266 | ] 267 | ), 268 | np.sum( 269 | [ 270 | field.get(i + 1, j) == color, 271 | field.get(i - 1, j) == color, 272 | field.get(i, j + 1) == color, 273 | field.get(i, j - 1) == color, 274 | ] 275 | ), 276 | np.sum( 277 | [ 278 | field.get(i + k, j + v) == 0 279 | for k, v in product([-1, 1], [-1, 1]) 280 | ] 281 | ), 282 | np.sum( 283 | [ 284 | field.get(i + 1, j) == 0, 285 | field.get(i - 1, j) == 0, 286 | field.get(i, j + 1) == 0, 287 | field.get(i, j - 1) == 0, 288 | ] 289 | ), 290 | ] 291 | ) 292 | features.extend( 293 | [ 294 | np.sum(field.data[i, :] == c) + np.sum(field.data[:, j] == c) 295 | for c in range(10) 296 | ] 297 | ) 298 | feature_list.append(features) 299 | all_features.append(feature_list) 300 | 301 | feat = np.asarray(all_features) 302 | feat = np.concatenate( 303 | [feat, 
np.stack([sk_measure.label(field.data == i) for i in range(10)], -1)], -1
304 |     )
305 |     masks = []
306 |     for c in range(10):
307 |         col = np.sum(field.data == c, 0)
308 |         row = np.sum(field.data == c, 1)
309 |         col0, col1 = filter_ones(col, split_count=1)
310 |         row0, row1 = filter_ones(row, split_count=1)
311 |         # return col0*row0.reshape(-1, 1), col1*row1.reshape(-1, 1)
312 |         # keep the full mask for this color plus its two split variants
313 |         masks.extend(
314 |             [
315 |                 col * row.reshape(-1, 1),
316 |                 col0 * row0.reshape(-1, 1),
317 |                 col1 * row1.reshape(-1, 1),
318 |             ]
319 |         )
320 | 
321 |     masks = np.stack(masks, -1)
322 |     # print(masks.shape)
323 |     feat = np.concatenate([feat, masks], -1)
324 |     return feat
325 | 
326 | 
327 | def get_nonzero_ids(iodata_list, make_conv_features=make_conv_features):
328 |     zero_ids = dict()
329 |     max_count = 0
330 |     nfeatures = 0
331 |     max_count += len(iodata_list)
332 |     for iodata in iodata_list:  # [sample.train, sample.test]:
333 |         features = make_conv_features(iodata.input_field)
334 |         nfeatures = max(nfeatures, features.shape[-1])
335 |         features = features.reshape(-1, features.shape[-1])
336 |         for i in np.argwhere(features.sum(0) > 0).flatten():
337 |             if i not in zero_ids:
338 |                 zero_ids[i] = 0
339 |             zero_ids[i] += 1
340 |     return np.asarray(
341 |         [i for i in np.arange(nfeatures) if zero_ids.get(i, 0) < max_count]
342 |     )
343 | 
344 | 
345 | def train_on_sample(sample, cutoff=0.5, debug=False, infeatures=70):
346 |     feature_ids = get_nonzero_ids(sample.train + sample.test)
347 |     model = StackedUnit(len(feature_ids), 10, last_activation=nn.Softmax(dim=1))
348 |     # model = nn.Sequential(
349 |     #     nn.Conv2d(len(feature_ids), 128, 3, padding=1),
350 |     #     nn.LeakyReLU(),
351 |     #     nn.Conv2d(128, 64, 3, padding=1),
352 |     #     nn.LeakyReLU(),
353 |     #     nn.Conv2d(64, 32, 3, padding=1),
354 |     #     nn.LeakyReLU(),
355 |     #     #nn.Sigmoid(),
356 |     #     nn.Conv2d(32, 10, 3, padding=1),
357 |     #     # nn.Sigmoid()
358 |     #     nn.Softmax(dim=1)
359 |     # )
360 |     loss_func = torch.nn.MSELoss()  # dice_loss
361 |     # print(net.parameters())
362 | 
363 |     optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
364 | 
365 |     for epoch in range(20):
366 |         model.train()
367 |         if debug:
368 |             print("Epoch", epoch)
369 |         losses = []
370 |         optimizer.zero_grad()
371 |         # train_x, train_y, result = make_features(iodata_list)
372 |         for iodata in sample.train:
373 |             features = make_conv_features(
374 |                 iodata.input_field
375 |             )  # .reshape(iodata.input_field.shape+(-1,))
376 |             features = features[:, :, feature_ids]
377 |             features = np.moveaxis(features, -1, 0)
378 |             features = features[np.newaxis, ...]
379 |             i = torch.tensor(features).float()
380 | 
381 |             o = iodata.output_field.t_splitted()
382 |             o = torch.unsqueeze(o, dim=0).float()
383 |             p = model.forward(i)
384 |             # print(i.is_leaf, p.is_leaf)
385 |             # print(p.sum(1))
386 |             # print(features.shape)
387 |             # print(o.shape, p.shape)
388 |             loss = loss_func(p, o)
389 |             loss.backward()
390 |             losses.append(loss.item())
391 |         if debug:
392 |             print(losses)
393 |         # if epoch % 10 == 0:
394 |         #     print("zero grad")
395 |         optimizer.step()
396 | 
397 |     if debug:
398 |         print("Validation:")
399 |     val_results = []
400 |     model.eval()
401 |     with torch.no_grad():
402 |         scores = []
403 |         for iodata in sample.test:
404 |             features = make_conv_features(
405 |                 iodata.input_field
406 |             )  # .reshape(iodata.input_field.shape+(-1,))
407 |             features = features[:, :, feature_ids]
408 |             features = np.moveaxis(features, -1, 0)
409 |             features = features[np.newaxis, ...]
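            # `features` is laid out (H, W, C) per grid cell; moving channels
            # first and adding a batch axis gives the (N=1, C, H, W) layout
            # that nn.Conv2d expects, so the whole grid is one "image".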
410 | i = torch.tensor(features).float() 411 | 412 | o = iodata.output_field.t_splitted() 413 | o = torch.unsqueeze(o, dim=0).float() 414 | p = model.forward(i) 415 | p = torch.squeeze(p, dim=0) 416 | p = Field.from_splitted(p) 417 | score = Field.score(p, iodata.output_field) 418 | scores.append(score) 419 | val_results.append((p, iodata.input_field, iodata.output_field)) 420 | if debug: 421 | print(score) 422 | p.show() 423 | iodata.output_field.show() 424 | scores = np.mean(scores) 425 | # print(scores) 426 | if scores < cutoff: 427 | return None 428 | return scores, model, val_results 429 | 430 | 431 | class ConvolutionPredictor(Predictor, mixins.AvailableEqualShape): 432 | def __init__(self, nepochs=40, loss="mse"): 433 | # self.xgb = XGBClassifier(n_estimators=25*2, booster="dart", n_jobs=-1) 434 | if loss == "mse": 435 | self.loss_func = torch.nn.MSELoss() 436 | else: 437 | self.loss_func = dice_loss 438 | # print(net.parameters()) 439 | self.nepochs = nepochs 440 | self.lr = 0.01 441 | self.debug = False 442 | 443 | def build_model(self, feature_ids): 444 | model = nn.Sequential( 445 | nn.Conv2d(len(feature_ids), 128, 3, padding=1), 446 | nn.LeakyReLU(), 447 | nn.Conv2d(128, 64, 3, padding=1), 448 | nn.LeakyReLU(), 449 | nn.Conv2d(64, 32, 3, padding=1), 450 | nn.LeakyReLU(), 451 | # nn.Sigmoid(), 452 | nn.Conv2d(32, 10, 3, padding=1), 453 | # nn.Sigmoid() 454 | nn.Softmax(dim=1), 455 | ) 456 | return model 457 | # 458 | 459 | def train(self, iodata_list): 460 | self.feature_ids = get_nonzero_ids( 461 | iodata_list, make_conv_features=make_conv_features2 462 | ) 463 | self.model = self.build_model(self.feature_ids) 464 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr) 465 | all_losses = [] 466 | for epoch in range(self.nepochs): 467 | self.model.train() 468 | if self.debug: 469 | print("Epoch", epoch) 470 | losses = [] 471 | self.optimizer.zero_grad() 472 | # train_x, train_y, result = make_features(iodata_list) 473 | for iodata in iodata_list: 474 | features = make_conv_features2( 475 | iodata.input_field 476 | ) # .reshape(iodata.input_field.shape+(-1,)) 477 | features = features[:, :, self.feature_ids] 478 | features = np.moveaxis(features, -1, 0) 479 | features = features[np.newaxis, ...] 480 | i = torch.tensor(features).float() 481 | 482 | o = iodata.output_field.t_splitted() 483 | o = torch.unsqueeze(o, dim=0).float() 484 | p = self.model.forward(i) 485 | loss = self.loss_func(p, o) 486 | loss.backward() 487 | losses.append(loss.item()) 488 | if self.debug: 489 | print(losses) 490 | 491 | losses = np.mean(losses) 492 | if len(all_losses) > 0: 493 | if len(all_losses) > 10 and np.mean(all_losses[-10:]) <= losses: 494 | break 495 | all_losses.append(losses) 496 | 497 | # if epoch % 10 == 0: 498 | # print("zero grad") 499 | self.optimizer.step() 500 | 501 | def predict(self, field): 502 | if isinstance(field, IOData): 503 | for v in self.predict(field.input_field): 504 | yield v 505 | return 506 | self.model.eval() 507 | with torch.no_grad(): 508 | features = make_conv_features2(field) 509 | features = features[:, :, self.feature_ids] 510 | features = np.moveaxis(features, -1, 0) 511 | features = features[np.newaxis, ...] 
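            # same (N, C, H, W) packing as at training time; the model emits a
            # (1, 10, H, W) stack of per-color maps that Field.from_splitted
            # decodes back into a single color grid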
512 | i = torch.tensor(features).float() 513 | p = self.model.forward(i) 514 | p = torch.squeeze(p, dim=0).detach().cpu().numpy() 515 | yield Field.from_splitted(p) 516 | 517 | def __str__(self): 518 | return "ConvolutionPredictor()" 519 | 520 | 521 | class Convolution2PointPredictor(Predictor, mixins.AvailableShape2PointOrConstColor): 522 | def __init__(self, nepochs=40, loss="mse"): 523 | # self.xgb = XGBClassifier(n_estimators=25*2, booster="dart", n_jobs=-1) 524 | if loss == "mse": 525 | self.loss_func = torch.nn.MSELoss() 526 | else: 527 | self.loss_func = dice_loss 528 | # print(net.parameters()) 529 | self.nepochs = nepochs 530 | self.lr = 0.01 531 | self.debug = False 532 | 533 | def build_model(self, feature_ids): 534 | model = nn.Sequential( 535 | nn.Conv2d(len(feature_ids), 128, 3, padding=1), 536 | nn.LeakyReLU(), 537 | nn.Conv2d(128, 64, 3, padding=1), 538 | nn.LeakyReLU(), 539 | nn.Conv2d(64, 32, 3, padding=1), 540 | nn.LeakyReLU(), 541 | # nn.Sigmoid(), 542 | nn.Conv2d(32, 10, 3, padding=1), 543 | nn.AvgPool2d(3), 544 | nn.Sigmoid(), 545 | # nn.Softmax(dim=1) 546 | ) 547 | return model 548 | # 549 | 550 | def train(self, iodata_list): 551 | self.feature_ids = get_nonzero_ids( 552 | iodata_list, make_conv_features=make_conv_features2 553 | ) 554 | self.model = self.build_model(self.feature_ids) 555 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr) 556 | all_losses = [] 557 | for epoch in range(self.nepochs): 558 | self.model.train() 559 | if self.debug: 560 | print("Epoch", epoch) 561 | losses = [] 562 | self.optimizer.zero_grad() 563 | # train_x, train_y, result = make_features(iodata_list) 564 | for iodata in iodata_list: 565 | features = make_conv_features2( 566 | iodata.input_field 567 | ) # .reshape(iodata.input_field.shape+(-1,)) 568 | features = features[:, :, self.feature_ids] 569 | features = np.moveaxis(features, -1, 0) 570 | features = features[np.newaxis, ...] 571 | i = torch.tensor(features).float() 572 | 573 | o = iodata.output_field.t_splitted() 574 | o = torch.unsqueeze(o, dim=0).float() 575 | p = self.model.forward(i) 576 | loss = self.loss_func(p, o) 577 | loss.backward() 578 | losses.append(loss.item()) 579 | if self.debug: 580 | print(losses) 581 | 582 | losses = np.mean(losses) 583 | if len(all_losses) > 0: 584 | if len(all_losses) > 10 and np.mean(all_losses[-10:]) <= losses: 585 | break 586 | all_losses.append(losses) 587 | 588 | # if epoch % 10 == 0: 589 | # print("zero grad") 590 | self.optimizer.step() 591 | 592 | def predict(self, field): 593 | if isinstance(field, IOData): 594 | for v in self.predict(field.input_field): 595 | yield v 596 | return 597 | self.model.eval() 598 | with torch.no_grad(): 599 | features = make_conv_features2(field) 600 | features = features[:, :, self.feature_ids] 601 | features = np.moveaxis(features, -1, 0) 602 | features = features[np.newaxis, ...] 
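            # identical feature packing; here the AvgPool2d head shrinks the
            # spatial map, matching this predictor's point-or-constant-color
            # targets (see AvailableShape2PointOrConstColor)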
603 | i = torch.tensor(features).float() 604 | p = self.model.forward(i) 605 | p = torch.squeeze(p, dim=0).detach().cpu().numpy() 606 | yield Field.from_splitted(p) 607 | 608 | def __str__(self): 609 | return "ConvolutionPredictor()" 610 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/decision_tree.py: -------------------------------------------------------------------------------- 1 | """Code in next predictor is based on this kernel 2 | 3 | https://www.kaggle.com/adityaork/decision-tree-smart-data-augmentation/comments 4 | """ 5 | 6 | import rootutils 7 | 8 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 9 | 10 | import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv) 11 | import json 12 | from pathlib import Path 13 | from collections import defaultdict 14 | from itertools import product 15 | from matplotlib import colors 16 | import matplotlib.pyplot as plt 17 | import numpy as np 18 | from itertools import combinations, permutations 19 | 20 | from sklearn.tree import DecisionTreeClassifier 21 | from sklearn.ensemble import BaggingClassifier 22 | import random 23 | import math 24 | 25 | 26 | from predictors.basic import * 27 | import predictors.availability_mixins as mixins 28 | 29 | 30 | class Augmenter: 31 | __slots__ = () 32 | 33 | @staticmethod 34 | def getiorc(iodata): 35 | inp = iodata.input_field 36 | return iodata.input_field, iodata.output_field, inp.shape[0], inp.shape[1] 37 | 38 | @classmethod 39 | def getBkgColor(cls, iodata_list): 40 | color_dict = defaultdict(int) 41 | 42 | for iodata in iodata_list: 43 | inp, oup, r, c = cls.getiorc(iodata) 44 | for i in range(r): 45 | for j in range(c): 46 | color_dict[inp.data[i, j]] += 1 47 | color = -1 48 | max_count = 0 49 | for col, cnt in color_dict.items(): 50 | if cnt > max_count: 51 | color = col 52 | max_count = cnt 53 | return color 54 | 55 | @classmethod 56 | def get_bl_cols(cls, iodata_list): 57 | result = [] 58 | bkg_col = cls.getBkgColor(iodata_list) 59 | result.append(bkg_col) 60 | # num_input,input_cnt,num_output,output_cnt 61 | met_map = {} 62 | for i in range(10): 63 | met_map[i] = [0, 0, 0, 0] 64 | 65 | total_ex = 0 66 | for iodata in iodata_list: 67 | inp, oup = iodata.input_field, iodata.output_field 68 | u, uc = np.unique(inp.data, return_counts=True) 69 | inp_cnt_map = dict(zip(u, uc)) 70 | u, uc = np.unique(oup.data, return_counts=True) 71 | oup_cnt_map = dict(zip(u, uc)) 72 | 73 | for col, cnt in inp_cnt_map.items(): 74 | met_map[col][0] = met_map[col][0] + 1 75 | met_map[col][1] = met_map[col][1] + cnt 76 | for col, cnt in oup_cnt_map.items(): 77 | met_map[col][2] = met_map[col][2] + 1 78 | met_map[col][3] = met_map[col][3] + cnt 79 | total_ex += 1 80 | 81 | for col, met in met_map.items(): 82 | num_input, input_cnt, num_output, output_cnt = met 83 | if num_input == total_ex or num_output == total_ex: 84 | result.append(col) 85 | elif num_input == 0 and num_output > 0: 86 | result.append(col) 87 | 88 | result = np.unique(result).tolist() 89 | if len(result) == 10: 90 | result.append(bkg_col) 91 | return np.unique(result).tolist() 92 | 93 | @staticmethod 94 | def getAround(i, j, inp, size=1): 95 | # v = [-1,-1,-1,-1,-1,-1,-1,-1,-1] 96 | r, c = inp.shape 97 | v = [] 98 | sc = [0] 99 | for q in range(size): 100 | sc.append(q + 1) 101 | sc.append(-(q + 1)) 102 | for idx, (x, y) in enumerate(product(sc, sc)): 103 | ii = i + x 104 | jj = j + y 105 | v.append(-1) 106 | if (0 <= ii < r) and (0 <= jj < c): 107 | 
v[idx] = inp.data[ii, jj] 108 | return v 109 | 110 | @classmethod 111 | def getX(cls, inp, i, j, size): 112 | n_inp = inp.data 113 | z = [i, j] 114 | r, c = inp.shape 115 | for m in range(5): 116 | z.append(i % (m + 1)) 117 | z.append(j % (m + 1)) 118 | z.append(i + j) 119 | z.append(i * j) 120 | # z.append(i%j) 121 | # z.append(j%i) 122 | z.append((i + 1) / (j + 1)) 123 | z.append((j + 1) / (i + 1)) 124 | z.append(r) 125 | z.append(c) 126 | z.append(len(np.unique(n_inp[i, :]))) 127 | z.append(len(np.unique(n_inp[:, j]))) 128 | arnd = cls.getAround(i, j, inp, size) 129 | z.append(len(np.unique(arnd))) 130 | z.extend(arnd) 131 | return z 132 | 133 | @classmethod 134 | def getXy(cls, inp, oup, size): 135 | x = [] 136 | y = [] 137 | r, c = inp.shape 138 | for i in range(r): 139 | for j in range(c): 140 | # print(inp) 141 | x.append(cls.getX(inp, i, j, size)) 142 | y.append(oup.data[i][j]) 143 | return x, y 144 | 145 | @staticmethod 146 | def replace(inp, uni, perm): 147 | # uni = '234' perm = ['5','7','9'] 148 | # print(uni,perm) 149 | # print(uni, perm) 150 | r_map = {int(c): int(s) for c, s in zip(uni, perm)} 151 | r, c = inp.shape 152 | rp = inp.data.tolist() 153 | # print(rp) 154 | for i in range(r): 155 | for j in range(c): 156 | if rp[i][j] in r_map: 157 | rp[i][j] = r_map[rp[i][j]] 158 | return Field(rp) 159 | 160 | @classmethod 161 | def augment(cls, inp, oup, bl_cols): 162 | cols = "0123456789" 163 | npr_map = [1, 9, 72, 3024, 15120, 60480, 181440, 362880, 362880] 164 | uni = "".join([str(x) for x in np.unique(inp.data).tolist()]) 165 | for c in bl_cols: 166 | cols = cols.replace(str(c), "") 167 | uni = uni.replace(str(c), "") 168 | 169 | exp_size = inp.shape[0] * inp.shape[1] * npr_map[len(uni)] 170 | 171 | mod = math.floor(exp_size / 120000) 172 | mod = 1 if mod == 0 else mod 173 | 174 | # print(exp_size,mod,len(uni)) 175 | result = [] 176 | count = 0 177 | for comb in combinations(cols, len(uni)): 178 | for perm in permutations(comb): 179 | count += 1 180 | if count % mod == 0: 181 | # print(uni) 182 | result.append( 183 | (cls.replace(inp, uni, perm), cls.replace(oup, uni, perm)) 184 | ) 185 | return result 186 | 187 | @staticmethod 188 | def get_flips(i, o): 189 | result = [] 190 | # inp = input_field.data 191 | # oup = output_field.data 192 | operations = [ 193 | lambda inp: np.fliplr(inp), 194 | lambda inp: np.rot90(np.fliplr(inp), 1), 195 | lambda inp: np.rot90(np.fliplr(inp), 2), 196 | lambda inp: np.rot90(np.fliplr(inp), 3), 197 | lambda inp: np.flipud(inp), 198 | lambda inp: np.rot90(np.flipud(inp), 1), 199 | lambda inp: np.rot90(np.flipud(inp), 2), 200 | lambda inp: np.rot90(np.flipud(inp), 3), 201 | lambda inp: np.fliplr(np.flipud(inp)), 202 | lambda inp: np.flipud(np.fliplr(inp)), 203 | ] 204 | for op in operations: 205 | yield Field(op(i.data)), Field(op(o.data)) 206 | # result.append((np.fliplr(inp).tolist(),np.fliplr(oup).tolist())) 207 | # result.append((np.rot90(np.fliplr(inp),1).tolist(),np.rot90(np.fliplr(oup),1).tolist())) 208 | # result.append((np.rot90(np.fliplr(inp),2).tolist(),np.rot90(np.fliplr(oup),2).tolist())) 209 | # result.append((np.rot90(np.fliplr(inp),3).tolist(),np.rot90(np.fliplr(oup),3).tolist())) 210 | # result.append((np.flipud(inp).tolist(),np.flipud(oup).tolist())) 211 | # result.append((np.rot90(np.flipud(inp),1).tolist(),np.rot90(np.flipud(oup),1).tolist())) 212 | # result.append((np.rot90(np.flipud(inp),2).tolist(),np.rot90(np.flipud(oup),2).tolist())) 213 | # 
result.append((np.rot90(np.flipud(inp),3).tolist(),np.rot90(np.flipud(oup),3).tolist())) 214 | # result.append((np.fliplr(np.flipud(inp)).tolist(),np.fliplr(np.flipud(oup)).tolist())) 215 | # result.append((np.flipud(np.fliplr(inp)).tolist(),np.flipud(np.fliplr(oup)).tolist())) 216 | # return result 217 | 218 | @classmethod 219 | def gettaskxy(cls, iodata_list, aug, around_size, bl_cols, flip=True): 220 | X = [] 221 | Y = [] 222 | for iodata in iodata_list: 223 | inp, oup = iodata.input_field, iodata.output_field 224 | tx, ty = cls.getXy(inp, oup, around_size) 225 | X.extend(tx) 226 | Y.extend(ty) 227 | if flip: 228 | for ainp, aoup in cls.get_flips(inp, oup): 229 | tx, ty = cls.getXy(ainp, aoup, around_size) 230 | X.extend(tx) 231 | Y.extend(ty) 232 | if aug: 233 | augs = cls.augment(ainp, aoup, bl_cols) 234 | for ainp, aoup in augs: 235 | # print("1", ainp) 236 | tx, ty = cls.getXy(ainp, aoup, around_size) 237 | X.extend(tx) 238 | Y.extend(ty) 239 | if aug: 240 | augs = cls.augment(inp, oup, bl_cols) 241 | for ainp, aoup in augs: 242 | # print("2", ainp) 243 | tx, ty = cls.getXy(ainp, aoup, around_size) 244 | X.extend(tx) 245 | Y.extend(ty) 246 | return X, Y 247 | 248 | 249 | class AugmentedPredictor(Predictor, mixins.AvailableEqualShapeAndMaxNColors): 250 | def __init__(self): 251 | # self.value = value 252 | # self.multiplier = multiplier 253 | pass 254 | 255 | def predict_on_tree_model(self, inp, model, size): 256 | r, c = inp.shape 257 | oup = np.zeros(inp.shape, dtype=int) 258 | for i in range(r): 259 | for j in range(c): 260 | x = Augmenter.getX(inp, i, j, size) 261 | o = int(model.predict([x])) 262 | o = 0 if o < 0 else o 263 | oup[i][j] = o 264 | return Field(oup) 265 | 266 | def train(self, iodata_list): 267 | a_size = 4 # get_a_size(task_json) 268 | bl_cols = Augmenter.get_bl_cols(iodata_list) 269 | 270 | isflip = False 271 | X1, Y1 = Augmenter.gettaskxy(iodata_list, True, 1, bl_cols, isflip) 272 | X3, Y3 = Augmenter.gettaskxy(iodata_list, True, 3, bl_cols, isflip) 273 | X5, Y5 = Augmenter.gettaskxy(iodata_list, True, 5, bl_cols, isflip) 274 | 275 | self.model_1 = BaggingClassifier( 276 | estimator=DecisionTreeClassifier(), n_estimators=100 277 | ).fit(X1, Y1) 278 | self.model_3 = BaggingClassifier( 279 | estimator=DecisionTreeClassifier(), n_estimators=100 280 | ).fit(X3, Y3) 281 | self.model_5 = BaggingClassifier( 282 | estimator=DecisionTreeClassifier(), n_estimators=100 283 | ).fit(X5, Y5) 284 | 285 | def predict(self, field): 286 | if isinstance(field, IOData): 287 | for v in self.predict(field.input_field): 288 | yield v 289 | return 290 | # while True: 291 | # pred_map_1 = submit_predict(task_json,model_1, 1) 292 | pred1 = self.predict_on_tree_model(field, self.model_1, 1) 293 | yield pred1 294 | pred3 = self.predict_on_tree_model(field, self.model_3, 3) 295 | yield pred3 296 | pred5 = self.predict_on_tree_model(field, self.model_5, 5) 297 | yield pred5 298 | 299 | def __str__(self): 300 | return f"AugmentedPredictor()" 301 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/draft_predictors/cam_predictor.py: -------------------------------------------------------------------------------- 1 | ## TODO: this was unfinished and shouldn't be used now 2 | import rootutils 3 | 4 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 5 | 6 | 7 | import torch 8 | from torch import nn 9 | 10 | from base.field import * 11 | import predictors.availability_mixins as mixins 12 | from predictors.basic 
import Predictor 13 | from base.iodata import IOData 14 | 15 | 16 | class CAModel(nn.Module): 17 | def __init__(self, num_states): 18 | super(CAModel, self).__init__() 19 | self.transition = nn.Sequential( 20 | nn.Conv2d(num_states, 128, kernel_size=3, padding=1), 21 | nn.BatchNorm2d(128), 22 | nn.LeakyReLU(), 23 | nn.Conv2d(128, 128, kernel_size=3, padding=1), 24 | nn.BatchNorm2d(128), 25 | nn.ReLU(), 26 | nn.Conv2d(128, num_states, kernel_size=1), 27 | ) 28 | 29 | def forward(self, x, steps=1): 30 | for _ in range(steps): 31 | x = self.transition(torch.softmax(x, dim=1)) 32 | return x 33 | 34 | 35 | def solve_task(iodata_list, max_steps=10, num_epochs=100, device="cpu"): 36 | model = CAModel(10).to(device) 37 | criterion = nn.CrossEntropyLoss() 38 | losses = np.zeros((max_steps - 1) * num_epochs) 39 | 40 | for num_steps in range(1, max_steps): 41 | optimizer = torch.optim.Adam(model.parameters(), lr=(0.1 / (num_steps * 2))) 42 | 43 | for e in range(num_epochs): 44 | optimizer.zero_grad() 45 | loss = 0.0 46 | 47 | for sample in task: 48 | # predict output from input 49 | x = ( 50 | torch.from_numpy(inp2img(sample["input"])) 51 | .unsqueeze(0) 52 | .float() 53 | .to(device) 54 | ) 55 | y = torch.tensor(sample["output"]).long().unsqueeze(0).to(device) 56 | y_pred = model(x, num_steps) 57 | loss += criterion(y_pred, y) 58 | 59 | # predit output from output 60 | # enforces stability after solution is reached 61 | y_in = ( 62 | torch.from_numpy(inp2img(sample["output"])) 63 | .unsqueeze(0) 64 | .float() 65 | .to(device) 66 | ) 67 | y_pred = model(y_in, 1) 68 | loss += criterion(y_pred, y) 69 | 70 | loss.backward() 71 | optimizer.step() 72 | losses[(num_steps - 1) * num_epochs + e] = loss.item() 73 | return model, num_steps, losses 74 | 75 | 76 | class CAMPredictor(Predictor, mixins.AvailableEqualShape): 77 | def __init__(self, max_steps=10, num_epochs=100): 78 | self.max_steps = max_steps 79 | self.num_epochs = num_epochs 80 | self.device = "cpu" 81 | self.model = CAModel(10).to(self.device) 82 | self.criterion = nn.CrossEntropyLoss() 83 | # self.optimizer = torch.optim.Adam(model.parameters(), lr=(0.1 / (max_steps * 2))) 84 | pass 85 | 86 | def train(self, iodata_list): 87 | losses = np.zeros((self.max_steps - 1) * self.num_epochs) 88 | self.model.train() 89 | for num_steps in range(1, self.max_steps): 90 | optimizer = torch.optim.Adam( 91 | self.model.parameters(), lr=(0.1 / (num_steps * 2)) 92 | ) 93 | 94 | for e in range(self.num_epochs): 95 | optimizer.zero_grad() 96 | loss = 0.0 97 | 98 | for iodata in iodata_list: 99 | # predict output from input 100 | x = ( 101 | torch.from_numpy(iodata.input_field.data_splitted) 102 | .unsqueeze(0) 103 | .float() 104 | .to(self.device) 105 | ) 106 | y = ( 107 | torch.from_numpy(iodata.output_field.data) 108 | .long() 109 | .unsqueeze(0) 110 | .to(self.device) 111 | ) 112 | y_pred = self.model(x, num_steps) 113 | loss += self.criterion(y_pred, y) 114 | 115 | # predit output from output 116 | # enforces stability after solution is reached 117 | y_in = ( 118 | torch.from_numpy(iodata.output_field.data_splitted) 119 | .unsqueeze(0) 120 | .float() 121 | .to(self.device) 122 | ) 123 | y_pred = self.model(y_in, 1) 124 | loss += self.criterion(y_pred, y) 125 | 126 | loss.backward() 127 | optimizer.step() 128 | losses[(num_steps - 1) * self.num_epochs + e] = loss.item() 129 | self.losses = losses 130 | # model, num_steps, losses 131 | 132 | def predict(self, field): 133 | if isinstance(field, IOData): 134 | for v in self.predict(field.input_field): 135 | yield 
v
136 |             return
137 |         self.model.eval()
138 |         with torch.no_grad():
139 |             x = (
140 |                 torch.from_numpy(field.data_splitted)
141 |                 .unsqueeze(0)
142 |                 .float()
143 |                 .to(self.device)
144 |             )
145 |             pred = self.model(x, 100).argmax(1).squeeze().detach().cpu().numpy()
146 |             yield Field(pred)
147 | 
148 | 
149 | 
150 | 
151 | class MoverPredictor(Predictor, mixins.AvailableEqualShape):
152 |     def __init__(self):
153 |         pass
154 | 
155 |     def train(self, iodata_list):
156 |         self.transitions = []
157 |         h = []
158 |         w = []
159 |         for iodata in iodata_list:
160 |             i = iodata.input_field
161 |             o = iodata.output_field
162 |             coords = np.argwhere(i.data != o.data)
163 |             if coords.shape[0] > 0:
164 |                 xmin, ymin = np.min(coords, 0)
165 |                 xmax, ymax = np.max(coords, 0)
166 |                 start = i.data[xmin : xmax + 1, ymin : ymax + 1]
167 |                 end = o.data[xmin : xmax + 1, ymin : ymax + 1]
168 |             else:
169 |                 start = i.data.copy()
170 |                 end = o.data.copy()
171 |             self.transitions.append((start, end))
172 |             h.append(start.shape[0])
173 |             w.append(start.shape[1])
174 |         if len(np.unique(h)) == 1 and len(np.unique(w)) == 1:
175 |             self.single_step = True
176 |         else:
177 |             self.single_step = False
178 |         self.minh = np.min(h)
179 |         self.minw = np.min(w)
180 | 
181 |     def is_available(self, iodata_list):
182 |         for iodata in iodata_list:
183 |             if iodata.input_field.shape != iodata.output_field.shape:
184 |                 return False
185 |         return True
186 | 
187 |     def predict(self, field):
188 |         if isinstance(field, IOData):
189 |             for v in self.predict(field.input_field):
190 |                 yield v
191 |             return
192 |         data = field.data.copy()
193 |         offsets = np.ones(data.shape)
194 |         offsets[-self.minh + 1 :] = 0
195 |         offsets[:, -self.minw + 1 :] = 0
196 |         for _ in range(100):
197 |             something_changed = False
198 |             # print(offsets)
199 |             for offset0, offset1 in np.argwhere(offsets == 1):
200 |                 no_changes_with_offset = True
201 |                 for start, end in self.transitions:
202 |                     h, w = start.shape
203 |                     if offset0 + h > data.shape[0] or offset1 + w > data.shape[1]:
204 |                         # offsets[offset0:, offset1:] = 0
205 |                         continue
206 |                     if np.all(
207 |                         data[offset0 : offset0 + h, offset1 : offset1 + w] == start
208 |                     ):
209 |                         data[offset0 : offset0 + h, offset1 : offset1 + w] = end[:, :]
210 |                         offsets[offset0 : offset0 + h, offset1 : offset1 + w] = 1
211 |                         something_changed = True
212 |                         no_changes_with_offset = False
213 |                         if self.single_step:
214 |                             yield Field(data)
215 |                             return
216 |                         break
217 |                 if no_changes_with_offset:
218 |                     offsets[offset0, offset1] = 0
219 |             if not something_changed:
220 |                 break
221 |         yield Field(data)
--------------------------------------------------------------------------------
/kaggle_arc/predictors/draft_predictors/cf_combinator.py:
--------------------------------------------------------------------------------
1 | import rootutils
2 | 
3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
4 | 
5 | 
6 | from base.field import *
7 | from base.iodata import *
8 | from predictors.basic import Predictor
9 | from operations.reversible import *
10 | 
11 | 
12 | class SelectorCFPredictor(Predictor):
13 |     """Selects one of the patterns based on some features and returns it as the result."""
14 | 
15 |     def __init__(self):
16 |         pass
17 | 
18 |     def is_available(self, iodata_list):
19 |         for iodata in iodata_list:
20 |             if isinstance(iodata, IOData):
21 |                 return False
22 |             i, o = iodata
23 |             if not isinstance(i, ComplexField):
24 |                 return False
25 |             if not isinstance(o, ComplexField):
26 |                 return False
27 |             if o.shape != (1, 1):
28 |                 return False
29 |         return True
30 | 
31 |
def train(self, iodata_list): 32 | pass 33 | 34 | def predict(self, complex_field): 35 | yield complex_field 36 | 37 | 38 | class CombinatorCFPredictor(Predictor): 39 | def __init__(self): 40 | self.iopairs = dict() 41 | pass 42 | 43 | def is_available(self, iodata_list): 44 | for iodata in iodata_list: 45 | if isinstance(iodata, IOData): 46 | return False 47 | i, o = iodata 48 | if not isinstance(i, ComplexField): 49 | return False 50 | if not isinstance(o, ComplexField): 51 | return False 52 | if o.shape != (1, 1): 53 | return False 54 | return True 55 | 56 | def train(self, iodata_list): 57 | result = dict() 58 | for i, o in iodata_list: 59 | ifields = [[x for line in d.data for x in line] for d in i.flat_iter()] 60 | ifields = list(zip(*ifields)) 61 | ofields = [x for line in o.data[0][0].data for x in line] 62 | for inp, out in zip(ifields, ofields): 63 | if inp in result: 64 | if result[inp] != out: 65 | continue 66 | # raise Exception("incorrect solution") 67 | else: 68 | result[inp] = out 69 | self.iopairs = result 70 | 71 | def predict(self, complex_field): 72 | inp = [ 73 | [[x for x in line] for line in d.data] for d in complex_field.flat_iter() 74 | ] 75 | inp = list(zip(*inp)) 76 | 77 | # print(self.iopairs) 78 | result = [[self.iopairs.get(x, 0) for x in zip(*line)] for line in inp] 79 | cf = ComplexField([[Field(result)]]) 80 | yield cf 81 | 82 | 83 | class WrappedCFPredictor(Predictor): 84 | def __init__(self): 85 | self.combinator = CombinatorCFPredictor() 86 | self.op = None 87 | 88 | def is_available(self, iodata_list): 89 | for iodata in iodata_list: 90 | i = iodata.input_field 91 | o = iodata.output_field 92 | (oh, ow) = o.shape 93 | (ih, iw) = i.shape 94 | if oh == 1 and ow == 1: 95 | return False 96 | if oh > ih or ow > iw: 97 | return False 98 | if oh == ih and ow == iw: 99 | return False 100 | hparts = 1 101 | wparts = 1 102 | all_parts = [] 103 | for hsep in (0, 1, 2): 104 | for wsep in (0, 1, 2): 105 | for outer_sep in (True, False): 106 | if hsep == 0 and wsep == 0 and outer_sep: 107 | continue 108 | res = [] 109 | for iodata in iodata_list: 110 | if res is None: 111 | break 112 | i = iodata.input_field.data 113 | o = iodata.output_field.data 114 | (oh, ow) = o.shape 115 | (ih, iw) = i.shape 116 | if hsep > 0: 117 | hvalues = set(i[: outer_sep * hsep].flatten()) 118 | for start in range(outer_sep * hsep + oh, ih, hsep + oh): 119 | for x in np.unique(i[start : start + hsep]): 120 | hvalues.add(x) 121 | if len(hvalues) > 1: 122 | res = None 123 | break 124 | # if len(hvalues) > 1: 125 | # return False 126 | if wsep > 0: 127 | wvalues = set(i[:, : outer_sep * wsep].flatten()) 128 | for start in range(outer_sep * wsep + ow, iw, wsep + ow): 129 | for x in np.unique(i[:, start : start + wsep]): 130 | wvalues.add(x) 131 | if len(wvalues) > 1: 132 | res = None 133 | break 134 | # return False 135 | if outer_sep: 136 | ih -= hsep 137 | iw -= wsep 138 | else: 139 | ih += hsep 140 | iw += wsep 141 | h = ih // (oh + hsep) 142 | 143 | if h * (oh + hsep) != ih or h < 1: 144 | res = None 145 | continue 146 | # h -= hsep 147 | w = iw // (ow + wsep) 148 | if w * (ow + wsep) != iw or w < 1: 149 | res = None 150 | break 151 | # print(h, w, ih, oh, iw, ow) 152 | # w -= wsep 153 | res.append((h, w)) 154 | if res is None: 155 | continue 156 | res = set(res) 157 | if len(res) == 1: 158 | all_parts.append([list(res)[0], hsep, wsep, outer_sep]) 159 | if len(all_parts) < 1: 160 | return False 161 | if len(all_parts) > 1: 162 | return False 163 | (h, w), hsep, wsep, outer_sep = 
all_parts[0] 164 | self.shape = (h, w) 165 | self.hsep = hsep 166 | self.wsep = wsep 167 | self.outer_sep = outer_sep 168 | self.op = WrappedOperation( 169 | ReversibleSplit( 170 | (h, w), hsep=hsep, wsep=wsep, outer_sep=outer_sep 171 | ), # , splitter_func=split_by_shape), 172 | ReversibleCombine( 173 | (1, 1), hsep=0, wsep=0, outer_sep=False, sep_color=0 174 | ), # , splitter_func=split_by_shape) 175 | ) 176 | data = [self.op.wrap(iodata) for iodata in iodata_list] 177 | 178 | return self.combinator.is_available(data) 179 | 180 | def train(self, iodata_list): 181 | data = [self.op.wrap(iodata) for iodata in iodata_list] 182 | self.combinator.train(data) 183 | 184 | def predict(self, field): 185 | field_inp, postprocess = self.op.run(field) 186 | for x in self.combinator.predict(field_inp): 187 | yield postprocess(x) 188 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/draft_predictors/cf_filler.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | from predictors.basic import Predictor 6 | import predictors.availability_mixins as mixins 7 | from base.iodata import IOData 8 | from base.field import *  # ComplexField, imported the same way as in cf_combinator.py 9 | class FillerCFPredictor(Predictor): 10 | def __init__(self): 11 | pass 12 | 13 | def is_available(self, iodata_list): 14 | for iodata in iodata_list: 15 | if isinstance(iodata, IOData): 16 | return False 17 | i, o = iodata 18 | if not isinstance(i, ComplexField): 19 | return False 20 | if not isinstance(o, ComplexField): 21 | return False 22 | if i.shape != o.shape: 23 | return False 24 | return True 25 | 26 | def train(self, iodata_list): 27 | pass 28 | 29 | def predict(self, complex_field): 30 | yield complex_field 31 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/draft_predictors/cf_selector.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | from predictors.basic import Predictor 6 | from base.iodata import IOData 7 | from base.field import *  # ComplexField, imported the same way as in cf_combinator.py 8 | class SelectorCFPredictor(Predictor): 9 | def __init__(self): 10 | pass 11 | 12 | def is_available(self, iodata_list): 13 | for iodata in iodata_list: 14 | if isinstance(iodata, IOData): 15 | return False 16 | i, o = iodata 17 | if not isinstance(i, ComplexField): 18 | return False 19 | if not isinstance(o, ComplexField): 20 | return False 21 | if o.shape != (1, 1): 22 | return False 23 | return True 24 | 25 | def train(self, iodata_list): 26 | pass 27 | 28 | def predict(self, complex_field): 29 | yield complex_field 30 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/draft_predictors/cf_sorter.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | from predictors.basic import Predictor 6 | import predictors.availability_mixins as mixins 7 | from base.iodata import IOData 8 | from base.field import *  # ComplexField, imported the same way as in cf_combinator.py 9 | def to_tuple(field): 10 | return tuple([x for line in field.data for x in line]) 11 | 12 | class SorterCFPredictor(Predictor): 13 | def __init__(self): 14 | pass 15 | 16 | def is_available(self, iodata_list): 17 | for iodata in iodata_list: 18 | if isinstance(iodata, IOData): 19 | return False 20 | i, o = iodata 21 | if not isinstance(i, ComplexField): 22 | return 
False 23 | if not isinstance(o, ComplexField): 24 | return False 25 | if i.shape != o.shape: 26 | return False 27 | 28 | it = sorted([to_tuple(f) for f in i.flat_iter()]) 29 | ot = sorted([to_tuple(f) for f in o.flat_iter()]) 30 | if it != ot: 31 | return False 32 | return True 33 | 34 | def train(self, iodata_list): 35 | pass 36 | 37 | def predict(self, complex_field): 38 | yield complex_field 39 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/field2point.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | from predictors.basic import * 6 | from operations.field2point import SimpleSummarizeOperation 7 | import predictors.availability_mixins as mixins 8 | 9 | 10 | class SimpleSummarizePredictor(Predictor): 11 | def __init__(self): 12 | self.op = SimpleSummarizeOperation() 13 | 14 | def is_available(self, iodata_list): 15 | for iodata in iodata_list: 16 | if iodata.output_field.shape != (1, 1): 17 | return False 18 | return True 19 | 20 | def train(self, iodata_list): 21 | self.op.train(iodata_list) 22 | 23 | def predict(self, field): 24 | result = self.op.do(field, bg=self.op.bg) 25 | yield result 26 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/graph_boosting_tree.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | """ 4 | 5 | import rootutils 6 | 7 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 8 | 9 | import networkx as nx 10 | import numpy as np 11 | from xgboost import XGBClassifier 12 | from sklearn.preprocessing import LabelEncoder 13 | 14 | from predictors.basic import Predictor 15 | from predictors.boosting_tree import BTFeatureExtractor 16 | from base.field import Field 17 | from base.iodata import IOData 18 | import predictors.availability_mixins as mixins 19 | 20 | 21 | class GraphFeatureExtractor: 22 | @staticmethod 23 | def compare_components(GI, GO): 24 | if nx.number_connected_components(GI) != nx.number_connected_components(GO): 25 | return False 26 | for x, y in zip(nx.connected_components(GI), nx.connected_components(GO)): 27 | if len(x) != len(y): 28 | return False 29 | return True 30 | 31 | @staticmethod 32 | def get_comp_params(G): 33 | for x in nx.connected_components(G): 34 | gx = G.subgraph(x) 35 | nfeatures = [] 36 | positions = set() 37 | ncolors = [] 38 | props = set() 39 | comp_features = [] 40 | for n in gx.nodes.values(): 41 | ncolors.append(n["neighbour_colors"]) 42 | color = n["color"] 43 | nfeatures.append(n["features"]) 44 | positions.add(n["pos"]) 45 | comp_features = n["component_params"] 46 | if "properties" in n: 47 | props.add(n["properties"]) 48 | data = { 49 | "color": color, 50 | "features": np.stack(nfeatures, 0).sum(0), 51 | "comp_features": comp_features, 52 | "ncolors": np.stack(ncolors, 0).sum(0), 53 | "pos": positions, 54 | "size": len(x), 55 | } 56 | if len(props) > 0: 57 | data["properties"] = list(props) 58 | yield data 59 | 60 | @staticmethod 61 | def reorder(component_params_in, component_params_out): 62 | comp_dict = dict() 63 | for i, comp in enumerate(component_params_out): 64 | for pos in comp["pos"]: 65 | comp_dict[pos] = i 66 | order = [comp_dict.get(list(comp["pos"])[0]) for comp in component_params_in] 67 | component_params_out = [component_params_out[i] for i in order] 68 | 
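# reorder() aligns the two component lists by cell position: the input and
# output fields compared here have the same shape and component structure
# (see compare_components above), so the output component covering the first
# cell of an input component is taken as its counterpart. Zipping the
# reordered lists therefore yields matched (input, output) component pairs.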
return component_params_in, component_params_out 69 | 70 | @staticmethod 71 | def get_data(cpi, cpo=None, use_zeros=True): 72 | if cpo is None: 73 | for gi in cpi: 74 | if not use_zeros and gi["color"] == 0: 75 | continue 76 | # print(gi['features'].shape) 77 | # yield gi['color'], gi['features'], gi['ncolors'], gi['size'] 78 | if "properties" in gi: 79 | yield gi["color"], gi["features"], gi["comp_features"], gi[ 80 | "ncolors" 81 | ], gi["size"], gi["properties"] 82 | else: 83 | yield gi["color"], gi["features"], gi["comp_features"], gi[ 84 | "ncolors" 85 | ], gi["size"] 86 | return 87 | for gi, go in zip(cpi, cpo): 88 | if not use_zeros and gi["color"] == 0: 89 | continue 90 | target = gi["color"] != go["color"] 91 | yield gi["color"], gi["features"], gi["comp_features"], gi["ncolors"], gi[ 92 | "size" 93 | ], target * 1.0, go["color"] 94 | 95 | @staticmethod 96 | def collect_graph_data(cpi, cpo=None, use_zeros=True): 97 | if cpo is None: 98 | colors, features, comp_features, ncolors, sizes = list( 99 | zip(*GraphFeatureExtractor.get_data(cpi, use_zeros=use_zeros)) 100 | ) 101 | else: 102 | ( 103 | colors, 104 | features, 105 | comp_features, 106 | ncolors, 107 | sizes, 108 | targets_bin, 109 | targets_color, 110 | ) = list( 111 | zip(*GraphFeatureExtractor.get_data(cpi, cpo, use_zeros=use_zeros)) 112 | ) 113 | 114 | colors = np.asarray([[i == c for i in range(10)] for c in colors]).astype( 115 | float  # np.float is removed in newer numpy versions 116 | ) 117 | features = (np.stack(features, 0) > 0) * 1.0 118 | comp_features = np.stack(comp_features, 0) 119 | ncolors = (np.stack(ncolors, 0) > 0).astype(float) 120 | sizes = np.asarray(sizes).reshape(-1, 1) 121 | inputs = np.concatenate([colors, features, comp_features, ncolors, sizes], 1) 122 | if cpo is None: 123 | return inputs 124 | targets = np.asarray(targets_bin) # .reshape(-1, 1) 125 | targets_color = np.asarray(targets_color) 126 | # targets_color = np.asarray([[(c == i)*1.0 for i in range(10)]for c in targets_color]) 127 | return inputs, targets, targets_color 128 | 129 | @staticmethod 130 | def prepare_graph_features(iodata, use_zeros=True): 131 | GI = iodata.input_field.build_nxgraph(connectivity={i: 4 for i in range(10)}) 132 | GO = iodata.output_field.build_nxgraph(connectivity={i: 4 for i in range(10)}) 133 | component_params_in, component_params_out = GraphFeatureExtractor.reorder( 134 | list(GraphFeatureExtractor.get_comp_params(GI)), 135 | list(GraphFeatureExtractor.get_comp_params(GO)), 136 | ) 137 | 138 | inputs, targets, targets_color = GraphFeatureExtractor.collect_graph_data( 139 | component_params_in, component_params_out, use_zeros=use_zeros 140 | ) 141 | 142 | return inputs, targets, targets_color 143 | 
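# Rough sketch of the per-component feature rows assembled by
# collect_graph_data() above (layout follows the np.concatenate order; the
# widths of the two middle blocks depend on what Field.build_nxgraph stores
# per node):
#
#   [ one-hot color (10) | summed node features | component params |
#     neighbour-color indicators | component size (1) ]
#
# For example, a component of five same-colored cells contributes one row
# whose first ten entries one-hot encode its color and whose last entry is 5.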
144 | @staticmethod 145 | def prepare_graph_features_diff(iodata, use_zeros=False): 146 | GI = iodata.input_field.build_nxgraph( 147 | connectivity={i: 4 for i in range(10)}, 148 | properties=iodata.input_field.data != iodata.output_field.data, 149 | ) 150 | graph_data = list(GraphFeatureExtractor.get_comp_params(GI)) 151 | colors, features, comp_features, ncolors, sizes, properties = list( 152 | zip(*GraphFeatureExtractor.get_data(graph_data)) 153 | ) 154 | 155 | colors = np.asarray([[i == c for i in range(10)] for c in colors]).astype( 156 | float  # np.float is removed in newer numpy versions 157 | ) 158 | features = (np.stack(features, 0) > 0) * 1.0 159 | comp_features = np.stack(comp_features, 0) 160 | ncolors = (np.stack(ncolors, 0) > 0).astype(float) 161 | sizes = np.asarray(sizes).reshape(-1, 1) 162 | targets = np.asarray( 163 | [np.any(list(p)) * 1.0 for p in properties] 164 | ) # np.asarray(targets_bin)#.reshape(-1, 1) 165 | 166 | inputs = np.concatenate([colors, features, comp_features, ncolors, sizes], 1) 167 | 168 | return inputs, targets # , targets_color 169 | 170 | @staticmethod 171 | def prepare_graph_features_for_eval(field, use_zeros=True): 172 | GI = field.build_nxgraph(connectivity={i: 4 for i in range(10)}) 173 | graph_data = list(GraphFeatureExtractor.get_comp_params(GI)) 174 | if not use_zeros: 175 | graph_data = [g for g in graph_data if g["color"] != 0] 176 | inputs = GraphFeatureExtractor.collect_graph_data( 177 | graph_data, use_zeros=use_zeros 178 | ) 179 | 180 | return graph_data, inputs # , targets, targets_color 181 | 182 | 183 | class AvailableEqualShapeAndComponents: 184 | def is_available(self, iodata_list): 185 | for iodata in iodata_list: 186 | if iodata.input_field.shape != iodata.output_field.shape: 187 | return False 188 | for iodata in iodata_list: 189 | GI = iodata.input_field.build_nxgraph( 190 | connectivity={i: 4 for i in range(10)} 191 | ) 192 | GO = iodata.output_field.build_nxgraph( 193 | connectivity={i: 4 for i in range(10)} 194 | ) 195 | equal_shapes_of_components = GraphFeatureExtractor.compare_components( 196 | GI, GO 197 | ) 198 | if not equal_shapes_of_components: 199 | return False 200 | return True 201 | 202 | 203 | class GraphBoostingTreePredictor(Predictor, AvailableEqualShapeAndComponents): 204 | def __init__(self, n_estimators=10): 205 | self.xgb_binary = XGBClassifier( 206 | n_estimators=n_estimators, booster="dart", n_jobs=-1 207 | ) 208 | self.xgb = XGBClassifier( 209 | n_estimators=n_estimators, 210 | booster="dart", 211 | n_jobs=-1, 212 | objective="multi:softmax", 213 | num_class=10, 214 | ) 215 | self.target_encoder = LabelEncoder() 216 | 217 | def train(self, iodata_list): 218 | train_x, train_y_bin, train_y = list( 219 | zip( 220 | *[ 221 | GraphFeatureExtractor.prepare_graph_features(iodata) 222 | for iodata in iodata_list 223 | ] 224 | ) 225 | ) 226 | train_x = np.concatenate(train_x, 0) 227 | train_y_bin = np.concatenate(train_y_bin, 0) 228 | train_y = np.concatenate(train_y, 0) 229 | train_y_encoded = self.target_encoder.fit_transform(train_y) 230 | # print(train_y_bin, train_y) 231 | # feat, target, _ = GraphFeatureExtractor.prepare_graph_features(iodata_list) 232 | self.xgb_binary.fit(train_x, train_y_bin, verbose=0) 233 | self.xgb.fit(train_x, train_y_encoded, verbose=0) 234 | 235 | def predict(self, field): 236 | if isinstance(field, IOData): 237 | for v in self.predict(field.input_field): 238 | yield v 239 | return 240 | # repainter = Repaint(field.data) 241 | prediction_data = np.zeros(field.shape) 242 | graph_data, inputs = GraphFeatureExtractor.prepare_graph_features_for_eval( 243 | field 244 | ) 245 | preds_binary = self.xgb_binary.predict(inputs) 246 | preds_colors_encoded = self.xgb.predict(inputs) # .tolist() 247 | preds_colors = self.target_encoder.inverse_transform(preds_colors_encoded) 248 | # result = repainter(preds).tolist() 249 | for comp, cbin, new_col in zip(graph_data, preds_binary, preds_colors): 250 | color = int(new_col) if cbin > 0.5 else comp["color"] 251 | # if cbin > 0.5: 252 | # print("new color", new_col, "old_color", comp['color']) 253 | for i, j in comp["pos"]: 254 | prediction_data[i, j] = color 255 | 256 | yield Field(prediction_data) 257 | 258 | def __str__(self): 259 | return "GraphBoostingTreePredictor()" 260 | 261 | 
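# Typical usage of the predictor above (a sketch; `sample` stands for a
# base.iodata.Sample whose .train and .test lists hold IOData pairs):
#
#   p = GraphBoostingTreePredictor()
#   if p.is_available(sample.train):
#       p.train(sample.train)
#       for prediction in p.predict(sample.test[0]):
#           ...  # `prediction` is a Field with per-component recoloring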
262 | class GraphBoostingTreePredictor2(Predictor): 263 | def __init__(self, n_estimators=10): 264 | # self.xgb_binary = XGBClassifier(n_estimators=n_estimators, booster="dart", n_jobs=-1) 265 | # self.xgb = XGBClassifier(n_estimators=n_estimators, booster="dart", n_jobs=-1, 266 | # objective="multi:softmax", num_class=10) 267 | self.xgb_classifiers = [] 268 | self.target_encoders = [] 269 | 270 | def is_available(self, iodata_list): 271 | for iodata in iodata_list: 272 | if iodata.input_field.shape != iodata.output_field.shape: 273 | return False 274 | components = [] 275 | components_nonzero = [] 276 | for iodata in iodata_list: 277 | GI = iodata.input_field.build_nxgraph( 278 | connectivity={i: 4 for i in range(10)} 279 | ) 280 | GO = iodata.output_field.build_nxgraph( 281 | connectivity={i: 4 for i in range(10)} 282 | ) 283 | equal_shapes_of_components = GraphFeatureExtractor.compare_components( 284 | GI, GO 285 | ) 286 | if not equal_shapes_of_components: 287 | return False 288 | compdata = list(GraphFeatureExtractor.get_comp_params(GI)) 289 | components.append(len(compdata)) 290 | components_nonzero.append(len([gi for gi in compdata if gi["color"] != 0])) 291 | self.ncomponents = -1 292 | # print(components, components_nonzero) 293 | if len(components) < 1: 294 | return False 295 | if len(np.unique(components)) == 1: 296 | self.use_zeros = True 297 | self.ncomponents = np.unique(components)[0] 298 | if len(components_nonzero) > 0: 299 | if len(np.unique(components_nonzero)) == 1: 300 | self.use_zeros = False 301 | self.ncomponents = np.unique(components_nonzero)[0] 302 | # [GraphFeatureExtractor.prepare_graph_features(iodata) 303 | # for iodata in iodata_list])) 304 | if self.ncomponents < 1: 305 | return False 306 | return True 307 | 308 | def train(self, iodata_list, n_estimators=20): 309 | # train_x, train_y_bin, train_y = list( 310 | train_sets = [[] for i in range(self.ncomponents)] 311 | for iodata in iodata_list: 312 | features, target_binary, target = ( 313 | GraphFeatureExtractor.prepare_graph_features(iodata, self.use_zeros) 314 | ) 315 | for i in range(min(features.shape[0], self.ncomponents)): 316 | train_sets[i].append((features[i], target_binary[i], target[i])) 317 | # print(len(train_sets)) 318 | for ts in train_sets: 319 | features, target_binary, target = list(zip(*ts)) 320 | features = np.stack(features, 0) 321 | target = np.stack(target, 0) 322 | xgb = XGBClassifier( 323 | n_estimators=n_estimators, 324 | booster="dart", 325 | n_jobs=-1, 326 | objective="multi:softmax", 327 | num_class=10, 328 | ) 329 | te = LabelEncoder() 330 | encoded_target = te.fit_transform(target) 331 | xgb.fit(features, encoded_target) 332 | self.xgb_classifiers.append(xgb) 333 | self.target_encoders.append(te) 334 | 335 | def predict(self, field): 336 | if isinstance(field, IOData): 337 | for v in self.predict(field.input_field): 338 | yield v 339 | return 340 | # repainter = Repaint(field.data) 341 | prediction_data = np.zeros(field.shape) 342 | # print(self.use_zeros) 343 | graph_data, inputs = GraphFeatureExtractor.prepare_graph_features_for_eval( 344 | field, self.use_zeros 345 | ) 346 | if inputs.shape[0] < 1: 347 | yield field; return  # a generator cannot return a value to the caller, so yield the field unchanged 348 | all_predictions = [] 349 | # print(inputs.shape, len(graph_data), len(self.xgb_classifiers)) 350 | # print(len(self.xgb_classifiers), inputs.shape) 351 | for i in range(min(inputs.shape[0], self.ncomponents)): 352 | xgb = self.xgb_classifiers[i] 353 | encoded_predictions = xgb.predict([inputs[i]]) 354 | predictions = self.target_encoders[i].inverse_transform(encoded_predictions) 355 | all_predictions.append(predictions) 356 | # result = repainter(preds).tolist() 357 | # 
TODO: check dimensions 358 | for comp, color in zip(graph_data, all_predictions): 359 | for i, j in comp["pos"]: 360 | prediction_data[i, j] = color 361 | 362 | yield Field(prediction_data) 363 | 364 | def __str__(self): 365 | return "GraphBoostingTreePredictor2()" 366 | 367 | 368 | class GraphBoostingTreePredictor3(Predictor, mixins.AvailableEqualShapeAndLessThanNComponents): 369 | def __init__(self, n_estimators=100, max_components=10): 370 | # self.xgb_binary = XGBClassifier(n_estimators=n_estimators, booster="dart", n_jobs=-1) 371 | # self.xgb = XGBClassifier(n_estimators=n_estimators, booster="dart", n_jobs=-1, 372 | # objective="multi:softmax", num_class=10) 373 | self.n_estimators = n_estimators 374 | self.max_components = max_components 375 | 376 | self.xgb_binary = XGBClassifier( 377 | n_estimators=n_estimators, booster="dart", n_jobs=-1 378 | ) 379 | self.xgb = XGBClassifier( 380 | n_estimators=n_estimators, 381 | booster="dart", 382 | n_jobs=-1, 383 | objective="multi:softmax", 384 | num_class=10, 385 | ) 386 | self.use_zeros = True 387 | self.target_encoder = LabelEncoder() 388 | 389 | def is_available(self, iodata_list): 390 | return super().is_available(iodata_list, n_components=self.max_components) 391 | 392 | def _make_train_binary_features(self, iodata_list): 393 | train_x_binary, train_y_binary = list( 394 | zip( 395 | *[ 396 | GraphFeatureExtractor.prepare_graph_features_diff( 397 | iodata, self.use_zeros 398 | ) 399 | for iodata in iodata_list 400 | ] 401 | ) 402 | ) 403 | train_x_binary = np.concatenate(train_x_binary, 0) 404 | train_y_binary = np.concatenate(train_y_binary, 0) 405 | return train_x_binary, train_y_binary 406 | 407 | def train(self, iodata_list, n_estimators=20): 408 | # train_x, train_y_bin, train_y = list( 409 | train_x_binary, train_y_binary = self._make_train_binary_features(iodata_list) 410 | # feat, target, _ = GraphFeatureExtractor.prepare_graph_features(iodata_list) 411 | self.xgb_binary.fit(train_x_binary, train_y_binary, verbose=0) 412 | # print("binary",train_y_binary) 413 | 414 | feat, target, _ = BTFeatureExtractor.get_features( 415 | iodata_list, features_maker=BTFeatureExtractor.make_features_v2 416 | ) 417 | encoded_target = self.target_encoder.fit_transform(target) 418 | # print(target) 419 | self.xgb.fit(feat, encoded_target, verbose=0) 420 | # next - train xgboost 421 | 422 | def validate_binary(self, iodata_list): 423 | train_x_binary, train_y_binary = self._make_train_binary_features(iodata_list) 424 | return self.xgb_binary.predict(train_x_binary), train_y_binary 425 | 426 | def predict(self, field, return_binary=False): 427 | if isinstance(field, IOData): 428 | for v in self.predict(field.input_field): 429 | yield v 430 | return 431 | # repainter = Repaint(field.data) 432 | nrows, ncols = field.shape 433 | prediction_data = np.zeros(field.shape, dtype=np.uint8) 434 | # print(self.use_zeros) 435 | graph_data, inputs = GraphFeatureExtractor.prepare_graph_features_for_eval( 436 | field, self.use_zeros 437 | ) 438 | if inputs.shape[0] < 1: 439 | # return field 440 | preds_binary = [] 441 | else: 442 | preds_binary = self.xgb_binary.predict(inputs) 443 | 444 | feat = BTFeatureExtractor.make_features_v2(field) 445 | preds = self.xgb.predict(feat) 446 | preds = self.target_encoder.inverse_transform(preds) 447 | preds = preds.reshape(nrows, ncols) 448 | preds = preds.astype(int) # .tolist() 449 | # result = repainter(preds).tolist() 450 | prediction_data = preds 451 | if len(preds) > 0 and np.sum(preds) > 0: 452 | for comp, cbin in 
zip(graph_data, preds_binary): 453 | # color = int(new_col) if cbin > 0.5 else comp['color'] 454 | # if cbin > 0.5: 455 | # print("new color", new_col, "old_color", comp['color']) 456 | for i, j in comp["pos"]: 457 | if cbin > 0.5: 458 | prediction_data[i, j] = preds[i, j] 459 | else: 460 | prediction_data[i, j] = comp["color"] 461 | if return_binary: 462 | yield preds_binary, graph_data, Field(prediction_data) 463 | else: 464 | yield Field(prediction_data) 465 | 466 | def __str__(self): 467 | return "GraphBoostingTreePredictor3()" 468 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/shapes.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | import numpy as np 6 | 7 | from base.field import Field 8 | from base.iodata import IOData 9 | from base.transformers import resize_output, crop_data 10 | 11 | from predictors.basic import * 12 | 13 | # Predictor, AvailableAll, AvailableWithIntMultiplier, AvailableMirror 14 | from predictors.boosting_tree import BoostingTreePredictor 15 | 16 | from operations.basic import Repaint 17 | from operations.resizing import Repeater, Resizer, Fractal, Mirror 18 | # from utils import check_if_can_be_mirrored 19 | import predictors.availability_mixins as mixins 20 | 21 | 22 | class RepeatingPredictor(Predictor, mixins.AvailableWithIntMultiplier): 23 | def __init__(self, args=[], kwargs=dict()): 24 | # self.predictor = predictor_class(*args, **kwargs) 25 | pass 26 | 27 | def train(self, iodata_list): 28 | # self.predictor.train(iodata_list) 29 | pass 30 | 31 | def predict(self, field): 32 | if isinstance(field, IOData): 33 | for v in self.predict(field.input_field): 34 | yield v 35 | return 36 | repeater = Repeater(self.m1, self.m2) 37 | result = repeater(field.data) 38 | yield Field(result) 39 | 40 | def __str__(self): 41 | return f"RepeatingPredictor(m1={self.m1}, m2={self.m2})" 42 | 43 | 44 | class MirrorPredictor(Predictor, mixins.AvailableMirror): 45 | def __init__(self, predictor=BoostingTreePredictor): 46 | self.predictor = predictor() 47 | 48 | def train(self, iodata_list): 49 | self.mirror = Mirror( 50 | self.m1, self.m2, vertical=self.vertical, horizontal=self.horizontal 51 | ) 52 | self.predictor.train(resize_output(iodata_list)) 53 | # train_ds[i].show(predictor=predictor) 54 | 55 | def freeze_by_score(self, iodata_list): 56 | self.predictor.freeze_by_score(resize_output(iodata_list)) 57 | 58 | def predict(self, field): 59 | if isinstance(field, IOData): 60 | for v in self.predict(field.input_field): 61 | yield v 62 | return 63 | repainter = Repaint(field.data) 64 | for prediction in self.predictor.predict(field): 65 | result = self.mirror(prediction.data) 66 | result = repainter(result) 67 | yield Field(result) 68 | 69 | def __str__(self): 70 | return f"MirrorPredictor(m1={self.m1}, m2={self.m2})" 71 | 72 | 73 | class ResizingPredictor(Predictor, mixins.AvailableWithIntMultiplier): 74 | def __init__(self): 75 | pass 76 | 77 | def train(self, iodata_list): 78 | pass 79 | 80 | def predict(self, field): 81 | if isinstance(field, IOData): 82 | for v in self.predict(field.input_field): 83 | yield v 84 | return 85 | resizer = Resizer(self.m1, self.m2) 86 | result = resizer(field.data) 87 | yield Field(result) 88 | 89 | def __str__(self): 90 | return f"ResizingPredictor(m1={self.m1}, m2={self.m2})" 91 | 92 | 
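# RepeatingPredictor and ResizingPredictor above, and FractalPredictor below,
# differ only in the operations.resizing transform they apply. None of them
# assign self.m1/self.m2 directly, so mixins.AvailableWithIntMultiplier
# presumably derives these integer height/width multipliers between input and
# output shapes inside is_available(); see predictors/availability_mixins.py.
93 | class 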
FractalPredictor(Predictor, mixins.AvailableWithIntMultiplier): 94 | def __init__(self): 95 | pass 96 | 97 | def train(self, iodata_list): 98 | pass 99 | 100 | def predict(self, field): 101 | if isinstance(field, IOData): 102 | for v in self.predict(field.input_field): 103 | yield v 104 | return 105 | fractal = Fractal(self.m1, self.m2) 106 | result = fractal(field.data) 107 | yield Field(result) 108 | 109 | def __str__(self): 110 | return f"FractalPredictor(m1={self.m1}, m2={self.m2})" 111 | 112 | 113 | def change_colors(data, background_colors=[]): 114 | colormap = {c: 0 for c in background_colors} 115 | # colormap = dict() 116 | if len(np.unique(data)) == len(background_colors): 117 | colormap = dict() 118 | 119 | current_id = 1 120 | for line in data: 121 | if len(colormap) > 10: 122 | break 123 | for c in line: 124 | if not c in colormap: 125 | colormap[c] = current_id 126 | current_id += 1 # chr(ord(current_id) + 1) 127 | # print(line) 128 | # redraw 129 | # print(colormap) 130 | data_modified = np.asarray([[colormap[c] for c in line] for line in data]) 131 | return data_modified 132 | 133 | 134 | def process_iodata_input(iodata, pattern, crop_func=crop_data): 135 | i = iodata.input_field 136 | o = iodata.output_field 137 | # bg = {c: 0 for c in o.data[np.where(pattern == 0)]} 138 | bg = dict(list(zip(*np.stack([o.data, pattern], 0).reshape(2, -1)))) 139 | # current_id = 1 140 | cropped_data = crop_func(i.data) 141 | # #print(cropped_data, id(i.data)) 142 | # for line in cropped_data: 143 | # for x in line: 144 | # if x in bg: 145 | # continue 146 | # bg[x] = current_id 147 | # current_id += 1 148 | # return np.asarray([ [ bg[x] for x in line ] for line in cropped_data ]) 149 | data = [[bg.get(x, 0) for x in line] for line in cropped_data] 150 | data = Field(data) 151 | return iodata.reconstruct(data).data 152 | 153 | 154 | class ConstantShaper(Predictor): 155 | def __init__(self): 156 | self.pattern = None 157 | 158 | def is_available(self, iodata_list): 159 | colormaps = [ 160 | change_colors( 161 | iodata.output_field.data, 162 | background_colors=[ 163 | c 164 | for c in range(10) 165 | if np.sum(iodata.input_field.data == c) 166 | >= np.sum(iodata.output_field.data == c) 167 | ], 168 | ) 169 | for iodata in iodata_list 170 | ] 171 | if len(colormaps) < 1: 172 | return False 173 | shapes = {c.shape for c in colormaps} 174 | if len(shapes) != 1: 175 | return False 176 | # print(colormaps, np.stack(colormaps).std()) 177 | # print((np.unique(np.stack(colormaps)))) 178 | if ( 179 | np.stack(colormaps).std(0).max() > 0 180 | or len(np.unique(np.stack(colormaps))) == 1 181 | ): 182 | for background_colors in [[]] + [[i] for i in range(10)]: 183 | colormaps = [ 184 | change_colors( 185 | iodata.output_field.data, background_colors=background_colors 186 | ) 187 | for iodata in iodata_list 188 | ] 189 | # print(background_colors, colormaps) 190 | if len(colormaps) < 1: 191 | return False 192 | shapes = {c.shape for c in colormaps} 193 | if len(shapes) != 1: 194 | return False 195 | if ( 196 | np.stack(colormaps).std(0).max() <= 0 197 | and np.stack(colormaps).std() > 0 198 | ): 199 | break 200 | # print(np.stack(colormaps).std(0)) 201 | if np.stack(colormaps).std(0).max() > 0: 202 | return False 203 | self.pattern = colormaps[0] 204 | # if self.pattern.std() == 0: 205 | # return False 206 | return True 207 | 208 | def train(self, iodata_list): 209 | self.input_pattern = None 210 | self.crop_func = lambda x: x 211 | if self.pattern.std() == 0: 212 | return 213 | 214 | for crop_func in 
[lambda x: x, crop_data]: 215 | colormap = [ 216 | process_iodata_input(iodata, self.pattern, crop_func) 217 | for iodata in iodata_list 218 | ] 219 | # print(colormap) 220 | if len({x.shape for x in colormap}) == 1: 221 | break 222 | # print({ x.shape for x in colormap }, colormap) 223 | if len({x.shape for x in colormap}) == 1: 224 | self.crop_func = crop_func 225 | 226 | if np.stack(colormap, 0).std(0).max() <= 0.0: 227 | self.input_pattern = colormap[0] 228 | # self.input_pattern = None 229 | # # actual training is done in is_available method 230 | # for iodata in iodata_list: 231 | # i = iodata.input_field 232 | # o = iodata.output_field 233 | # np.stack([o.data, self.pattern], 0) 234 | # pass 235 | pass 236 | 237 | def predict(self, field): 238 | if isinstance(field, IOData): 239 | for v in self.predict(field.input_field): 240 | yield v 241 | # yield field.reconstruct(v) 242 | return 243 | if self.input_pattern is not None: 244 | color_convertor = dict( 245 | set( 246 | list( 247 | zip( 248 | *np.stack( 249 | [self.input_pattern, self.crop_func(field.data)], 0 250 | ).reshape(2, -1) 251 | ) 252 | ) 253 | ) 254 | ) 255 | result = np.asarray( 256 | [[color_convertor.get(x, x) for x in line] for line in self.pattern] 257 | ) 258 | result = Field(result) 259 | yield result # field.reconstruct(result) 260 | # return 261 | data = self.crop_func(field.data) 262 | h, w = data.shape 263 | h = min(self.pattern.shape[0], h) 264 | w = min(self.pattern.shape[1], w) 265 | ss = self.pattern[:h, :w] 266 | background_colors = np.unique(data[np.where(ss == 0)]) 267 | colormap = { 268 | i: [ 269 | c 270 | for c in np.unique(data[np.where(ss == i)]) 271 | if i == 0 or c not in background_colors 272 | ] 273 | for i in np.unique(ss) 274 | } 275 | result = np.zeros(self.pattern.shape, dtype=np.uint8) 276 | result[:h, :w] = data[:h, :w] 277 | for key in colormap: 278 | if key == 0: 279 | continue 280 | value = colormap[key] 281 | if len(value) < 1: 282 | continue 283 | coords = np.where(self.pattern == key) 284 | result[coords] = value[0] 285 | yield Field(result) # field.reconstruct(Field(result)) 286 | 287 | def __str__(self): 288 | return "ConstantShaper" 289 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/subpattern.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | from xgboost import XGBClassifier 6 | 7 | from operations.reversible import * 8 | 9 | from predictors.basic import * # BTFeatureExtractor, BoostingTreePredictor2 10 | from predictors.boosting_tree import * 11 | import predictors.availability_mixins as mixins 12 | 13 | 14 | class SubpatternMatcher: 15 | @staticmethod 16 | def get_separator_length(sequence, size): 17 | if len(sequence) == 0: 18 | return None 19 | if len(sequence) == 1 and size // 2 == sequence[0]: 20 | return sequence[0] 21 | w = sequence[0] 22 | if w <= 1 or w > size // 2: 23 | return None 24 | xlast = w 25 | for x in sequence[1:]: 26 | if x != xlast + w + 1: 27 | return None 28 | xlast = x  # advance so consecutive separators must sit w + 1 apart 29 | return w 30 | @staticmethod 31 | def get_separating_lines(i): 32 | for c in np.unique(i.flatten()): 33 | bmap = i == c 34 | s0 = bmap.std(0) == 0 35 | s1 = bmap.std(1) == 0 36 | s0 = bmap.all(0) * s0 37 | s1 = bmap.all(1) * s1 38 | 39 | # print(bmap[:, s0==0]) 40 | s0 = np.argwhere(s0).flatten() 41 | s1 = np.argwhere(s1).flatten() 42 | 
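# At this point s0/s1 hold the indices of the columns/rows that consist
# entirely of color c; get_separator_length() then checks that these indices
# are evenly spaced (w, 2w + 1, 3w + 2, ...), i.e. that they form one-cell
# separator lines between tiles of size w, and returns that tile size.
r0 = SubpatternMatcher.get_separator_length(s0, 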
i.shape[1]) 43 | r1 = SubpatternMatcher.get_separator_length(s1, i.shape[0]) 44 | if r0 is None and r1 is None: 45 | # yield c, i.shape[0], i.shape[1] 46 | continue 47 | if r0 is None: 48 | yield c, r1, i.shape[1] 49 | continue 50 | if r1 is None: 51 | yield c, i.shape[0], r0 52 | continue 53 | yield c, r1, r0 54 | 55 | @staticmethod 56 | def get_availability_param(iodata): 57 | i = iodata.input_field.data 58 | o = iodata.output_field.data 59 | isep = {(h, w): c for c, h, w in SubpatternMatcher.get_separating_lines(i)} 60 | osep = {(h, w): c for c, h, w in SubpatternMatcher.get_separating_lines(o)} 61 | 62 | common_areas = isep.keys() & osep.keys() 63 | if len(common_areas) < 1: 64 | return None 65 | return {k: (isep[k], osep[k]) for k in common_areas} 66 | 67 | @staticmethod 68 | def process_iodata_list(iodata_list): 69 | all_params = [] 70 | total = set() 71 | for t in iodata_list: 72 | param = SubpatternMatcher.get_availability_param(t) 73 | if param is None: 74 | return set(), [] 75 | for k in param.keys(): 76 | total.add(k) 77 | all_params.append(param) 78 | sizes = {k for k in total if np.all([k in x for x in all_params])} 79 | all_params = [{k: x[k] for k in sizes} for x in all_params] 80 | return sizes, all_params 81 | 82 | 83 | class SubpatternMatcherPredictor(Predictor): 84 | 85 | def __init__(self): 86 | self.xgb = XGBClassifier( 87 | n_estimators=10, 88 | booster="dart", 89 | n_jobs=-1, 90 | objective="multi:softmax", 91 | num_class=10, 92 | ) 93 | pass 94 | 95 | def is_available(self, iodata_list): 96 | for iodata in iodata_list: 97 | if iodata.input_field.shape != iodata.output_field.shape: 98 | return False 99 | # m1 = iodata.output_field.shape # iodata.input_field.height // iodata.output_field.height 100 | # m2 = iodata.output_field.width # iodata.input_field.width // iodata.output_field.width 101 | # all_sizes.add((m1, m2)) 102 | sizes, params = SubpatternMatcher.process_iodata_list(iodata_list) 103 | if len(sizes) < 1: 104 | return False 105 | self.sizes = sizes 106 | self.params = params 107 | (h, w) = list(sizes)[0] 108 | self.op = WrappedOperation( 109 | ReversibleSplit( 110 | (h, w), hsep=1, wsep=1, outer_sep=False, splitter_func=split_by_shape 111 | ), 112 | ReversibleCombine( 113 | (h, w), 114 | hsep=1, 115 | wsep=1, 116 | outer_sep=False, 117 | sep_color=5, 118 | splitter_func=split_by_shape, 119 | ), 120 | ) 121 | # self.op.train(iodata_list) 122 | return True 123 | 124 | def train(self, iodata_list): 125 | all_samples = [] 126 | self.op.train(iodata_list) 127 | for iodata in iodata_list: 128 | i, o = self.op.wrap(iodata) 129 | all_samples.append((i, o)) 130 | all_samples = [ 131 | (xi, xo) 132 | for (i, o) in all_samples 133 | for xi, xo in zip(i.flat_iter(), o.flat_iter()) 134 | # for li, lo in zip(i, o) 135 | # for xi, xo in zip(li, lo) 136 | ] 137 | # print(all_samples) 138 | feat, target, _ = BTFeatureExtractor.get_features( 139 | all_samples, features_maker=BTFeatureExtractor.make_features_v3 140 | ) 141 | # print(feat.shape, target.shape) 142 | self.xgb.fit(feat, target, verbose=-1) 143 | 144 | def predict(self, field): 145 | if isinstance(field, IOData): 146 | for v in self.predict(field.input_field): 147 | yield v 148 | return 149 | # repainter = Repaint(field.data) 150 | 151 | feature_field, postprocess = self.op.run(field) 152 | 153 | # print(feature_field) 154 | def predict_on_subfield(x): 155 | nrows, ncols = x.shape 156 | feat = BTFeatureExtractor.make_features_v3(x) 157 | preds = self.xgb.predict(feat).reshape(nrows, ncols) 158 | preds = 
preds.astype(int) # .tolist() 159 | # print(x.data) 160 | return Field(preds) 161 | 162 | lines = feature_field.map(predict_on_subfield) 163 | result = postprocess(lines) 164 | yield result 165 | 166 | -------------------------------------------------------------------------------- /kaggle_arc/scripts/0_idpredictor_on_test_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | from constants import * 5 | from base.field import Field 6 | 7 | from utils import * 8 | from base.field import * 9 | 10 | from predictors.basic import IdPredictor 11 | 12 | 13 | datasets = read_datasets(DATADIR) 14 | train_ds, eval_ds, test_ds = [convert2samples(x) for x in datasets] 15 | 16 | # predictor = IdPredictor() 17 | 18 | save_predictions(IdPredictor, test_ds, TEST_SAVEPATH) 19 | -------------------------------------------------------------------------------- /kaggle_arc/scripts/1_complexpredictor_on_test_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | from constants import * 5 | from base.field import Field 6 | 7 | from utils import * 8 | from base.field import * 9 | 10 | from predictors.basic import IdPredictor, ZerosPredictor, ConstPredictor 11 | from predictors.complex import ComplexPredictor 12 | 13 | 14 | datasets = read_datasets(DATADIR) 15 | train_ds, eval_ds, test_ds = [convert2samples(x) for x in datasets] 16 | 17 | # predictor = IdPredictor() 18 | predictor_args = [IdPredictor, ZerosPredictor] 19 | for i in range(1, 10): 20 | predictor_args.append((ConstPredictor, [], {"value": i})) 21 | 22 | save_predictions(ComplexPredictor, test_ds, TEST_SAVEPATH, k=3, args=[predictor_args]) 23 | -------------------------------------------------------------------------------- /kaggle_arc/scripts/2_complexpredictor_coloring.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | from constants import * 5 | from base.field import Field 6 | 7 | from utils import * 8 | from base.field import * 9 | 10 | from predictors.basic import IdPredictor, ZerosPredictor, ConstPredictor, FillPredictor 11 | from predictors.complex import ComplexPredictor 12 | from predictors.color_counting import ColorCountingPredictor 13 | from predictors.shapes import ( 14 | RepeatingPredictor, 15 | FractalPredictor, 16 | ResizingPredictor, 17 | MirrorPredictor, 18 | ConstantShaper, 19 | ) 20 | from predictors.boosting_tree import ( 21 | BoostingTreePredictor, 22 | BoostingTreePredictor2, 23 | BoostingTreePredictor3, 24 | ) 25 | from predictors.convolution import ConvolutionPredictor 26 | from predictors.graph_boosting_tree import ( 27 | GraphBoostingTreePredictor, 28 | GraphBoostingTreePredictor2, 29 | GraphBoostingTreePredictor3, 30 | ) 31 | from predictors.decision_tree import AugmentedPredictor 32 | from predictors.subpattern import SubpatternMatcherPredictor 33 | from predictors.connector import PointConnectorPredictor 34 | from predictors.draft_predictors.cam_predictor import *  # these modules live under draft_predictors/ 35 | from predictors.draft_predictors.cf_combinator import WrappedCFPredictor 36 | 37 | 38 | datasets = read_datasets(DATADIR) 39 | train_ds, eval_ds, test_ds = [convert2samples(x) for x in datasets] 40 | 41 | # predictor = IdPredictor() 42 | predictor_args = [ 43 | IdPredictor, 44 | ZerosPredictor, 45 | # ColorCountingPredictor, 46 | RepeatingPredictor, 47 | FractalPredictor, 48 | 
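# Entries may be plain predictor classes or (class, args, kwargs) tuples, as
# in the commented-out ConvolutionPredictor lines below; ComplexPredictor
# presumably instantiates every available candidate per task and keeps the
# best-scoring predictions (k=3 in save_predictions at the bottom).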
ResizingPredictor, 49 | # GraphBoostingTreePredictor,#no impact 50 | # GraphBoostingTreePredictor3, 51 | # ConstantShaper, 52 | # BoostingTreePredictor, 53 | # BoostingTreePredictor2, 54 | BoostingTreePredictor3, 55 | SubpatternMatcherPredictor, 56 | # GraphBoostingTreePredictor2, 57 | # PointConnectorPredictor, 58 | # AugmentedPredictor, 59 | # FillPredictor, 60 | WrappedCFPredictor, 61 | MirrorPredictor, 62 | # (ConvolutionPredictor, [], {'loss': 'mse'}), 63 | # (ConvolutionPredictor, [], {'loss': 'dice'}) 64 | ] 65 | # for i in range(1, 10): 66 | # predictor_args.append((ConstPredictor, [], {'value': i})) 67 | 68 | save_predictions(ComplexPredictor, test_ds, TEST_SAVEPATH, k=3, args=[predictor_args]) 69 | -------------------------------------------------------------------------------- /kaggle_arc/scripts/convert2ipynb.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import sys 5 | import networkx as nx 6 | import subprocess 7 | import matplotlib 8 | 9 | try: 10 | if not matplotlib.is_interactive(): 11 | matplotlib.use("svg") 12 | except: 13 | pass 14 | 15 | import matplotlib.pyplot as plt 16 | 17 | TEMPLATE = { 18 | "cells": [], 19 | "metadata": { 20 | "kernelspec": { 21 | "display_name": "Python 3", 22 | "language": "python", 23 | "name": "python3", 24 | }, 25 | "language_info": { 26 | "codemirror_mode": {"name": "ipython", "version": 3}, 27 | "file_extension": ".py", 28 | "mimetype": "text/x-python", 29 | "name": "python", 30 | "nbconvert_exporter": "python", 31 | "pygments_lexer": "ipython3", 32 | "version": "3.7.3", 33 | }, 34 | }, 35 | "nbformat": 4, 36 | "nbformat_minor": 2, 37 | } 38 | 39 | 40 | def filter_imports(lines, debug=False): 41 | ids = dict() 42 | for i, l in enumerate(lines): 43 | if l.startswith("import "): 44 | ids[i] = l[6:].strip().split()[0] 45 | continue 46 | import_pos = l.find(" import ") 47 | if import_pos < 0: 48 | continue 49 | package = l[:import_pos].split()[1] 50 | ids[i] = package 51 | if debug: 52 | print(ids) 53 | return ids 54 | 55 | 56 | def filter_local(ids, local_dirs=[], basedir="..", debug=False): 57 | result = dict() 58 | 59 | for k, v in ids.items(): 60 | path = os.path.join(basedir, v.replace(".", os.sep)) 61 | if not (path + ".py" in local_dirs): 62 | continue 63 | 64 | # path = os.path.join(basedir, s) 65 | if os.path.exists(path + ".py"): 66 | result[k] = (v, path + ".py") 67 | if os.path.exists(path) and os.path.isdir(path): 68 | if os.path.exists(path + "/__init__.py"): 69 | result[k] = (v, path + "/__init__.py") 70 | return result 71 | 72 | 73 | def filter_local_and_remote(ids, local_dirs=[], basedir="..", debug=False): 74 | local_imports = dict() 75 | remote_imports = dict() 76 | 77 | for k, v in ids.items(): 78 | path = os.path.join(basedir, v.replace(".", os.sep)) 79 | if not (path + ".py" in local_dirs): 80 | remote_imports[k] = (v, path) 81 | continue 82 | 83 | # path = os.path.join(basedir, s) 84 | if os.path.exists(path + ".py"): 85 | local_imports[k] = (v, path + ".py") 86 | if os.path.exists(path) and os.path.isdir(path): 87 | if os.path.exists(path + "/__init__.py"): 88 | local_imports[k] = (v, path + "/__init__.py") 89 | return local_imports, remote_imports 90 | 91 | 92 | def walk_deps( 93 | filename, 94 | processed=set(), 95 | local_dirs=[], 96 | basedir="..", 97 | debug=False, 98 | split_header=True, 99 | ): 100 | with open(filename) as f: 101 | lines = f.readlines() 102 | # local_dirs = [ 103 | # os.path.join(base, f) for base, dirs, 
files in os.walk(basedir) 104 | # for f in files 105 | # if os.path.splitext(f)[-1]==".py"] 106 | header_lines = [filename] 107 | if split_header: 108 | if lines[0].strip() == '"""': 109 | for i in range(1, len(lines)): 110 | pos = lines[i].strip().find('"""') 111 | if pos >= 0: 112 | break 113 | if pos >= 0: 114 | header_lines = lines[1:i] 115 | header_lines.append(lines[i][:pos]) 116 | lines = lines[i + 1 :] 117 | 118 | # print(local_files) 119 | ids = filter_imports(lines, debug=debug) 120 | local_imports, remote_imports = filter_local_and_remote( 121 | ids, local_dirs=local_dirs, basedir=basedir, debug=debug 122 | ) 123 | # ids_ = { k: (package, path) 124 | # for k, (package, path) in ids.items() 125 | # #if not path in processed 126 | # } 127 | # if len(ids_) < 1: 128 | remote_import_lines = [lines[k] for k in remote_imports] 129 | lines = [l for i, l in enumerate(lines) if not i in ids] 130 | yield filename, header_lines, lines, local_imports, remote_import_lines 131 | 132 | # lines = [ l for i, l in enumerate(lines) if not i in ids ] 133 | paths = set() 134 | for k, (package, path) in local_imports.items(): 135 | if path in processed: 136 | continue 137 | paths.add(path) 138 | new_processed = set() 139 | for path in paths: 140 | new_processed.add(path) 141 | for w in walk_deps( 142 | path, {*processed, *new_processed}, local_dirs=local_dirs, basedir=basedir 143 | ): 144 | yield w 145 | 146 | 147 | def make_graph(start_file="../scripts/runner.py", basedir=".."): 148 | data = [] 149 | nodes = dict() 150 | node_names = [] 151 | local_dirs = [ 152 | os.path.join(base, f) 153 | for base, dirs, files in os.walk(basedir) 154 | for f in files 155 | if os.path.splitext(f)[-1] == ".py" 156 | ] 157 | # print(local_dirs) 158 | all_remote_imports = set() 159 | G = nx.DiGraph() 160 | for file, header_lines, lines, deps, remote_import_lines in walk_deps( 161 | start_file, local_dirs=local_dirs, basedir=basedir, debug=False 162 | ): 163 | 164 | all_remote_imports.update(remote_import_lines) 165 | 166 | dependencies = set([dep for i, (package, dep) in deps.items()]) 167 | # print("-"*10) 168 | # print(file, dependencies, deps) 169 | if file not in nodes: 170 | nodes[file] = len(nodes) 171 | node_names.append(file) 172 | for d in dependencies: 173 | if not d in node_names: 174 | nodes[d] = len(nodes) 175 | node_names.append(d) 176 | if nodes[file] in G.nodes: 177 | G.nodes[nodes[file]]["lines"] = lines 178 | G.nodes[nodes[file]]["name"] = file 179 | G.nodes[nodes[file]]["header"] = header_lines 180 | else: 181 | G.add_node(nodes[file], lines=lines, name=file, header=header_lines) 182 | for d in dependencies: 183 | if not nodes[d] in G.nodes: 184 | G.add_node(nodes[d], name=d) 185 | e = (nodes[d], nodes[file]) 186 | if not e in G.edges: 187 | G.add_edge(*e) 188 | index = len(G.nodes) 189 | 190 | G.add_node(index, name="All imports", lines=sorted(all_remote_imports)) 191 | for i in range(index): 192 | G.add_edge(index, i) 193 | return G 194 | 195 | 196 | class DepGraph: 197 | def __init__(self, mainpy, basedir="."): 198 | self.graph = make_graph(mainpy, basedir=basedir) 199 | 200 | def sorted_files(self): 201 | for i in nx.topological_sort(self.graph): 202 | header = self.graph.nodes[i].get("header", []) 203 | lines = self.graph.nodes[i].get("lines", []) 204 | name = self.graph.nodes[i].get("name", []) 205 | if len(lines) < 1: 206 | continue 207 | yield header, name, lines 208 | 209 | def draw(self): 210 | pos = nx.spring_layout(self.graph) 211 | nx.draw(self.graph, pos=pos) 212 | labels = {i: 
self.graph.nodes[i]["name"] for i in self.graph.nodes} 213 | nx.draw_networkx_labels(self.graph, pos=pos, labels=labels) 214 | plt.savefig("filename.png") 215 | 216 | 217 | def read_file(file_path): 218 | with open(file_path) as f: 219 | lines = f.readlines() 220 | return lines 221 | 222 | 223 | def strip_lines(lines): 224 | i = 0 225 | j = 0 226 | for i in range(len(lines)): 227 | if len(lines[i].strip()) > 0: 228 | break 229 | for j in range(len(lines) - 1, i, -1): 230 | if len(lines[j].strip()) > 0: 231 | break 232 | return lines[i : j + 1] 233 | 234 | 235 | def wrap2cell(data, ctype="code"): 236 | code_params = {"code": {"execution_count": 0, "outputs": []}} 237 | base = {"cell_type": ctype, "metadata": {}, "source": data} 238 | return {**base, **code_params.get(ctype, {})} 239 | 240 | 241 | if __name__ == "__main__": 242 | parser = argparse.ArgumentParser() 243 | parser.add_argument( 244 | "mainpy", help="path or name of main file which contains runnable code" 245 | ) 246 | parser.add_argument("savepath", help="path where .ipynb notebook should be saved") 247 | parser.add_argument( 248 | "--draw", 249 | action="store_true", 250 | help="use this flag to indicate that graph should be saved", 251 | ) 252 | args = parser.parse_args() 253 | 254 | graph = DepGraph(args.mainpy) 255 | if args.draw: 256 | graph.draw() 257 | git_hash = ( 258 | subprocess.check_output(["git", "rev-parse", "HEAD"]).decode().strip() 259 | ) # , stdout=subprocess.PIPE) 260 | cmd = " ".join(sys.argv) 261 | header_text = f""" 262 | This file was autogenerated from code at my github repo. 263 | - Main script: `{args.mainpy}` 264 | - Commit hash: {git_hash} 265 | 266 | I generated this with the command (using python 3.7): 267 | ``` 268 | python {cmd} 269 | ``` 270 | """ 271 | # header_text = [linefor line in header_text.split("\n")] 272 | TEMPLATE["cells"].append(wrap2cell([header_text], ctype="markdown")) 273 | for header, name, lines in graph.sorted_files(): 274 | stripped_header = strip_lines(header) 275 | if len(stripped_header) > 0: 276 | TEMPLATE["cells"].append(wrap2cell(stripped_header, ctype="markdown")) 277 | lines = strip_lines(lines) 278 | if len(lines) > 0: 279 | TEMPLATE["cells"].append(wrap2cell(lines)) 280 | 281 | with open(args.savepath, "w") as f: 282 | json.dump(TEMPLATE, f, indent=2) 283 | -------------------------------------------------------------------------------- /kaggle_arc/scripts/predictor_validator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import pandas as pd 4 | import json 5 | import argparse 6 | 7 | from constants import * 8 | from base.field import Field 9 | 10 | from utils import * 11 | from base.field import * 12 | 13 | from predictors.basic import ( 14 | IdPredictor, 15 | ZerosPredictor, 16 | ConstPredictor, 17 | FillPredictor, 18 | Predictor, 19 | ) 20 | from predictors.complex import ComplexPredictor 21 | from predictors.color_counting import ColorCountingPredictor 22 | from predictors.shapes import ( 23 | RepeatingPredictor, 24 | FractalPredictor, 25 | ResizingPredictor, 26 | MirrorPredictor, 27 | ConstantShaper, 28 | ) 29 | from predictors.boosting_tree import ( 30 | BoostingTreePredictor, 31 | BoostingTreePredictor2, 32 | BoostingTreePredictor3, 33 | ) 34 | from predictors.convolution import ConvolutionPredictor 35 | from predictors.graph_boosting_tree import ( 36 | GraphBoostingTreePredictor, 37 | GraphBoostingTreePredictor2, 38 | GraphBoostingTreePredictor3, 39 | ) 40 | from predictors.decision_tree 
import AugmentedPredictor 41 | from predictors.subpattern import SubpatternMatcherPredictor 42 | from predictors.field2point import * 43 | from predictors.complex import DefaultComplexPredictor 44 | from predictors.draft_predictors.cam_predictor import *  # these modules live under draft_predictors/ 45 | from predictors.connector import * 46 | from predictors.draft_predictors.cf_combinator import * 47 | 48 | 49 | datasets = read_datasets(DATADIR) 50 | train_ds, eval_ds, test_ds = [convert2samples(x) for x in datasets] 51 | 52 | 53 | def evaluate_on_dataset( 54 | predictor_class, 55 | ds, 56 | cutoff=1.0, 57 | draw_results=True, 58 | imagedir="../temp/images", 59 | dataset_id=0, 60 | ): 61 | nsamples = 0 62 | train1 = 0 63 | test1 = 0 64 | params = {} 65 | params["total"] = len(ds) 66 | params["train_score"] = dict() 67 | params["test_score"] = dict() 68 | for i, sample in enumerate(ds): 69 | predictor = predictor_class() 70 | if not predictor.is_available(sample.train): 71 | continue 72 | nsamples += 1 73 | predictor.train(sample.train) 74 | predictor.freeze_by_score(sample.train) 75 | score_train = predictor.validate(sample.train) 76 | # print(score_train) 77 | params["train_score"][i] = score_train 78 | score_test = predictor.validate(sample.test) 79 | params["test_score"][i] = score_test 80 | if score_train >= cutoff: 81 | train1 += 1 82 | if score_test >= cutoff: 83 | test1 += 1 84 | if draw_results: # and score_train == 1 and score_test < 1: 85 | title = f"Image {i}: train={score_train:2.2f}, test={score_test:2.2f}\n" 86 | sample.show(predictor=predictor, title=title) 87 | # bbox_inches='tight', 88 | # plt.tight_layout() 89 | plt.savefig( 90 | os.path.join( 91 | imagedir, 92 | f"image_{dataset_id}_{score_train:0.2f}_{score_test:0.2f}_{i:03d}.png", 93 | ) 94 | ) 95 | plt.close("all") 96 | 97 | return train1, test1, nsamples, params 98 | 99 | 100 | if len(sys.argv) < 2: 101 | print("no predictor classes were provided") 102 | 103 | names = sys.argv[1:] + [ 104 | n + "Predictor" for n in sys.argv[1:] if n.find("Predictor") < 0 105 | ] 106 | savedir = "../temp/eval" 107 | if not os.path.exists(savedir): 108 | os.makedirs(savedir) 109 | 110 | for name in names: 111 | if name not in globals(): 112 | # print(f"{name} predictor not found") 113 | continue 114 | predictor_class = globals()[name] 115 | imagedir = os.path.join("../temp/images", name) 116 | if not os.path.exists(imagedir): 117 | os.makedirs(imagedir) 118 | # if not isinstance(predictor_class, Predictor): 119 | # print(f"{name} is not a predictor") 120 | # continue 121 | all_results = [name] 122 | for i, ds in enumerate([train_ds, eval_ds]): 123 | result = evaluate_on_dataset( 124 | predictor_class, ds, cutoff=1.0, imagedir=imagedir, dataset_id=i 125 | ) 126 | params = result[-1] 127 | with open(os.path.join(savedir, f"{name}_{i}.json"), "w") as f: 128 | json.dump(params, f) 129 | result = result[:-1] 130 | result = " / ".join(([f"{r:d}" for r in result])) 131 | all_results.append(result) 132 | all_results = " | ".join(all_results) 133 | print(all_results) 134 | -------------------------------------------------------------------------------- /kaggle_arc/utils.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root( 4 | __file__, indicator=".project-root", pythonpath=True, cwd=False 5 | ) 6 | 7 | import json 8 | import os 9 | from collections import OrderedDict 10 | 11 | import pandas as pd 12 | import numpy as np 13 | from pathlib import Path 14 | 15 | 
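# The helpers in this module read both supported dataset layouts (the old
# training/evaluation/test directories and the newer arc-agi_*_challenges /
# _solutions file pairs), wrap tasks into Sample objects, and write top-k
# predictions in the submission csv format.
from base.iodata 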
import Sample 16 | from base.field import Field 17 | 18 | 19 | def read_single_dataset(basedir, prefix): 20 | if isinstance(basedir, str): 21 | basedir = Path(basedir) 22 | challenges_file = basedir / f"{prefix}_challenges.json" 23 | solutions_file = basedir / f"{prefix}_solutions.json" 24 | if not challenges_file.exists(): 25 | return None 26 | with open(challenges_file.as_posix()) as f: 27 | puzzles = json.load(f) 28 | solutions = {} 29 | if solutions_file.exists(): 30 | with open(solutions_file.as_posix()) as f: 31 | solutions = json.load(f) 32 | # we are interested in puzzles with corresponding solutions, so we'll ignore solutions 33 | # which are not present in the puzzles file 34 | puzzle_id_list = sorted(puzzles) 35 | records = [] 36 | for puzzle_id in puzzle_id_list: 37 | puzzle = puzzles[puzzle_id] 38 | solution = solutions.get(puzzle_id) 39 | records.append((puzzle_id, (puzzle, solution))) 40 | return records 41 | 42 | 43 | def read_datasets_old(basedir="../data"): 44 | train_dir = os.path.join(basedir, "training") 45 | train_data = OrderedDict( 46 | (os.path.splitext(x)[0], os.path.join(train_dir, x)) 47 | for x in os.listdir(train_dir) 48 | ) 49 | eval_dir = os.path.join(basedir, "evaluation") 50 | eval_data = OrderedDict( 51 | (os.path.splitext(x)[0], os.path.join(eval_dir, x)) 52 | for x in os.listdir(eval_dir) 53 | ) 54 | test_dir = os.path.join(basedir, "test") 55 | test_data = OrderedDict( 56 | (os.path.splitext(x)[0], os.path.join(test_dir, x)) 57 | for x in os.listdir(test_dir) 58 | ) 59 | return train_data, eval_data, test_data 60 | 61 | 62 | def read_datasets( 63 | basedir="../data", 64 | train_prefix="arc-agi_training", 65 | eval_prefix="arc-agi_evaluation", 66 | test_prefix="arc-agi_test", 67 | ): 68 | checked_path = Path(basedir) / "training" 69 | if checked_path.exists(): 70 | return read_datasets_old(basedir) 71 | 72 | train_data = read_single_dataset(basedir, prefix=train_prefix) 73 | eval_data = read_single_dataset(basedir, prefix=eval_prefix) 74 | test_data = read_single_dataset(basedir, prefix=test_prefix) 75 | return train_data, eval_data, test_data 76 | 77 | 78 | def convert2samples(data): 79 | if isinstance(data, OrderedDict): 80 | return [Sample(name, path) for name, path in data.items()] 81 | return [Sample(name, puzzle_data) for name, puzzle_data in data] 82 | 83 | 84 | def save_predictions( 85 | predictor, ds, savepath, k=3, args=[], kwargs=dict(), verbose=True 86 | ): 87 | all_data = [] 88 | for name, i, prediction in predictor.predict_on( 89 | ds, k=k, args=args, kwargs=kwargs, verbose=verbose 90 | ): 91 | if isinstance(prediction, Field): 92 | preds = [str(prediction)] * k 93 | if isinstance(prediction, list): 94 | preds = [str(p) for p in prediction] 95 | if len(preds) < k: 96 | preds = (preds * k)[:k] 97 | preds = " ".join(preds) 98 | all_data.append({"output_id": f"{name}_{i}", "output": preds}) 99 | pd.DataFrame(all_data, columns=["output_id", "output"]).to_csv(savepath, index=None) 100 | 101 | 102 | def check_if_can_be_mirrored(data, h=14, w=9): 103 | # w, h = iodata.input_field.shape 104 | sample = data[:h, :w] 105 | h1, w1 = data.shape 106 | m1, m2 = h1 // h, w1 // w 107 | buf = dict() 108 | buf[(0, 0)] = sample 109 | for i in range(m1): 110 | for j in range(m2): 111 | if i == 0 and j == 0: 112 | continue 113 | current = data[i * h : i * h + h, j * w : j * w + w] 114 | p = (i % 2, j % 2) 115 | # print(p, h, w) 116 | if p in buf: 117 | if not np.all(buf[p] == current): 118 | return None 119 | else: 120 | buf[p] = current 121 | a1 = 
np.all(sample == buf[0, 1]) 122 | a2 = np.all(sample == buf[1, 0]) 123 | a3 = np.all(sample == buf[1, 1]) 124 | if a1 and a2 and a3: 125 | return (False, False) 126 | b1 = np.all(sample[:, ::-1] == buf[0, 1]) 127 | b2 = np.all(sample[::-1, :] == buf[1, 0]) 128 | b3 = np.all(buf[1, 1] == buf[1, 0]) 129 | b4 = np.all(buf[1, 1] == buf[0, 1]) 130 | b5 = np.all(sample[::-1, ::-1] == buf[1, 1]) 131 | if b1 and b2 and b5: 132 | return (True, True) 133 | if b1 and a2 and b4: 134 | return (False, True) 135 | if b2 and a1 and b3: 136 | return (True, False) 137 | return None 138 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "kaggle_arc" 7 | authors = [ 8 | {name = "Tatiana Malygina"}, 9 | ] 10 | version = "0.0.1" 11 | dependencies = [ 12 | "numpy", 13 | "pandas", 14 | "rootutils", 15 | "scikit-learn", 16 | "scikit-image", 17 | "networkx", 18 | "xgboost", 19 | "torch" 20 | ] 21 | readme = "readme.md" 22 | requires-python = ">=3.8" 23 | 24 | [tool.setuptools.packages] 25 | find = {} 26 | 27 | [tool.setuptools.package-data] 28 | "*" = [".project-root"] -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | 2 | ![Project image](https://github.com/latticetower/kaggle-arc/blob/main/images/ministry_of_predictors.png) 3 | 4 | # Description 5 | 6 | Legacy code with my solutions from https://www.kaggle.com/c/abstraction-and-reasoning-challenge. 7 | 8 | I'm refactoring it for reuse in the ongoing Kaggle ARC Prize competition. 9 | 10 | There is no guarantee that the code is free of typos or missing pieces. If you use it, do so at your own risk, especially the predictor classes that rely on xgboost (I've noticed that they crash the Kaggle notebook on some data samples, but I haven't figured out how to fix this yet). 11 | 12 | ## Installation & usage 13 | ```bash 14 | git clone https://github.com/latticetower/kaggle-arc.git kaggle-arc 15 | pip install ./kaggle-arc 16 | ``` 17 | 18 | ```mermaid 19 | --- 20 | title: Main project classes 21 | --- 22 | classDiagram 23 | Field --o IOData 24 | IOData --o Sample 25 | class Field["kaggle_arc.base.Field"]{ 26 | numpy.array data 27 | show(ax=None, label=None) 28 | } 29 | class IOData["kaggle_arc.iodata.IOData"]{ 30 | Field input_field 31 | Field output_field 32 | show(predictor=None, npredictions=1, ...) 33 | } 34 | class Sample["kaggle_arc.iodata.Sample"]{ 35 | String name 36 | List[IOData] train 37 | List[IOData] test 38 | show(predictor=None, npredictions=3, ...) 39 | } 40 | Predictor <|-- ComplexPredictor 41 | AvailableAll <|-- ComplexPredictor 42 | note for Predictor "has many descendant classes" 43 | class Predictor["kaggle_arc.predictors.basic.Predictor"]{ 44 | <<abstract>> 45 | train(iodata_list) 46 | predict(field) 47 | validate(iodata_list, k=3) 48 | predict_on(predictor_class, ds, ...) 
49 | } 50 | class ComplexPredictor["kaggle_arc.predictors.complex.ComplexPredictor"]{ 51 | List[Predictor] predictors 52 | train(iodata_list) 53 | predict(field) 54 | validate(iodata_list, k=3) 55 | freeze_by_score(iodata_list, k=3) 56 | } 57 | note for AvailableAll "mixin class\nhas many descendant classes" 58 | class AvailableAll["kaggle_arc.predictors.basic.AvailableAll"]{ 59 | <<mixin>> 60 | is_available(iodata_list) 61 | } 62 | ```
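
A minimal end-to-end sketch of the API above (untested; it assumes `DATADIR` and `TEST_SAVEPATH` are defined in `kaggle_arc/constants.py`, as the scripts in `kaggle_arc/scripts` do, and that it is run from the `scripts` folder with the repo root on `PYTHONPATH`):

```python
from constants import DATADIR, TEST_SAVEPATH
from utils import read_datasets, convert2samples, save_predictions
from predictors.basic import IdPredictor, ZerosPredictor
from predictors.complex import ComplexPredictor

# Read the three dataset splits and wrap them into Sample objects.
train_ds, eval_ds, test_ds = [convert2samples(x) for x in read_datasets(DATADIR)]

# ComplexPredictor tries each candidate predictor per task and writes the
# top-k (here 3) predictions for the test split to a csv file.
candidates = [IdPredictor, ZerosPredictor]
save_predictions(ComplexPredictor, test_ds, TEST_SAVEPATH, k=3, args=[candidates])
```
--------------------------------------------------------------------------------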