├── .gitignore ├── images └── ministry_of_predictors.png ├── kaggle_arc ├── .project-root ├── README.md ├── base │ ├── __init__.py │ ├── field.py │ ├── iodata.py │ ├── transformers.py │ └── utils.py ├── constants.py ├── operations │ ├── basic.py │ ├── field2point.py │ ├── resizing.py │ ├── reversible.py │ └── subpatterns.py ├── predictors │ ├── __init__.py │ ├── availability_mixins.py │ ├── basic.py │ ├── boosting_tree.py │ ├── color_counting.py │ ├── complex.py │ ├── connector.py │ ├── convolution.py │ ├── decision_tree.py │ ├── draft_predictors │ │ ├── cam_predictor.py │ │ ├── cf_combinator.py │ │ ├── cf_filler.py │ │ ├── cf_selector.py │ │ └── cf_sorter.py │ ├── field2point.py │ ├── graph_boosting_tree.py │ ├── shapes.py │ └── subpattern.py ├── scripts │ ├── 0_idpredictor_on_test_script.py │ ├── 1_complexpredictor_on_test_script.py │ ├── 2_complexpredictor_coloring.py │ ├── convert2ipynb.py │ └── predictor_validator.py └── utils.py ├── pyproject.toml └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | input 2 | temp 3 | working 4 | 5 | 6 | **/__pycache__ 7 | **/.ipynb_checkpoints 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | cover/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | .pybuilder/ 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | # For a library or package, you might want to ignore these files since the code is 95 | # intended to run in multiple environments; otherwise, check them in: 96 | # .python-version 97 | 98 | # pipenv 99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 102 | # install all needed dependencies. 103 | #Pipfile.lock 104 | 105 | # poetry 106 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 107 | # This is especially recommended for binary packages to ensure reproducibility, and is more 108 | # commonly ignored for libraries. 
109 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
110 | #poetry.lock
111 | 
112 | # pdm
113 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
114 | #pdm.lock
115 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
116 | # in version control.
117 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
118 | .pdm.toml
119 | .pdm-python
120 | .pdm-build/
121 | 
122 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
123 | __pypackages__/
124 | 
125 | # Celery stuff
126 | celerybeat-schedule
127 | celerybeat.pid
128 | 
129 | # SageMath parsed files
130 | *.sage.py
131 | 
132 | # Environments
133 | .env
134 | .venv
135 | env/
136 | venv/
137 | ENV/
138 | env.bak/
139 | venv.bak/
140 | 
141 | # Spyder project settings
142 | .spyderproject
143 | .spyproject
144 | 
145 | # Rope project settings
146 | .ropeproject
147 | 
148 | # mkdocs documentation
149 | /site
150 | 
151 | # mypy
152 | .mypy_cache/
153 | .dmypy.json
154 | dmypy.json
155 | 
156 | # Pyre type checker
157 | .pyre/
158 | 
159 | # pytype static type analyzer
160 | .pytype/
161 | 
162 | # Cython debug symbols
163 | cython_debug/
164 | 
165 | # PyCharm
166 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
167 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
168 | # and can be added to the global gitignore or merged into this file. For a more nuclear
169 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
170 | #.idea/
171 | 
172 | **/.DS_Store
--------------------------------------------------------------------------------
/images/ministry_of_predictors.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/latticetower/kaggle-arc/e29bb298e68245048ffcc7aaa392d858e061adf2/images/ministry_of_predictors.png
--------------------------------------------------------------------------------
/kaggle_arc/.project-root:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/latticetower/kaggle-arc/e29bb298e68245048ffcc7aaa392d858e061adf2/kaggle_arc/.project-root
--------------------------------------------------------------------------------
/kaggle_arc/README.md:
--------------------------------------------------------------------------------
1 | # kaggle-arc
2 | https://www.kaggle.com/c/abstraction-and-reasoning-challenge
3 | 
4 | # Usage:
5 | 
6 | Direct script calls look like this:
7 | ```
8 | cd scripts
9 | 
10 | PYTHONPATH=$(pwd)/..:$PYTHONPATH python 1_complexpredictor_on_test_script.py 1>out.txt
11 | ```
12 | 
13 | To convert a script to a single ipynb notebook, run this command from the repo root folder:
14 | ```
15 | python scripts/convert2ipynb.py scripts/runner.py temp/sample1.ipynb
16 | ```
17 | The first parameter is the path to the main script; the second is the path where the notebook will be saved.
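
Predictors can also be driven from Python directly. Below is a minimal, hedged sketch (the puzzle path is a placeholder, the `kaggle_arc` folder is assumed to be on `PYTHONPATH`, and the `is_available`/`train`/`predict` calls follow the `Predictor` API in this repo):
```
from base.iodata import Sample
from predictors.basic import IdPredictor

# Load a single puzzle; the json path below is hypothetical.
sample = Sample("demo", "../input/abstraction-and-reasoning-challenge/training/demo_task.json")

predictor = IdPredictor()
if predictor.is_available(sample.train):   # predictors first check applicability
    predictor.train(sample.train)          # fit on the train input/output pairs
    # predict() yields one or more candidate Field objects
    for prediction in predictor.predict(sample.test[0].input_field):
        print(prediction)                  # prints the grid in its "|...|" string form
        break
```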
18 | 
19 | # Evaluation
20 | 
21 | To evaluate predictors on the train and evaluation datasets, the following command was used (run from the `scripts` folder):
22 | ```
23 | PYTHONPATH=$(pwd)/..:$PYTHONPATH python predictor_validator.py Id Zeros ColorCounting Repeating Fractal
24 | ```
25 | Each table cell shows: correct predictions on train / correct predictions on test / number of samples for which the predictor is available.
26 | Class name | Train | Evaluation
27 | -----------|-------|-----------
28 | IdPredictor | 4 / 0 / 400 | 2 / 0 / 400
29 | ZerosPredictor | 0 / 0 / 400 | 0 / 0 / 400
30 | ColorCountingPredictor | 16 / 5 / 262 | 2 / 1 / 270
31 | RepeatingPredictor | 0 / 0 / 17 | 0 / 0 / 23
32 | FractalPredictor | 1 / 1 / 17 | 2 / 1 / 23
33 | ResizingPredictor | 2 / 2 / 17 | 1 / 1 / 23
34 | ConstantShaper | 4 / 4 / 15 | 3 / 3 / 10
35 | BoostingTreePredictor | 136 / 24 / 262 | 134 / 8 / 270
36 | No augmentation, with painter | 217 / 31 / 262 | 188 / 8 / 270
37 | with square features | 214 / 31 / 262 | 191 / 9 / 270
38 | with new features | 161 / 32 / 262 | 135 / 7 / 270
39 | Augmentation + repainter: | 79 / 8 / 262 | 43 / 2 / 270
40 | Augmentation, w/o repainter: | 83 / 7 / 262 | 37 / 1 / 270
41 | BoostingTreePredictor2 | 2 / 1 / 31 | 3 / 3 / 27
42 | BoostingTreePredictor3 | 218 / 31 / 262 | 198 / 12 / 270
43 | SubpatternMatcherPredictor | 2 / 2 / 10 | 0 / 0 / 2
44 | SimpleSummarizePredictor | 1 / 1 / 6 | 0 / 2 / 2
45 | GraphBoostingTreePredictor | 17 / 13 / 43 | 9 / 6 / 33
46 | GraphBoostingTreePredictor3 | 155 / 28 / 262 | 143 / 6 / 270
47 | PointConnectorPredictor | 24 / 3 / 90 | 11 / 2 / 68
48 | ConvolutionPredictor | 11 / 1 / 262 | 3 / 0 / 270
--------------------------------------------------------------------------------
/kaggle_arc/base/__init__.py:
--------------------------------------------------------------------------------
1 | import rootutils
2 | 
3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
4 | 
5 | from base.field import Field
6 | from base.iodata import IOData, Sample
--------------------------------------------------------------------------------
/kaggle_arc/base/field.py:
--------------------------------------------------------------------------------
1 | import rootutils
2 | 
3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
4 | 
5 | import os
6 | import numpy as np
7 | import matplotlib
8 | 
9 | # try:
10 | #     if not matplotlib.is_interactive():
11 | #         matplotlib.use("svg")
12 | # except:
13 | #     pass
14 | 
15 | import matplotlib.pyplot as plt
16 | import seaborn as sns
17 | 
18 | import torch
19 | from itertools import product
20 | from collections import OrderedDict
21 | import networkx as nx
22 | 
23 | from typing import NamedTuple
24 | 
25 | from constants import *
26 | from base.utils import *
27 | 
28 | 
29 | def binary_dice(a, b):
30 |     s = np.sum(a) + np.sum(b)
31 |     if s != 0:
32 |         return 2 * np.sum(a * b) / s
33 |     return None
34 | 
35 | 
36 | def multiclass_dice(a, b, c):
37 |     return binary_dice(1 * (a == c), 1 * (b == c))
38 | 
39 | 
40 | def build_colormap(i, o=None, bg=0):
41 |     colormap = {bg: 0}
42 |     current_id = 1
43 |     for line in i:
44 |         for x in line:
45 |             if x in colormap:
46 |                 continue
47 |             colormap[x] = current_id
48 |             current_id += 1
49 |     if o is not None:
50 |         for line in o:
51 |             for x in line:
52 |                 if x in colormap:
53 |                     continue
54 |                 colormap[x] = current_id
55 |                 current_id += 1
56 |     return colormap
57 | 
58 | 
59 | class Field:
60 |     """Represents a matrix of values 0-9 corresponding to a single input or output grid of an ARC puzzle.
    Stores additional properties (colorization schemes, drawing helpers, etc.).
61 |     """
62 |     __slots__ = ["data", "multiplier", "colormap", "prop_names"]
63 | 
64 |     def __init__(self, data):
65 |         if isinstance(data, list):
66 |             self.data = np.asarray(
67 |                 [[(x if x >= 0 else 10 - x) for x in line] for line in data],
68 |                 dtype=np.uint8,
69 |             )
70 |         else:
71 |             self.data = data.copy()
72 |         self.multiplier = 0.5
73 |         self.colormap = None
74 |         self.prop_names = (
75 |             "h w xmin ymin xmax ymax xmean ymean is_convex holes contour_size interior_size".split()
76 |             + "is_rectangular is_square".split()
77 |             + [f"flip_{i}" for i in range(10)]
78 |             + [f"flip_conv_{i}" for i in range(10)]
79 |         )
80 | 
81 |     def get(self, i, j, default_color=0):
82 |         if i < 0 or j < 0:
83 |             return default_color
84 |         if i >= self.data.shape[0] or j >= self.data.shape[1]:
85 |             return default_color
86 |         return self.data[i, j]
87 | 
88 |     @property
89 |     def processed(self):
90 |         if self.colormap is None:
91 |             self.colormap = build_colormap(self.data, o=None, bg=0)
92 |         new_data = [[self.colormap.get(x, x) for x in line] for line in self.data]
93 |         return Field(new_data)
94 | 
95 |     def reconstruct(self, field):
96 |         if self.colormap is None:
97 |             return field
98 |         rev = {v: k for k, v in self.colormap.items()}
99 |         rev = {k: v for k, v in rev.items() if k != v}
100 |         if len(rev) < 1:
101 |             return field
102 |         new_data = [[rev.get(x, x) for x in line] for line in field.data]
103 |         return Field(new_data)
104 | 
105 |     @property
106 |     def to_array(self):
107 |         return [[int(x) for x in line] for line in self.data]
108 | 
109 |     @property
110 |     def height(self):
111 |         return self.data.shape[0]
112 | 
113 |     @property
114 |     def width(self):
115 |         return self.data.shape[1]
116 | 
117 |     @property
118 |     def shape(self):
119 |         return self.data.shape
120 | 
121 |     @property
122 |     def dtype(self):
123 |         return self.data.dtype
124 | 
125 |     @property
126 |     def data_splitted(self):
127 |         return np.stack([1.0 * (self.data == i) for i in range(10)])
128 | 
129 |     def t(self):
130 |         return torch.tensor(self.data)
131 | 
132 |     def t_splitted(self):
133 |         return torch.tensor(self.data_splitted)
134 | 
135 |     @staticmethod
136 |     def from_splitted(data):
137 |         return Field(np.argmax(data, 0))
138 | 
139 |     def __eq__(self, b):
140 |         if not isinstance(b, Field):
141 |             return self.data == b  # this does all conversion magic
142 |         if not (self.height == b.height and self.width == b.width):
143 |             return False
144 |         return np.all(self.data == b.data)
145 | 
146 |     def __ne__(self, b):
147 |         # if not isinstance(b, Field):
148 |         #     return self.data != b
149 |         return ~(self == b)
150 | 
151 |     # def __repr__(self):              # dead code: shadowed by the string-based
152 |     #     return repr(self.data)       # __repr__ defined further below
153 | 
154 |     def show(self, ax=None, label=None):
155 |         if ax is None:
156 |             plt.figure(
157 |                 figsize=(self.width * self.multiplier, self.height * self.multiplier)
158 |             )
159 |             ax = plt.gca()
160 |         ax.imshow(self.data, cmap=COLORMAP, norm=NORM)
161 |         for edge, spine in ax.spines.items():
162 |             spine.set_visible(False)
163 |         ax.set_xticks(np.arange(self.data.shape[1]) + 0.5, minor=True)
164 |         ax.set_yticks(np.arange(self.data.shape[0]) + 0.5, minor=True)
165 |         ax.set_xticklabels([])
166 |         ax.set_yticklabels([])
167 |         ax.grid(which="minor", color="black", linestyle="-", linewidth=1)
168 |         ax.tick_params(which="minor", bottom=False, left=False)
169 |         ax.set_aspect("equal")
170 |         if label is not None:
171 |             ax.set_title(label)
172 | 
173 |     @staticmethod
174 |     def compare_length(a, b):
175 |         return a.width == b.width and a.height == b.height
176 | 
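    # Illustrative sketch (editorial example, not from the original source): the
    # scoring helpers below average per-color binary dice over the 10 ARC colors,
    # skipping colors absent from both grids.
    #   a = Field([[1, 1], [0, 0]]); b = Field([[1, 0], [0, 0]])
    #   Field.dice(a, b)        # (0.8 + 2/3) / 2 ~= 0.733
    #   Field.sized_dice(a, b)  # equals dice() here; shrinks when shapes differ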
177 | @staticmethod 178 | def dice(a, b): 179 | dist = [multiclass_dice(a, b, i) for i in range(10)] 180 | dist = [d for d in dist if d is not None] 181 | return np.mean(dist) 182 | 183 | @staticmethod 184 | def sized_dice(a, b): 185 | if Field.compare_length(a, b): 186 | return Field.dice(a, b) 187 | h = min(a.height, b.height) 188 | w = min(a.width, b.width) 189 | a_ = Field(a.data[:h, :w]) 190 | b_ = Field(b.data[:h, :w]) 191 | d = Field.dice(a_, b_) 192 | size_coef = 2 * w * h / (a.width * a.height + b.width * b.height) 193 | return size_coef * d 194 | 195 | @classmethod 196 | def distance(cls, a, b): 197 | return 1 - cls.dice(a, b) 198 | 199 | @classmethod 200 | def score(cls, a, b): 201 | return cls.sized_dice(a, b) 202 | 203 | def str_iter(self): 204 | yield "|" 205 | for line in self.data: 206 | for x in line: 207 | yield str(x) 208 | yield "|" 209 | 210 | def __repr__(self): 211 | return "".join(self.str_iter()) 212 | 213 | def zeros(self, multiplier=1): 214 | return self.consts(value=0, multiplier=multiplier) 215 | 216 | def consts(self, value=1, multiplier=1): 217 | new_shape = tuple([x * multiplier for x in self.data.shape]) 218 | return Field(value * np.ones(new_shape, dtype=self.data.dtype)) 219 | 220 | @staticmethod 221 | def fromstring(s): 222 | assert s[0] == "|" 223 | data = [[int(x) for x in line] for line in s[1:-1].split("|")] 224 | return Field(data) 225 | 226 | def build_nxgraph(self, connectivity={0: 4}, properties=None): 227 | def get_features(data): 228 | return np.stack([(data == i) * 1.0 for i in range(10)], 0) 229 | 230 | graph_nx = nx.Graph() 231 | graph_nx.graph["global_features"] = np.asarray( 232 | [[(np.sum(self.data == i) > 0) * 1.0 for i in range(10)]] 233 | ).astype(np.float64) 234 | all_features = get_features(self.data) 235 | node_ids = OrderedDict() # node id -> (i, j) pair 236 | node_coords = OrderedDict() # node (i, j) pair -> id 237 | 238 | regions0 = get_data_regions(self.data) 239 | params0, maps0 = get_region_params(regions0) 240 | 241 | regions1 = get_data_regions(self.data, connectivity=1) 242 | params1, maps1 = get_region_params(regions1, connectivity=1) 243 | 244 | for i in range(self.data.shape[0]): 245 | for j in range(self.data.shape[1]): 246 | new_id = len(node_ids) 247 | node_ids[new_id] = (i, j) 248 | node_coords[(i, j)] = new_id 249 | 250 | color = self.data[i, j] 251 | 252 | left = i == 0 # left 253 | top = j == 0 # top 254 | right = i == self.data.shape[0] - 1 # right 255 | bottom = j == self.data.shape[1] - 1 # bottom 256 | features = [left, right, top, bottom] 257 | neighbours = [ 258 | (i1, j1) 259 | for i1, j1 in product([i - 1, i, i + 1], [j - 1, j, j + 1]) 260 | if (i1 != i or j1 != j) 261 | and i1 >= 0 262 | and j1 >= 0 263 | and i1 < self.data.shape[0] 264 | and j1 < self.data.shape[1] 265 | ] 266 | if connectivity.get(color, 4) == 4: 267 | neighbours = [ 268 | (i1, j1) for i1, j1 in neighbours if (i1 == i or j1 == j) 269 | ] 270 | # angle 90 271 | 272 | angle_props = [] 273 | for d1, d2 in [(-1, -1), (-1, +1), (+1, -1), (+1, +1)]: 274 | angle_270 = False 275 | left_shift = False 276 | top_shift = False 277 | if self.get(i + d1, j + d2) != color: 278 | angle_270 = ( 279 | self.get(i, j + d2) == color 280 | and self.get(i + d1, j) == color 281 | ) 282 | left_shift = self.get(i + d1, j) != color 283 | top_shift = self.get(i, j + d2) != color 284 | angle_props.extend([angle_270, left_shift, top_shift]) 285 | features.extend(angle_props) 286 | # for c in self.get(i - 1, j), self.get(i, j - 1), self.get(i+1, j), self.get 287 | # 
for i1 in (i - 1, i + 1) if i1 >=0 and i1 < self.data.shape[0]
288 |                 # if not left
289 |                 ncolors = set([self.data[i1, j1] for i1, j1 in neighbours])
290 |                 ncolors = [(i in ncolors) * 1 for i in range(10)]
291 |                 props = {
292 |                     "features": np.asarray(features).astype(np.float64),  # np.float is removed in modern NumPy
293 |                     "neighbours": neighbours,
294 |                     "neighbour_colors": np.asarray(ncolors),
295 |                     "color": self.data[i, j],
296 |                     "x": all_features[:, i, j].astype(np.float64),
297 |                     "pos": (i, j),
298 |                 }
299 |                 rid0 = regions0[i, j]
300 |                 p = [params0[rid0][k] for k in self.prop_names]
301 |                 rid1 = regions1[i, j]
302 |                 p += [params1[rid1][k] for k in self.prop_names]
303 | 
304 |                 if properties is not None:
305 |                     props["properties"] = properties[i, j]
306 |                 props["component_params"] = np.asarray(p)
307 | 
308 |                 graph_nx.add_node(
309 |                     new_id,
310 |                     # features=np.asarray(features).astype(np.float),
311 |                     # neighbours=neighbours,
312 |                     # neighbour_colors = np.asarray(ncolors),
313 |                     # color=self.data[i, j],
314 |                     # x=all_features[:, i, j].astype(np.float64),
315 |                     # pos=(i, j)
316 |                     **props,
317 |                 )
318 | 
319 |         for i in range(self.data.shape[0]):
320 |             for j in range(self.data.shape[1]):
321 |                 # neighbours = [
322 |                 #     (i1, j1)
323 |                 #     for i1, j1 in product([i - 1, i, i + 1], [j - 1, j, j + 1])
324 |                 #     if (i1 != i or j1 != j) and i1 >= 0 and j1 >= 0
325 |                 #     and i1 < self.data.shape[0] and j1 < self.data.shape[1]
326 |                 # ]
327 |                 id0 = node_coords[(i, j)]
328 |                 color0 = self.data[i, j]
329 | 
330 |                 neighbours = graph_nx.nodes[id0]["neighbours"]
331 | 
332 |                 for i1, j1 in neighbours:
333 |                     id1 = node_coords[(i1, j1)]
334 |                     color1 = self.data[i1, j1]
335 |                     if color0 == color1:
336 |                         graph_nx.add_edge(
337 |                             id0,
338 |                             id1,
339 |                             features=np.asarray(
340 |                                 [(color0 == x) * 1.0 for x in range(10)]
341 |                             ).astype(np.float64),
342 |                         )
343 |                     # graph_nx.add_edge(id1, id0, features=[color0])
344 | 
345 |         # graph_nx.add_node()
346 |         return graph_nx
347 | 
348 | 
349 | class ComplexField:
350 |     def __init__(self, data, **params):
351 |         self.data = data
352 |         self.params = params
353 |         self.multiplier = 0.5
354 | 
355 |     @property
356 |     def shape(self):
357 |         if len(self.data) > 0:
358 |             if isinstance(self.data[0], list):
359 |                 return (len(self.data), len(self.data[0]))
360 |             return (len(self.data),)
361 | 
362 |     @property
363 |     def width(self):
364 |         if len(self.shape) == 1:
365 |             return 1
366 |         return self.shape[1]
367 | 
368 |     @property
369 |     def height(self):
370 |         return self.shape[0]
371 | 
372 |     def flat_iter(self):
373 |         for line in self.data:
374 |             if isinstance(line, list):
375 |                 for x in line:
376 |                     yield x
377 |             else:
378 |                 yield line
379 | 
380 |     def map(self, func):
381 |         new_data = [[func(x) for x in line] for line in self.data]
382 |         return ComplexField(new_data, **self.params)
383 | 
384 |     def show(self, ax=None, label=None):
385 |         if ax is None:
386 |             plt.figure(
387 |                 figsize=(self.width * self.multiplier, self.height * self.multiplier)
388 |             )
389 |             ax = plt.gca()
390 |         pass
391 | 
392 |     def __str__(self):
393 |         return f"ComplexField({self.shape}, {self.params})"
394 | 
--------------------------------------------------------------------------------
/kaggle_arc/base/iodata.py:
--------------------------------------------------------------------------------
1 | import rootutils
2 | 
3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
4 | 
5 | import json
6 | import matplotlib
7 | from matplotlib import cm
8 | import matplotlib.gridspec as gridspec
9 | import seaborn as sns
10 | from itertools import islice
11 | 
12 | from base.field import *
13 | 
14 | 
15 | class IOData:
16 |     """Represents a single example of an ARC puzzle. An example may contain an input/output pair or, in the case of test data, only an input.
17 |     """
18 |     __slots__ = ["input_field", "output_field", "colormap"]
19 | 
20 |     def __init__(self, data=None, input_field=None, output_field=None):
21 |         # data['train'][0]['input']
22 |         self.input_field = input_field
23 |         self.output_field = output_field
24 |         if data is not None:
25 |             if "input" in data:
26 |                 self.input_field = Field(data["input"])
27 |             if "output" in data:
28 |                 self.output_field = Field(data["output"])
29 |         self.colormap = None
30 | 
31 |     @property
32 |     def input_processed(self):
33 |         i = self.input_field.data
34 |         o = self.output_field
35 |         if o is not None:
36 |             o = o.data
37 |         if self.colormap is None:
38 |             self.colormap = build_colormap(i, o)
39 |         data = [[self.colormap.get(x, x) for x in line] for line in i]
40 |         return Field(data)
41 | 
42 |     @property
43 |     def output_processed(self):
44 |         if self.colormap is None:
45 |             self.colormap = build_colormap(
46 |                 self.input_field.data, self.output_field.data
47 |             )
48 |         data = [
49 |             [self.colormap.get(x, x) for x in line] for line in self.output_field.data
50 |         ]
51 |         return Field(data)
52 | 
53 |     def reconstruct(self, field):
54 |         if self.colormap is None:
55 |             return field
56 |         new_data = [[self.colormap.get(x, x) for x in line] for line in field.data]
57 |         return Field(new_data)
58 | 
59 |     def show(self, fig=None, axes=None, predictor=None, npredictions=1):
60 |         if fig is None:
61 |             if predictor is not None:
62 |                 fig, axes = plt.subplots(nrows=1, ncols=2 + npredictions)
63 |             else:
64 |                 fig, axes = plt.subplots(nrows=1, ncols=2)
65 |         ax0, ax1 = axes[:2]
66 |         ax0.set_xticks([])
67 |         ax0.set_yticks([])
68 |         ax1.set_xticks([])
69 |         ax1.set_yticks([])
70 |         if self.input_field is not None:
71 |             self.input_field.show(ax0, label="input")
72 |             # ax0.axis("off")
73 |         if self.output_field is not None:
74 |             self.output_field.show(ax1, label="output")
75 |             # ax1.axis("off")
76 |         if predictor is not None:
77 |             for i, prediction in enumerate(
78 |                 islice(predictor.predict(self.input_field), npredictions)
79 |             ):
80 |                 ax = axes[2 + i]
81 |                 ax.set_xticks([])
82 |                 ax.set_yticks([])
83 |                 prediction.show(ax)
84 |                 # ax.axis("off")
85 | 
86 |     def t(self):
87 |         result = [self.input_field.t()]
88 |         if self.output_field is not None:
89 |             result.append(self.output_field.t())
90 |         return tuple(result)
91 | 
92 |     def t_splitted(self):
93 |         result = [self.input_field.t_splitted()]
94 |         if self.output_field is not None:
95 |             result.append(self.output_field.t_splitted())
96 |         return tuple(result)
97 | 
98 | 
99 | class Sample:
100 |     """Represents a single ARC puzzle.
101 | """ 102 | __slots__ = ["name", "train", "test"] 103 | 104 | def __init__(self, name, path): 105 | self.name = name 106 | 107 | if isinstance(path, str): 108 | with open(path) as f: 109 | puzzle_data = json.load(f) 110 | solutions = None 111 | # self.test = [ IOData(sample) for sample in puzzle_data.get('test', []) ] 112 | else: 113 | (puzzle_data, solutions) = path 114 | 115 | self.train = [IOData(sample) for sample in puzzle_data.get("train", [])] 116 | 117 | if solutions is None or len(solutions) == 0: 118 | self.test = [IOData(sample) for sample in puzzle_data.get("test", [])] 119 | else: 120 | self.test = [ 121 | IOData(sample, output_field=Field(solution)) 122 | for sample, solution in zip(puzzle_data.get("test", []), solutions) 123 | ] 124 | 125 | def predict(self, predictors): 126 | predictions = [] 127 | for sample in self.iterate_test(): 128 | pred = [predictor.predict(sample) for predictor in predictors] 129 | predictions.append(pred) 130 | return predictions 131 | 132 | def show( 133 | self, 134 | fig=None, 135 | grids=[None, None, None], 136 | w=2, 137 | h=2, 138 | ncols=2, 139 | predictor=None, 140 | npredictions=3, 141 | title="", 142 | ): 143 | ntrain = len(self.train) 144 | ntest = len(self.test) 145 | ncols += npredictions 146 | if predictor is not None: 147 | if not predictor.is_available(self.train): 148 | predictor = None 149 | else: 150 | predictor.train(self.train) 151 | gs, train_gs, test_gs = grids 152 | if fig is None: 153 | fig = plt.figure(figsize=(ncols * w, (ntrain + ntest) * h)) 154 | plt.title(title) 155 | ax = plt.gca() 156 | for edge, spine in ax.spines.items(): 157 | spine.set_visible(False) 158 | ax.set_xticklabels([]) 159 | ax.set_yticklabels([]) 160 | ax.set_xticks([]) 161 | ax.set_yticks([]) 162 | 163 | if gs is None: 164 | gs = gridspec.GridSpec( 165 | 2, 1, figure=fig, height_ratios=[ntrain, ntest], hspace=0.1 166 | ) 167 | if train_gs is None: 168 | train_gs = gridspec.GridSpecFromSubplotSpec( 169 | ntrain, ncols, subplot_spec=gs[0] 170 | ) 171 | if test_gs is None: 172 | test_gs = gridspec.GridSpecFromSubplotSpec(ntest, ncols, subplot_spec=gs[1]) 173 | 174 | if train_gs is not None: 175 | train_ax = fig.add_subplot(gs[0]) 176 | train_ax.set_xticks([]) 177 | train_ax.set_yticks([]) 178 | train_ax.set_ylabel("Train samples") 179 | for i in range(ntrain): 180 | ax0 = fig.add_subplot(train_gs[i, 0]) 181 | ax1 = fig.add_subplot(train_gs[i, 1]) 182 | self.train[i].show(fig=fig, axes=[ax0, ax1]) 183 | if predictor is not None: 184 | preds = islice( 185 | predictor.predict(self.train[i].input_field), npredictions 186 | ) 187 | for k, prediction in enumerate(preds): 188 | ax = fig.add_subplot(train_gs[i, k + 2]) 189 | ax.set_xticks([]) 190 | ax.set_yticks([]) 191 | dice = Field.score(prediction, self.train[i].output_field) 192 | prediction.show(ax, label=f"{dice:1.4f}") 193 | 194 | if test_gs is not None: 195 | test_ax = fig.add_subplot(gs[1]) 196 | test_ax.set_xticks([]) 197 | test_ax.set_yticks([]) 198 | test_ax.set_ylabel("Test samples") 199 | for i in range(ntest): 200 | ax0 = fig.add_subplot(test_gs[i, 0]) 201 | ax1 = fig.add_subplot(test_gs[i, 1]) 202 | # npredictions=1 203 | # pred_ax = [fig.add_subplot(test_gs[i, 2+k]) for k in range(npredictions)] 204 | self.test[i].show(fig=fig, axes=[ax0, ax1]) 205 | # predictor=predictor, npredictions=npredictions) 206 | if predictor is not None: 207 | preds = islice( 208 | predictor.predict(self.test[i].input_field), npredictions 209 | ) 210 | for k, prediction in enumerate(preds): 211 | ax = 
fig.add_subplot(test_gs[i, k + 2])
212 |                     ax.set_xticks([])
213 |                     ax.set_yticks([])
214 |                     if self.test[i].output_field is not None:
215 |                         dice = Field.score(prediction, self.test[i].output_field)
216 |                         dice = f"{dice:1.4f}"
217 |                     else:
218 |                         dice = "-"
219 |                     prediction.show(ax, label=dice)
--------------------------------------------------------------------------------
/kaggle_arc/base/transformers.py:
--------------------------------------------------------------------------------
1 | import rootutils
2 | 
3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
4 | 
5 | import numpy as np
6 | from base.field import Field
7 | from base.iodata import IOData
8 | 
9 | 
10 | def resize_output(iodata):
11 |     if isinstance(iodata, list):
12 |         return [resize_output(data) for data in iodata]
13 |     h, w = iodata.input_field.shape
14 |     if iodata.output_field is not None:
15 |         output = iodata.output_field.data[:h, :w]
16 |         output = Field(output)
17 |     else:
18 |         output = None
19 |     return IOData(input_field=iodata.input_field, output_field=output)
20 | 
21 | 
22 | def crop_data(data):
23 |     cols = np.argwhere(data.std(0) > 0).flatten()  # columns with variation (std over axis 0)
24 |     rows = np.argwhere(data.std(1) > 0).flatten()  # rows with variation (std over axis 1)
25 |     if len(cols) < 1 or len(rows) < 1:
26 |         return data
27 |     return data[min(rows) : max(rows) + 1, min(cols) : max(cols) + 1]
--------------------------------------------------------------------------------
/kaggle_arc/base/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Common helper functions
3 | """
4 | 
5 | import rootutils
6 | 
7 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
8 | 
9 | import skimage.measure as sk_measure
10 | from itertools import product
11 | import numpy as np
12 | 
13 | 
14 | def get_data_regions(data, connectivity=None):
15 |     "returns distinct regions for colors 0-9"
16 |     l = sk_measure.label(data, connectivity=connectivity)
17 |     lz = sk_measure.label(data == 0, connectivity=connectivity)
18 |     m = np.max(l)
19 |     lz += m
20 |     ids = np.where(data == 0)
21 |     l[ids] = lz[ids]
22 |     return l
23 | 
24 | 
25 | def make_convex(r):
26 |     mask = np.zeros(r.shape)
27 |     mask[np.where(r)] = 1
28 |     coords = np.argwhere(r)
29 |     for xcoord in np.unique(coords[:, 0]):
30 |         y = coords[np.argwhere(coords[:, 0] == xcoord), 1]
31 |         mask[xcoord, np.min(y) : np.max(y)] = 1
32 |     for ycoord in np.unique(coords[:, 1]):
33 |         x = coords[np.argwhere(coords[:, 1] == ycoord), 0]
34 |         mask[np.min(x) : np.max(x), ycoord] = 1
35 |     # print(np.unique(coords[:, 0]))
36 |     return mask == 1
37 | 
38 | 
39 | def fill_region_holes(r):
40 |     mask = np.zeros(r.shape)
41 |     mask[np.where(r)] = 1
42 |     coords = np.argwhere(r)
43 |     xmin, ymin = np.min(coords, 0)
44 |     xmax, ymax = np.max(coords, 0)
45 |     for i in range(xmin, xmax + 1):
46 |         for j in range(ymin, ymax + 1):
47 |             x = coords[np.argwhere(coords[:, 1] == j), 0]
48 |             if not (np.min(x) <= i and i <= np.max(x)):
49 |                 continue
50 |             y = coords[np.argwhere(coords[:, 0] == i), 1]
51 |             if not (np.min(y) <= j and j <= np.max(y)):
52 |                 continue
53 |             mask[i, j] = 1
54 |     return mask == 1
55 | 
56 | 
57 | def split_interior(r, connectivity=None):
58 |     mask = np.zeros(r.shape)
59 |     shifts = [
60 |         (i, j) for i, j in product([-1, 0, 1], [-1, 0, 1]) if not (i == 0 and j == 0)
61 |     ]
62 |     if connectivity is not None:
63 |         shifts = [(i, j) for (i, j) in shifts if i == 0 or j == 0]
64 |     for x, y in np.argwhere(r):
65 |         neighbours = [(x + i, y + j) for i, j in shifts]
66 |         neighbours = [
67 |             (i, j)
68 |             for i, j in 
neighbours
69 |             if i >= 0 and j >= 0 and i < r.shape[0] and j < r.shape[1]
70 |         ]
71 |         if np.any([r[i, j] != 1 for i, j in neighbours]):
72 |             mask[x, y] = 1
73 |     mask = mask == 1
74 |     return mask, (~mask) * r
75 | 
76 | 
77 | def get_region_params(r, connectivity=None):
78 |     params = dict()
79 |     maps = dict()
80 |     for rid in np.unique(r):
81 |         params[rid] = dict()
82 |         maps[rid] = dict()
83 |         region = r == rid
84 |         m = np.argwhere(region)
85 |         xmin, ymin = np.min(m, 0)
86 |         xmax, ymax = np.max(m, 0)
87 |         xmean, ymean = np.mean(m, 0)
88 |         # print(xmin)
89 |         params[rid]["h"] = xmax - xmin + 1  # height spans rows (first index)
90 |         params[rid]["w"] = ymax - ymin + 1  # width spans columns (second index)
91 |         params[rid]["xmin"] = xmin
92 |         params[rid]["xmax"] = xmax
93 |         params[rid]["ymin"] = ymin
94 |         params[rid]["ymax"] = ymax
95 |         params[rid]["xmean"] = int(xmean)
96 |         params[rid]["ymean"] = int(ymean)
97 |         conv = make_convex(region)
98 | 
99 |         maps[rid]["convex"] = conv
100 | 
101 |         params[rid]["is_convex"] = np.all(conv == region)
102 |         no_holes = fill_region_holes(region)
103 | 
104 |         maps[rid]["no_holes"] = no_holes
105 | 
106 |         is_rectangular = no_holes[xmin : xmax + 1, ymin : ymax + 1].mean() == 1
107 |         params[rid]["is_rectangular"] = is_rectangular
108 |         params[rid]["is_square"] = is_rectangular and xmax - xmin + 1 == ymax - ymin + 1
109 |         area = region[xmin : xmax + 1, ymin : ymax + 1]
110 |         convex_area = conv[xmin : xmax + 1, ymin : ymax + 1]
111 | 
112 |         operations = [
113 |             lambda inp: np.fliplr(inp),
114 |             lambda inp: np.rot90(np.fliplr(inp), 1),
115 |             lambda inp: np.rot90(np.fliplr(inp), 2),
116 |             lambda inp: np.rot90(np.fliplr(inp), 3),
117 |             lambda inp: np.flipud(inp),
118 |             lambda inp: np.rot90(np.flipud(inp), 1),
119 |             lambda inp: np.rot90(np.flipud(inp), 2),
120 |             lambda inp: np.rot90(np.flipud(inp), 3),
121 |             lambda inp: np.fliplr(np.flipud(inp)),
122 |             lambda inp: np.flipud(np.fliplr(inp)),
123 |         ]
124 |         for i, op in enumerate(operations):
125 |             transformed_area = op(area)
126 |             params[rid][f"flip_{i}"] = (transformed_area.shape == area.shape) and np.all(transformed_area == area)
127 |             transformed_convex_area = op(convex_area)
128 |             params[rid][f"flip_conv_{i}"] = (transformed_convex_area.shape == convex_area.shape) and np.all(transformed_convex_area == convex_area)
129 | 
130 |         inner_regions = [
131 |             x for x in np.unique(r[np.where(no_holes)]) if x != rid and x != 0
132 |         ]
133 |         params[rid]["inner_regions"] = inner_regions
134 |         params[rid]["holes"] = len(inner_regions)
135 |         contour, interior = split_interior(region, connectivity=connectivity)
136 | 
137 |         maps[rid]["contour"] = contour
138 |         maps[rid]["interior"] = interior
139 | 
140 |         params[rid]["contour_size"] = np.sum(contour)
141 |         params[rid]["interior_size"] = np.sum(interior)
142 | 
143 |     return params, maps
--------------------------------------------------------------------------------
/kaggle_arc/constants.py:
--------------------------------------------------------------------------------
1 | import matplotlib
2 | import seaborn as sns
3 | 
4 | DATADIR = "../input/abstraction-and-reasoning-challenge"
5 | 
6 | WORKDIR = "../working"
7 | TEST_SAVEPATH = "../working/submission.csv"
8 | 
9 | PALETTE = sns.crayon_palette(
10 |     (
11 |         "Eggplant,Aquamarine,Jungle Green,Atomic Tangerine,Blue Bell,Wisteria,"
12 |         + "Banana Mania,Blue Violet,Carnation Pink,Cerise"
13 |     ).split(",")
14 | )  # list(sns.crayons)[:10])
15 | COLORMAP = matplotlib.colors.ListedColormap(PALETTE)
16 | NORM = matplotlib.colors.Normalize(vmin=0, vmax=9)
17 | 
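# Hedged usage sketch (editorial, not part of the original file): these
# constants are consumed together by matplotlib, exactly as in Field.show:
#
#   import matplotlib.pyplot as plt
#   from constants import COLORMAP, NORM
#   plt.imshow(field.data, cmap=COLORMAP, norm=NORM)  # maps colors 0-9 to the fixed palette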
--------------------------------------------------------------------------------
/kaggle_arc/operations/basic.py:
--------------------------------------------------------------------------------
1 | import rootutils
2 | 
3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
4 | 
5 | import numpy as np
6 | 
7 | from base.field import Field
8 | 
9 | 
10 | def func_identity(x):
11 |     return x
12 | 
13 | 
14 | def const_func_wrapper(i):
15 |     return lambda x: i
16 | 
17 | 
18 | candidate_functions = [
19 |     func_identity,
20 | ] + [const_func_wrapper(i) for i in range(10)]
21 | 
22 | 
23 | class Operation:
24 |     def __call__(self, data):
25 |         pass
26 | 
27 | 
28 | class Replace(Operation):
29 |     def __init__(self, replacements):
30 |         # replacements is an array with 10 elements - some permutation of numbers 0..9
31 |         self.replacements = replacements
32 |         self.repl_func = np.vectorize(lambda x: self.replacements[x])
33 | 
34 |     def __call__(self, data):
35 |         c = data.copy()
36 |         c = self.repl_func(c)
37 |         return c  # Field(c)
38 | 
39 | 
40 | class Repaint(Operation):
41 |     def __init__(self, input_data):
42 |         data = np.unique(input_data, return_counts=True)
43 |         s = sorted(zip(*data), key=lambda x: x[1], reverse=True)
44 |         self.replacements = [x for x, y in s]
45 | 
46 |     def build_replacements_dict(self, data, filter_zero=False):
47 |         replacements = self.replacements
48 |         if filter_zero:
49 |             replacements = [k for k in self.replacements if k != 0]
50 |             data = [d for d in data if d != 0]
51 |         repl_dict = dict(list(zip(data, replacements)))
52 |         return repl_dict
53 | 
54 |     def __call__(self, input_data):
55 |         data = np.unique(input_data, return_counts=True)
56 |         data = sorted(zip(*data), key=lambda x: x[1], reverse=True)
57 |         data = [x for x, y in data]
58 |         replacements = self.build_replacements_dict(data, filter_zero=True)
59 |         # print(replacements)
60 |         if len(replacements) < 1:
61 |             return input_data
62 |         repl_coords = {k: np.where(input_data == k) for k in replacements}
63 |         result = input_data.copy()
64 |         for k, (x, y) in repl_coords.items():
65 |             c = replacements[k]
66 |             result[x, y] = c
67 |         return result
--------------------------------------------------------------------------------
/kaggle_arc/operations/field2point.py:
--------------------------------------------------------------------------------
1 | """
2 | First, we define methods for various field-to-color conversion operations.
3 | """ 4 | 5 | import rootutils 6 | 7 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 8 | 9 | 10 | import numpy as np 11 | 12 | from skimage.measure import label 13 | 14 | from operations.basic import Operation 15 | from base.field import * 16 | from base.iodata import * 17 | 18 | 19 | class IrreversibleOperation: 20 | def __init__(self): 21 | pass 22 | 23 | def do(self): 24 | pass 25 | 26 | 27 | def most_frequent_color(data, bg=None): 28 | # print("most frequent color") 29 | if bg is None: 30 | return np.argmax([np.sum(data == i) for i in range(10)]) 31 | s = [np.sum(data == i) for i in range(10) if i != bg] 32 | if np.sum(s) > 0: 33 | return np.argmax(s) 34 | return bg 35 | 36 | 37 | def least_frequent_color(data, bg=None): 38 | # print("least frequent color") 39 | if bg is None: 40 | s = {i: np.sum(data == i) for i in range(10)} 41 | s = [(k, v) for k, v in s.items() if v > 0] 42 | s = sorted(s, key=lambda x: x[1]) 43 | if len(s) > 0: 44 | # print(s) 45 | return s[0][0] 46 | return 0 47 | s = {i: np.sum(data == i) for i in range(10) if i != bg} 48 | s = [(k, v) for k, v in s.items() if v > 0] 49 | s = sorted(s, key=lambda x: x[1]) 50 | if len(s) > 0: 51 | return s[0][0] 52 | return bg 53 | 54 | 55 | def count_color_area(data, bg=0): 56 | # print("color area") 57 | return np.max(label(data != bg)) 58 | 59 | 60 | def count_color_area_bg(data, bg=0): 61 | # print("color area bg") 62 | return np.max(label(data == bg)) 63 | 64 | 65 | def compute_color_gradient(data): 66 | cg0 = [np.sum(data[i]) for i in range(data.shape[0])] 67 | cg1 = [np.sum(data[:, i]) for i in range(data.shape[1])] 68 | return tuple(cg0), tuple(cg1) 69 | 70 | 71 | def compute_weight_gradient(data, bg=0): 72 | return compute_color_gradient(data != bg) 73 | 74 | 75 | def count_colors(data, bg=None): 76 | # print("count colors") 77 | # print(len(np.unique(data))) 78 | return len(np.unique(data)) 79 | 80 | 81 | def make_positional_color_selector(x, y): 82 | def pos_color_selector(data, bg=None): 83 | if x >= data.shape[0] or x < -data.shape[0]: 84 | x0 = x % data.shape[0] 85 | else: 86 | x0 = x 87 | if y >= data.shape[1] or y < -data.shape[1]: 88 | y0 = y % data.shape[1] 89 | else: 90 | y0 = y 91 | return data.data[x0, y0] 92 | 93 | return pos_color_selector 94 | 95 | 96 | class SimpleSummarizeOperation(IrreversibleOperation): 97 | def __init__(self): 98 | self.bg = None 99 | self.func = None # lambda x, bg=0: x 100 | 101 | def train(self, iodata_list): 102 | if isinstance(iodata_list[0], IOData): 103 | iodata_list = [(x.input_field, x.output_field) for x in iodata_list] 104 | # elif isinstance(complex_iodata_list[0][0], ComplexField): 105 | # complex_iodata_list = [ 106 | # (xi, xo) for i, o in complex_iodata_list 107 | # for xi, xo in zip(i.flat_iter(), o.flat_iter())] 108 | candidates = [ 109 | most_frequent_color, 110 | least_frequent_color, 111 | count_color_area, 112 | count_color_area_bg, 113 | count_colors, 114 | ] 115 | color_dict = { 116 | tuple((i.data == i.data[0, 0]).flatten()): o.data[0, 0] 117 | for i, o in iodata_list 118 | } 119 | # candidates.append( 120 | # lambda x, bg: color_dict.get(tuple((x.data == x.data[0, 0]).flatten()), 0) 121 | # ) 122 | # candidates.extend([]) 123 | 124 | h, w = list(zip(*[i.shape for i, o in iodata_list])) 125 | hmin = np.min(h) 126 | wmin = np.min(w) 127 | for i in range(hmin): 128 | for j in range(wmin): 129 | func = [ 130 | make_positional_color_selector(i, j), 131 | make_positional_color_selector(i, -j), 132 | 
make_positional_color_selector(-i, j), 133 | make_positional_color_selector(-i, -j), 134 | ] 135 | candidates.extend(func) 136 | 137 | best_candidate = candidates[0] 138 | best_bg = dict() 139 | best_score = 0 140 | 141 | for candidate in candidates: 142 | best_bg[candidate] = None 143 | candidate_score = 0 144 | # candidate_bg = None 145 | scores = [] 146 | best_sample_score = 0 147 | for bg in list(range(10)) + [None]: 148 | score = [ 149 | Field.score(Field([[candidate(i.data, bg=bg)]]), o) 150 | for i, o in iodata_list 151 | ] 152 | mean_score = np.mean(score) 153 | if mean_score > best_sample_score: 154 | best_sample_score = mean_score 155 | best_bg[candidate] = [bg] 156 | candidate_score = best_sample_score 157 | # print(candidate_score, best_bg) 158 | # print(candidate_score) 159 | # best_bg[candidate_score] = (candidatecandidate_bg 160 | if candidate_score > best_score: 161 | best_score = candidate_score 162 | best_candidate = candidate 163 | 164 | self.func = best_candidate 165 | self.bg = best_bg[best_candidate] 166 | # self.bg = [self.bg[k] for k in sorted(self.bg)] 167 | # print(self.bg) 168 | # most_frequent_color 169 | pass 170 | 171 | def do(self, field, bg=None): 172 | if len(self.bg) != 1: 173 | # print("use bg from param", self.bg, bg) 174 | pixel = self.func(field, bg=bg) 175 | else: 176 | # print(self.bg) 177 | pixel = self.func(field, bg=self.bg[0]) 178 | # print(pixel, self.func) 179 | # print(np.asarray(pixel)) 180 | return Field([[pixel]]) 181 | 182 | 183 | class ComplexSummarizeOperation(IrreversibleOperation): 184 | def __init__(self): 185 | self.bg = None 186 | self.func = None # lambda x, bg=0: x 187 | 188 | def train(self, complex_iodata_list): 189 | if isinstance(complex_iodata_list[0], IOData): 190 | complex_iodata_list = [ 191 | (x.input_field, x.output_field) for x in complex_iodata_list 192 | ] 193 | # elif isinstance(complex_iodata_list[0][0], ComplexField): 194 | # complex_iodata_list = [ 195 | # (xi, xo) for i, o in complex_iodata_list 196 | # for xi, xo in zip(i.flat_iter(), o.flat_iter())] 197 | candidates = [ 198 | most_frequent_color, 199 | least_frequent_color, 200 | count_color_area, 201 | count_color_area_bg, 202 | count_colors, 203 | ] 204 | best_candidate = candidates[0] 205 | best_bg = dict() 206 | best_score = 0 207 | 208 | for candidate in candidates: 209 | best_bg[candidate] = dict() 210 | candidate_score = 0 211 | # candidate_bg = None 212 | scores = [] 213 | for k, (inp, out) in enumerate(complex_iodata_list): 214 | 215 | iodata_list = list(zip(inp.flat_iter(), out.flat_iter())) 216 | # iodata_list = list(zip([x for xs in inp for x in xs], [x for xs in out for x in xs])) 217 | best_sample_score = 0 218 | for bg in list(range(10)) + [None]: 219 | score = [ 220 | Field.score(Field([[candidate(i.data, bg=bg)]]), o) 221 | for i, o in iodata_list 222 | ] 223 | mean_score = np.mean(score) 224 | if mean_score > best_sample_score: 225 | best_sample_score = mean_score 226 | best_bg[candidate][k] = bg 227 | scores.append(best_sample_score) 228 | candidate_score = np.mean(scores) 229 | # print(candidate_score, best_bg) 230 | # print(candidate_score) 231 | # best_bg[candidate_score] = (candidatecandidate_bg 232 | if candidate_score > best_score: 233 | best_score = candidate_score 234 | best_candidate = candidate 235 | 236 | self.func = best_candidate 237 | self.bg = best_bg[best_candidate] 238 | self.bg = [self.bg[k] for k in sorted(self.bg)] 239 | bg = set() 240 | for k in self.bg: 241 | bg.add(k) 242 | self.bg = list(bg) 243 | # print(self.bg) 244 
| # most_frequent_color 245 | pass 246 | 247 | def do(self, field, bg=None): 248 | if len(self.bg) != 1: 249 | # print("use bg from param", self.bg, bg) 250 | pixel = self.func(field, bg=bg) 251 | else: 252 | # print(self.bg) 253 | pixel = self.func(field, bg=self.bg[0]) 254 | # print(pixel, self.func) 255 | # print(np.asarray(pixel)) 256 | return Field([[pixel]]) 257 | -------------------------------------------------------------------------------- /kaggle_arc/operations/resizing.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | import numpy as np 6 | from operations.basic import Operation 7 | 8 | 9 | class Repeater(Operation): 10 | def __init__(self, m1, m2): 11 | self.m1 = m1 12 | self.m2 = m2 13 | 14 | def __call__(self, data): 15 | height, width = data.shape 16 | result = np.zeros((height * self.m1, width * self.m2), dtype=data.dtype) 17 | # for offset1 in range(self.m1): 18 | # for offset2 in range(self.m2): 19 | for i in range(height): 20 | for j in range(width): 21 | result[i::height, j::width] = data[i, j] 22 | return result 23 | 24 | 25 | class Mirror(Operation): 26 | def __init__(self, m1, m2, horizontal=True, vertical=True): 27 | self.m1 = m1 28 | self.m2 = m2 29 | self.horizontal = horizontal 30 | self.vertical = vertical 31 | 32 | def __call__(self, data): 33 | height, width = data.shape 34 | result = np.zeros((height * self.m1, width * self.m2), dtype=data.dtype) 35 | # for offset1 in range(self.m1): 36 | # for offset2 in range(self.m2): 37 | for i in range(height): 38 | for j in range(width): 39 | result[i::height, j::width] = data[i, j] 40 | if self.vertical: 41 | result[height + i :: 2 * height, j::width] = data[height - 1 - i, j] 42 | if self.horizontal: 43 | result[i::height, width + j :: 2 * width] = data[i, width - 1 - j] 44 | if self.horizontal and self.vertical: 45 | result[height + i :: 2 * height, width + j :: 2 * width] = data[ 46 | height - 1 - i, width - 1 - j 47 | ] 48 | return result 49 | 50 | 51 | class Resizer(Operation): 52 | def __init__(self, m1, m2): 53 | self.m1 = m1 54 | self.m2 = m2 55 | 56 | def __call__(self, data): 57 | height, width = data.shape 58 | result = np.zeros((height * self.m1, width * self.m2), dtype=data.dtype) 59 | for i in range(height): 60 | for j in range(width): 61 | result[ 62 | i * self.m1 : (i + 1) * self.m1, j * self.m2 : (j + 1) * self.m2 63 | ] = data[i, j] 64 | return result 65 | 66 | 67 | class Fractal(Operation): 68 | def __init__(self, m1, m2): 69 | self.m1 = m1 70 | self.m2 = m2 71 | 72 | def __call__(self, data): 73 | height, width = data.shape 74 | result = np.zeros((height * self.m1, width * self.m2), dtype=data.dtype) 75 | for i in range(height): 76 | for j in range(width): 77 | result[i::height, j::width] = data[i, j] 78 | for i in range(height): 79 | for j in range(width): 80 | if data[i, j] == 0: 81 | result[ 82 | i * self.m1 : (i + 1) * self.m1, j * self.m2 : (j + 1) * self.m2 83 | ] = 0 # field.data[i, j] 84 | return result 85 | -------------------------------------------------------------------------------- /kaggle_arc/operations/reversible.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | import numpy as np 6 | 7 | from base.field import * 8 | from base.iodata import * 9 | from operations.basic import 
candidate_functions 10 | 11 | 12 | class ReversibleOperation: 13 | def __init__(self): 14 | pass 15 | 16 | def do(self, field): 17 | pass 18 | 19 | def od(self, field): 20 | pass 21 | 22 | 23 | def split2shape(field, target_shape, hsep=0, wsep=0, outer_sep=False): 24 | h, w = target_shape 25 | fh, fw = field.shape 26 | if outer_sep: 27 | hpart = (fh - hsep) // h - hsep 28 | wpart = (fw - wsep) // w - wsep 29 | else: 30 | hpart = (fh + hsep) // h - hsep 31 | wpart = (fw + wsep) // w - wsep 32 | 33 | splitted = [] 34 | splitters = np.ones(field.data.shape, dtype=field.dtype) 35 | for i in range(h): 36 | line = [] 37 | hstart = outer_sep * hsep + i * (hpart + hsep) 38 | for j in range(w): 39 | wstart = outer_sep * wsep + j * (wpart + wsep) 40 | subfield = Field( 41 | field.data[hstart : hstart + hpart, wstart : wstart + wpart] 42 | ) 43 | line.append(subfield) 44 | splitters[hstart : hstart + hpart, wstart : wstart + wpart] = 0 45 | splitted.append(line) 46 | cf = ComplexField(splitted, separator=splitters * field.data, splitter=splitters) 47 | return cf 48 | 49 | 50 | def split_by_shape(field, subshape, hsep=0, wsep=0, outer_sep=False): 51 | h, w = subshape 52 | fh, fw = field.shape 53 | # hpart = (fh - outer_sep*hsep) // h - hsep 54 | # wpart = (fw - outer_sep*wsep) // w - wsep 55 | 56 | splitted = [] 57 | splitters = np.ones(field.data.shape, dtype=field.dtype) 58 | 59 | for i in range(outer_sep * 1, fh, h + hsep): 60 | line = [] 61 | sep_line = [] 62 | for j in range(outer_sep * 1, fw, w + wsep): 63 | subfield = Field(field.data[i : i + h, j : j + w]) 64 | line.append(subfield) 65 | splitters[i : i + h, j : j + w] = 0 66 | # sep_line.append(Field(field.data[i + h:i+h+hsep])) 67 | splitted.append(line) 68 | sep = splitters * field.data 69 | cf = ComplexField(splitted, separator=sep, splitter=splitters) 70 | return cf 71 | 72 | 73 | def collect_field(multifield, hsep=0, wsep=0, outer_sep=False, sep_color=0): 74 | all_lines = [] 75 | for line in multifield.data: 76 | line_data = [] 77 | shape = list({x.shape for x in line})[0] 78 | sep = np.ones((shape[0], wsep), dtype=np.uint8) * sep_color 79 | if outer_sep and hsep > 0: 80 | line_data.append(sep) 81 | for x in line: 82 | line_data.append(x.data) 83 | if wsep > 0: 84 | line_data.append(sep) 85 | if not outer_sep and wsep > 0: 86 | line_data = line_data[:-1] 87 | line_data = np.concatenate( 88 | line_data, 1 89 | ) # np.concatenate([x.data for x in line], 1) 90 | all_lines.append(line_data) 91 | # collect all line parts 92 | shape = list({x.shape for x in all_lines})[0] 93 | sep = np.ones((hsep, shape[1])) * sep_color 94 | line_data = [] 95 | if outer_sep: 96 | line_data.append(sep) 97 | for l in all_lines: 98 | line_data.append(l) 99 | if hsep > 0: 100 | line_data.append(sep) 101 | if not outer_sep and hsep > 0: 102 | line_data = line_data[:-1] 103 | all_lines = np.concatenate(line_data, 0) 104 | return Field(all_lines) 105 | 106 | 107 | def increase2shape(data, target_shape): 108 | h, w = target_shape 109 | line = np.concatenate([data for i in range(w)], 1) 110 | d = np.concatenate([line for j in range(h)], 0) 111 | return d 112 | 113 | 114 | def decrease2color(data, background=0): 115 | colors, counts = np.unique(data[np.where(data != background)], return_counts=True) 116 | if len(colors) < 1: 117 | return background 118 | return colors[0] 119 | 120 | 121 | class ReversibleSplit(ReversibleOperation): 122 | def __init__( 123 | self, 124 | shape, 125 | hsep=0, 126 | wsep=0, 127 | outer_sep=False, 128 | sep_color=0, 129 | parent=None, 130 | 
splitter_func=split2shape,
131 |     ):
132 |         self.shape = shape
133 |         self.hsep = hsep
134 |         self.wsep = wsep
135 |         self.outer_sep = outer_sep
136 |         self.sep_color = sep_color
137 |         self.parent = parent
138 |         self.splitter_func = splitter_func
139 | 
140 |     def do(self, field):
141 |         splitted = self.splitter_func(
142 |             field, self.shape, hsep=self.hsep, wsep=self.wsep, outer_sep=self.outer_sep
143 |         )
144 |         return splitted
145 | 
146 |     def od(self, multifield):
147 |         field = collect_field(
148 |             multifield,
149 |             hsep=self.hsep,
150 |             wsep=self.wsep,
151 |             outer_sep=self.outer_sep,
152 |             sep_color=self.sep_color,
153 |         )
154 |         return field
155 | 
156 |     def __str__(self):
157 |         return f"ReversibleSplit({self.shape})"
158 | 
159 | 
160 | class ReversibleCombine(ReversibleOperation):
161 |     def __init__(
162 |         self,
163 |         shape,
164 |         hsep=0,
165 |         wsep=0,
166 |         outer_sep=False,
167 |         sep_color=0,
168 |         parent=None,
169 |         splitter_func=split2shape,
170 |     ):
171 |         self.shape = shape
172 |         self.hsep = hsep
173 |         self.wsep = wsep
174 |         self.outer_sep = outer_sep
175 |         self.sep_color = sep_color
176 |         self.splitter_func = splitter_func
177 |         self.color_func = None
178 |         self.parent = parent
179 | 
180 |     def train(self, io_list):
181 |         # todo: correctly process case when there is no splitter
182 |         get_color = lambda m: np.unique(
183 |             m.params["separator"][np.where(m.params["splitter"])]
184 |         )[0]
185 |         pairs = [(m, self.od(output_field)) for m, output_field in io_list]
186 |         color_pairs = [(get_color(m), get_color(o)) for m, o in pairs]
187 |         scores = []
188 |         for func in candidate_functions:
189 |             score = np.mean([func(i) == o for i, o in color_pairs])
190 |             scores.append((-score, func))
191 |         scores = sorted(scores, key=lambda x: x[0])
192 |         score, score_func = scores[0]
193 |         if score > -1:
194 |             color_dict = dict(color_pairs)
195 |             score_func = lambda x: color_dict.get(x, 0)
196 |         self.color_func = score_func
197 | 
198 |     def do(self, multifield):
199 |         if self.hsep > 0 or self.wsep > 0:
200 |             colors = np.unique(
201 |                 multifield.params["separator"][np.where(multifield.params["splitter"])]
202 |             )
203 |         else:
204 |             colors = []
205 |         sep_color = self.sep_color
206 |         if len(colors) > 0 and self.color_func is not None:
207 |             sep_color = self.color_func(colors[0])
208 | 
209 |         field = collect_field(
210 |             multifield,
211 |             hsep=self.hsep,
212 |             wsep=self.wsep,
213 |             outer_sep=self.outer_sep,
214 |             sep_color=sep_color,
215 |         )
216 |         return field
217 | 
218 |     def od(self, field):
219 |         splitted = self.splitter_func(
220 |             field, self.shape, hsep=self.hsep, wsep=self.wsep, outer_sep=self.outer_sep
221 |         )
222 |         return splitted
223 | 
224 |     def __str__(self):
225 |         return f"ReversibleCombine({self.shape})"
226 | 
227 | 
228 | class WrappedOperation:
229 |     def __init__(self, preprocess=None, postprocess=None):
230 |         self.preprocess = preprocess
231 |         self.postprocess = postprocess
232 | 
233 |     def wrap(self, iodata):
234 |         if isinstance(iodata, IOData):
235 |             i = iodata.input_field
236 |             o = iodata.output_field
237 |         else:
238 |             i, o = iodata
239 |         forward_i = self.preprocess.do(i)
240 |         if self.postprocess is None:
241 |             reverse_o = o
242 |         else:
243 |             reverse_o = self.postprocess.od(o)
244 |         return forward_i, reverse_o
245 | 
246 |     def train(self, iodata_list):
247 |         # TODO: need to implement this
248 |         data = [
249 |             (self.preprocess.do(iodata.input_field), iodata.output_field)
250 |             for iodata in iodata_list
251 |         ]
252 |         if self.postprocess is not None:
253 |             self.postprocess.train(data)
254 | 
255 |     def run(self, 
field, prev=lambda x: x):
256 |         x = self.preprocess.do(field)
257 |         if self.postprocess is None:
258 |             op = prev
259 |         else:
260 |             if prev is None:
261 |                 op = lambda t: self.postprocess.do(t)
262 |             else:
263 |                 op = lambda t: prev(self.postprocess.do(t))
264 |         return x, op
265 | 
266 | 
267 | class WrappedOperationList:
268 |     def __init__(self, operations):
269 |         self.operations = operations
270 |         pass
271 | 
272 |     def train(self, iodata_list):
273 |         il = iodata_list
274 |         for op in self.operations:
275 |             op.train(il)
276 |             il = [op.wrap(io) for io in il]
277 |         pass
278 | 
279 |     def wrap(self, iodata):
280 |         if isinstance(iodata, IOData):
281 |             i = iodata.input_field
282 |             o = iodata.output_field
283 |             x = (i, o)
284 |         else:
285 |             x = iodata
286 |         for op in self.operations:
287 |             x = op.wrap(x)
288 |         return x
289 | 
290 |     def run(self, field, prev=lambda x: x):
291 |         x = field
292 |         prev = None
293 |         for op in self.operations:
294 |             x, prev = op.run(x, prev)
295 |         return x, prev
--------------------------------------------------------------------------------
/kaggle_arc/operations/subpatterns.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for subpattern extraction
3 | """
4 | 
5 | import rootutils
6 | 
7 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
8 | 
9 | import numpy as np
10 | 
11 | 
12 | def get_suffixes(s, wildcard=0):
13 |     suffix_len = 0
14 |     m = len(s)
15 |     suffixes = [0 for i in range(m)]
16 |     i = 1
17 |     while i < m:
18 |         if s[i] == wildcard or s[suffix_len] == wildcard or s[i] == s[suffix_len]:
19 |             suffix_len += 1
20 |             suffixes[i] = suffix_len
21 |             i += 1
22 |         elif suffix_len != 0:
23 |             suffix_len = suffixes[suffix_len - 1]
24 |         else:
25 |             suffixes[i] = 0
26 |             i += 1
27 |     return suffixes
28 | 
29 | 
30 | def get_repeat_length(suffixes):
31 |     n = len(suffixes)
32 |     k = suffixes[-1]
33 |     if k < n - k:
34 |         return n
35 |     return n - k
36 | 
37 | 
38 | def check_subpattern(data, r, c, wildcard=0):
39 |     for line in data:
40 |         condition = np.all(
41 |             [x == y or x == wildcard or y == wildcard for x, y in zip(line, line[c:])]
42 |         )
43 |         if not condition:
44 |             return False
45 |     for line in data.T[:c]:
46 |         condition = np.all(
47 |             [x == y or x == wildcard or y == wildcard for x, y in zip(line, line[c:])]
48 |         )
49 |         if not condition:
50 |             return False
51 |     return True
52 | 
53 | 
54 | def get_subpattern(data, wildcard=0, check_passed=True):
55 |     repeats = []
56 |     for line in data:
57 |         s = get_suffixes(line, wildcard)
58 |         r = get_repeat_length(s)
59 |         repeats.append(r)
60 |     # print(repeats)
61 |     if check_passed:
62 |         col = int(np.median(repeats))
63 |     else:
64 |         col = np.lcm.reduce(repeats)
65 |     # print(col)
66 |     crepeats = []
67 |     if check_passed:
68 |         subset = data.T
69 |     else:
70 |         subset = data.T[:col]
71 |     for line in subset:
72 |         s = get_suffixes(line, wildcard)
73 |         r = get_repeat_length(s)
74 |         if check_passed:
75 |             if r == len(s):
76 |                 continue
77 |         crepeats.append(r)
78 |     # print(crepeats)
79 |     if check_passed:
80 |         if len(crepeats) == 0:
81 |             row = len(data)
82 |         else:
83 |             row = int(np.median(crepeats))
84 |     else:
85 |         row = np.lcm.reduce(crepeats)
86 |     return row, col
--------------------------------------------------------------------------------
/kaggle_arc/predictors/__init__.py:
--------------------------------------------------------------------------------
1 | import rootutils
2 | 
3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
4 | 
5 | from 
predictors.basic import IdPredictor, ZerosPredictor, ConstPredictor, FillPredictor 6 | from predictors.complex import ComplexPredictor 7 | from predictors.color_counting import ColorCountingPredictor 8 | from predictors.shapes import ( 9 | RepeatingPredictor, 10 | FractalPredictor, 11 | ResizingPredictor, 12 | MirrorPredictor, 13 | ConstantShaper, 14 | ) 15 | from predictors.boosting_tree import ( 16 | BoostingTreePredictor, 17 | BoostingTreePredictor2, 18 | BoostingTreePredictor3, 19 | ) 20 | from predictors.convolution import ConvolutionPredictor 21 | from predictors.graph_boosting_tree import ( 22 | GraphBoostingTreePredictor, 23 | GraphBoostingTreePredictor2, 24 | GraphBoostingTreePredictor3, 25 | ) 26 | from predictors.decision_tree import AugmentedPredictor 27 | from predictors.subpattern import SubpatternMatcherPredictor 28 | from predictors.connector import PointConnectorPredictor 29 | # from predictors.cf_combinator import WrappedCFPredictor 30 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/availability_mixins.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | import numpy as np 5 | import skimage.measure as sk_measure 6 | from fractions import Fraction 7 | 8 | from utils import check_if_can_be_mirrored 9 | 10 | class AvailableAll: 11 | def is_available(self, iodata_list): 12 | return True 13 | 14 | 15 | class AvailableEqualShape: 16 | def is_available(self, iodata_list): 17 | for iodata in iodata_list: 18 | if iodata.input_field.shape != iodata.output_field.shape: 19 | return False 20 | return True 21 | 22 | 23 | class AvailableShape2Point: 24 | def is_available(self, iodata_list): 25 | for iodata in iodata_list: 26 | if iodata.output_field.shape != (1, 1): 27 | return False 28 | return True 29 | 30 | 31 | class AvailableShape2PointOrConstColor: 32 | def is_available(self, iodata_list): 33 | for iodata in iodata_list: 34 | if iodata.output_field.shape != (1, 1): 35 | if len(np.unique(iodata.output_field.data)) != 1: 36 | return False 37 | return True 38 | 39 | 40 | class AvailableEqualShapeAndMaxNColors: 41 | def is_available(self, iodata_list, n_colors=4): 42 | for iodata in iodata_list: 43 | if iodata.input_field.shape != iodata.output_field.shape: 44 | return False 45 | if len(np.unique(iodata.input_field.data)) > n_colors: 46 | return False 47 | if len(np.unique(iodata.output_field.data)) > n_colors: 48 | return False 49 | return True 50 | 51 | 52 | class AvailableWithIntMultiplier: 53 | def is_available(self, iodata_list): 54 | all_sizes = set() 55 | for iodata in iodata_list: 56 | m1 = iodata.output_field.height // iodata.input_field.height 57 | m2 = iodata.output_field.width // iodata.input_field.width 58 | all_sizes.add((m1, m2)) 59 | if len(all_sizes) == 1: 60 | h, w = all_sizes.pop() 61 | if w > 1 and h > 1: 62 | self.m1 = h 63 | self.m2 = w 64 | return True 65 | return False 66 | 67 | 68 | class AvailableWithFractionalMultiplier: 69 | def is_available(self, iodata_list): 70 | all_sizes = set() 71 | for iodata in iodata_list: 72 | m1 = Fraction(iodata.output_field.height, iodata.input_field.height) 73 | m2 = Fraction(iodata.output_field.width, iodata.input_field.width) 74 | all_sizes.add((m1, m2)) 75 | if len(all_sizes) == 1: 76 | h, w = all_sizes.pop() 77 | self.m1 = h 78 | self.m2 = w 79 | return True 80 | return False 81 | 82 | 83 | class 
AvailableMirror(AvailableWithIntMultiplier): 84 | def is_available(self, iodata_list): 85 | availability_check = AvailableWithIntMultiplier() 86 | # print(isinstance(self, AvailableMirror)) 87 | if not availability_check.is_available(iodata_list): 88 | # print(11) 89 | return False 90 | self.m1 = availability_check.m1 91 | self.m2 = availability_check.m2 92 | results = set() 93 | for iodata in iodata_list: 94 | h, w = iodata.input_field.shape 95 | res = check_if_can_be_mirrored(iodata.output_field.data, h=h, w=w) 96 | # print(res) 97 | if res is None: 98 | return False 99 | results.add(res) 100 | (vertical, horizontal) = results.pop() 101 | if len(results) > 0: 102 | return False 103 | self.vertical = vertical 104 | self.horizontal = horizontal 105 | return True 106 | 107 | 108 | class AvailableEqualShapeAndLessThanNComponents: 109 | def is_available(self, iodata_list, n_components=10): 110 | for iodata in iodata_list: 111 | if iodata.input_field.shape != iodata.output_field.shape: 112 | return False 113 | for iodata in iodata_list: 114 | region_labels = sk_measure.label(iodata.input_field.data) 115 | max_region_id = np.max(region_labels) 116 | if max_region_id > n_components: 117 | return False 118 | return True -------------------------------------------------------------------------------- /kaggle_arc/predictors/basic.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | import numpy as np 6 | from itertools import islice 7 | # from fractions import Fraction 8 | 9 | from base.iodata import IOData 10 | from base.field import Field 11 | # from utils import check_if_can_be_mirrored 12 | from operations.subpatterns import get_subpattern 13 | from operations.subpatterns import check_subpattern 14 | import predictors.availability_mixins as mixins 15 | 16 | class Predictor: 17 | """Base class for all predictors. 18 | 19 | Methods 20 | ------- 21 | train(iodata_list) 22 | Trains the given predictor with a list of IOData objects. 23 | Each object should have both input and output data for all samples. 24 | predict(field) 25 | For the input data stored in the variable `field`, tries to predict the output transformations. 26 | validate(iodata_list, k=3) 27 | For each of the inputs in iodata_list, predicts `k` outputs. 28 | After prediction, tries to compare them with the corresponding output and returns the final score. 29 | predict_on(cls, ds, k=3, args=[], kwargs=dict(), verbose=True, group_predictions=True) 30 | Utility method to process the dataset puzzles one by one. 
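
        A minimal usage sketch (here `ds` is assumed to be an iterable of
        puzzle samples, each with `.train` and `.test` lists of IOData, as
        consumed by `predict_on`):

            sample = next(iter(ds))
            predictor = IdPredictor()
            predictor.train(sample.train)
            print(predictor.validate(sample.train, k=3))
            for name, predictions in IdPredictor.predict_on(ds, k=3):
                ...  # up to k candidate fields per test input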
31 | """ 32 | 33 | def train(self, iodata_list): 34 | pass 35 | 36 | def predict(self, field): 37 | pass 38 | 39 | def validate(self, iodata_list, k=3): 40 | if isinstance(iodata_list, IOData): 41 | ps = islice(self.predict(iodata_list.input_field), k) 42 | # print(list(ps)) 43 | scores = [Field.score(p, iodata_list.output_field) for p in ps] 44 | if len(scores) < 1: 45 | return 0.0 46 | return max(scores) 47 | 48 | scores = [] 49 | for iodata in iodata_list: 50 | score = self.validate(iodata) 51 | scores.append(score) 52 | if len(scores) < 1: 53 | return 0.0 54 | # print(scores) 55 | return np.mean(scores) 56 | 57 | def freeze_by_score(self, iodata_list, k=3): 58 | pass 59 | 60 | @classmethod 61 | def predict_on( 62 | cls, ds, k=3, args=[], kwargs=dict(), verbose=False, group_predictions=True 63 | ): 64 | for sample in ds: 65 | predictor = cls(*args, **kwargs) 66 | # if not predictor.is_available(sample): 67 | predictor.train(sample.train) 68 | predictor.freeze_by_score(sample.train) 69 | 70 | score = predictor.validate(sample.train) 71 | if score == 1 and verbose: 72 | print(predictor) 73 | 74 | predictions = [] 75 | for i, iodata in enumerate(sample.test): 76 | prediction = list(islice(predictor.predict(iodata), k)) 77 | predictions.append(prediction) 78 | if not group_predictions: 79 | yield sample.name, i, prediction 80 | if group_predictions: 81 | yield sample.name, predictions 82 | 83 | 84 | class IdPredictor(Predictor, mixins.AvailableAll): 85 | 86 | def train(self, iodata_list): 87 | pass 88 | 89 | def predict(self, field): 90 | if isinstance(field, IOData): 91 | for v in self.predict(field.input_field): 92 | yield v 93 | return 94 | # while True: 95 | yield Field(field.data) 96 | 97 | def __str__(self): 98 | return "IdPredictor()" 99 | 100 | 101 | class ZerosPredictor(Predictor, mixins.AvailableAll): 102 | def __init(self): 103 | pass 104 | 105 | def train(self, iodata_list): 106 | pass 107 | 108 | def predict(self, field): 109 | if isinstance(field, IOData): 110 | for v in self.predict(field.input_field): 111 | yield v 112 | return 113 | # while True: 114 | yield field.zeros() 115 | 116 | def __str__(self): 117 | return "ZerosPredictor()" 118 | 119 | 120 | class ConstPredictor(Predictor, mixins.AvailableAll): 121 | def __init__(self, value=1, multiplier=1): 122 | self.value = value 123 | self.multiplier = multiplier 124 | 125 | def train(self, iodata_list): 126 | pass 127 | 128 | def predict(self, field): 129 | if isinstance(field, IOData): 130 | for v in self.predict(field.input_field): 131 | yield v 132 | return 133 | # while True: 134 | yield field.consts(self.value, multiplier=self.multiplier) 135 | 136 | def __str__(self): 137 | return f"ConstPredictor(value={self.value}, multiplier={self.multiplier})" 138 | 139 | 140 | class FillPredictor(Predictor, mixins.AvailableEqualShape): 141 | def __init__(self): 142 | self.common_patch = None 143 | 144 | def train(self, iodata_list): 145 | patches = [] 146 | patch_sizes = set() 147 | 148 | for k, iodata in enumerate(iodata_list): 149 | i = iodata.input_field 150 | o = iodata.output_field 151 | (r0, c0) = get_subpattern(i.data, check_passed=False) 152 | (r1, c1) = get_subpattern(o.data, check_passed=False) 153 | # print(k, r0, c0, r1, c1) 154 | if check_subpattern(i.data, r1, c1): 155 | patch = self.get_patch(i.data, r1, c1, allow_zeros=True) 156 | # print(patch) 157 | # print(patch) 158 | patches.append(patch) 159 | patch_sizes.add((r1, c1)) 160 | # print(r1,c1) 161 | # self.common_patch = patch 162 | if len(patch_sizes) == 1: 163 
| self.common_patch = self.collect_patches(patches) 164 | 165 | def collect_patches(self, patches): 166 | # print(patches) 167 | common_patch = np.zeros(patches[0].shape, dtype=patches[0].dtype) 168 | for p in patches: 169 | for i in range(p.shape[0]): 170 | for j in range(p.shape[1]): 171 | if common_patch[i, j] == 0: 172 | common_patch[i, j] = p[i, j] 173 | elif common_patch[i, j] != p[i, j]: 174 | return None 175 | return common_patch 176 | 177 | def get_patch(self, data, r, c, allow_zeros=False): 178 | res = np.zeros((r, c), dtype=data.dtype) 179 | for i in range(r): 180 | for j in range(c): 181 | values = data[i::r, j::c] 182 | values = [v for v in np.unique(values) if v != 0] 183 | 184 | # if len(values) != 1: 185 | # return None 186 | if len(values) == 1: 187 | res[i, j] = values[0] 188 | return res 189 | 190 | def predict(self, field): 191 | if isinstance(field, IOData): 192 | for v in self.predict(field.input_field): 193 | yield v 194 | return 195 | (r, c) = get_subpattern(field.data, wildcard=0) 196 | if self.common_patch is not None: 197 | patch = self.common_patch 198 | else: 199 | patch = self.get_patch(field.data, r, c, True) 200 | # print(patch) 201 | if patch is None or np.any(patch == 0): 202 | yield Field(field.data) 203 | return 204 | result = field.data.copy() 205 | coords = np.where(result == 0) 206 | for x, y in zip(*coords): 207 | result[x, y] = patch[x % r, y % c] 208 | yield Field(result) 209 | 210 | def __str__(self): 211 | return "FillPredictor()" 212 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/color_counting.py: -------------------------------------------------------------------------------- 1 | """ 2 | Predictor based on this notebook 3 | https://www.kaggle.com/szabo7zoltan/colorandcountingmoduloq 4 | """ 5 | 6 | import rootutils 7 | 8 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 9 | 10 | import numpy as np 11 | 12 | from base.iodata import IOData 13 | from base.field import Field 14 | from predictors.basic import Predictor 15 | import predictors.availability_mixins as mixins 16 | 17 | def get_p1_p2(i, j, n, k, v, q1, q2): 18 | if v == 0 or v == 2: 19 | p1 = i % q1 20 | else: 21 | p1 = (n - 1 - i) % q1 22 | if v == 0 or v == 3: 23 | p2 = j % q2 24 | else: 25 | p2 = (k - 1 - j) % q2 26 | return p1, p2 27 | 28 | 29 | class ColorCountingPredictor(Predictor, mixins.AvailableEqualShape): 30 | def __init__(self): 31 | self.best_Dict = None 32 | self.best_Q1 = -1 33 | self.best_Q2 = -1 34 | self.best_v = -1 35 | 36 | def train(self, iodata_list): 37 | pairs = [ 38 | (Q1, Q2) 39 | for t in range(15) 40 | for Q1 in range(1, 8) 41 | for Q2 in range(1, 8) 42 | if Q1 + Q2 == t 43 | ] 44 | h, w = list(zip(*[iodata.input_field.shape for iodata in iodata_list])) 45 | hmax = max(h) 46 | wmax = max(w) 47 | pairs = [(Q1, Q2) for Q1, Q2 in pairs if Q1 < hmax and Q2 < wmax] 48 | possible = True 49 | for Q1, Q2 in pairs: 50 | for v in range(4): 51 | if self.best_Dict is not None: 52 | return 53 | possible = True 54 | Dict = {} 55 | for iodata in iodata_list: 56 | (n, k) = iodata.input_field.shape 57 | for i in range(n): 58 | for j in range(k): 59 | p1, p2 = get_p1_p2(i, j, n, k, v, Q1, Q2) 60 | color1 = iodata.input_field.data[i, j] 61 | color2 = iodata.output_field.data[i, j] 62 | if color1 != color2: 63 | rule = (p1, p2, color1) 64 | if rule not in Dict: 65 | Dict[rule] = color2 66 | elif Dict[rule] != color2: 67 | possible = False 68 | if not possible: 69 | continue 70 | for 
iodata in iodata_list: 71 | (n, k) = iodata.input_field.shape 72 | for i in range(n): 73 | for j in range(k): 74 | p1, p2 = get_p1_p2(i, j, n, k, v, Q1, Q2) 75 | color1 = iodata.input_field.data[i, j] 76 | rule = (p1, p2, color1) 77 | if rule in Dict: 78 | color2 = 0 + Dict[rule] 79 | else: 80 | color2 = 0 + iodata.output_field.data[i, j] 81 | if color2 != iodata.output_field.data[i, j]: 82 | possible = False 83 | break 84 | if not possible: 85 | break 86 | if not possible: 87 | break 88 | if possible: 89 | self.best_Dict = Dict 90 | self.best_Q1 = Q1 91 | self.best_Q2 = Q2 92 | self.best_v = v 93 | return 94 | pass 95 | 96 | def predict(self, field): 97 | if isinstance(field, IOData): 98 | for v in self.predict(field.input_field): 99 | yield v 100 | return 101 | # while True: 102 | if self.best_Dict is None: 103 | return 104 | 105 | n, k = field.shape 106 | answer = np.zeros(field.shape, dtype=field.dtype) 107 | for i in range(n): 108 | for j in range(k): 109 | p1, p2 = get_p1_p2(i, j, n, k, self.best_v, self.best_Q1, self.best_Q2) 110 | color1 = field.data[i, j] 111 | rule = (p1, p2, color1) 112 | answer[i, j] = self.best_Dict.get(rule, color1) 113 | yield Field(answer) 114 | # yield field.consts(self.value, multiplier=self.multiplier) 115 | 116 | def __str__(self): 117 | if self.best_Dict is None: 118 | return "ColorCountingPredictor(undefined)" 119 | return f"ColorCountingPredictor({self.best_Dict})" 120 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/complex.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | import numpy as np 6 | from itertools import islice 7 | 8 | from base.field import Field 9 | from predictors.basic import Predictor 10 | # from predictors.basic import AvailableAll 11 | from predictors.basic import IdPredictor, ZerosPredictor, ConstPredictor, FillPredictor 12 | 13 | from predictors.color_counting import ColorCountingPredictor 14 | from predictors.shapes import ( 15 | RepeatingPredictor, 16 | FractalPredictor, 17 | ResizingPredictor, 18 | MirrorPredictor, 19 | ConstantShaper, 20 | ) 21 | from predictors.boosting_tree import ( 22 | BoostingTreePredictor, 23 | BoostingTreePredictor2, 24 | BoostingTreePredictor3, 25 | ) 26 | from predictors.convolution import ConvolutionPredictor 27 | from predictors.graph_boosting_tree import ( 28 | GraphBoostingTreePredictor, 29 | GraphBoostingTreePredictor2, 30 | GraphBoostingTreePredictor3, 31 | ) 32 | from predictors.decision_tree import AugmentedPredictor 33 | from predictors.subpattern import SubpatternMatcherPredictor 34 | from predictors.field2point import SimpleSummarizePredictor 35 | from predictors.connector import * 36 | import predictors.availability_mixins as mixins 37 | 38 | 39 | class ComplexPredictor(Predictor, mixins.AvailableAll): 40 | def __init__(self, predictor_classes, verbose=False): 41 | self.predictors = [] 42 | for data in predictor_classes: 43 | if isinstance(data, tuple): 44 | if len(data) == 3: 45 | cls, args, kwargs = data 46 | else: 47 | cls, args = data 48 | kwargs = dict() 49 | else: 50 | cls = data 51 | args = [] 52 | kwargs = dict() 53 | self.predictors.append(cls(*args, **kwargs)) 54 | self.verbose = verbose 55 | 56 | def train(self, iodata_list): 57 | self.predictors = [p for p in self.predictors if p.is_available(iodata_list)] 58 | invalid_predictors = set() 59 | for i, p in 
enumerate(self.predictors): 60 | try: 61 | p.train(iodata_list) 62 | except Exception as e: 63 | if self.verbose: 64 | print(e) 65 | invalid_predictors.add(i) 66 | self.predictors = [ 67 | p for i, p in enumerate(self.predictors) if i not in invalid_predictors 68 | ] 69 | 70 | def validate(self, iodata_list, k=3): 71 | if len(self.predictors) == 0: 72 | return 0.0 73 | scores = [] 74 | for iodata in iodata_list: 75 | pred_scores = [] 76 | for res in islice(self.predict(iodata.input_field), k): 77 | score = Field.score(res, iodata.output_field) 78 | pred_scores.append(score) 79 | scores.append(max(pred_scores)) 80 | # for p in self.predictors[:3]: 81 | # score = p.validate(iodata_list) 82 | # scores.append(score) 83 | if len(scores) == 0: 84 | return 0.0 85 | return np.mean(scores) 86 | 87 | def freeze_by_score(self, iodata_list, k=3): 88 | scores = [] 89 | for p in self.predictors: 90 | score = 0 91 | try: 92 | p.freeze_by_score(iodata_list, k=k) 93 | score = p.validate(iodata_list, k=k) 94 | except: 95 | score = -1 96 | scores.append(score) 97 | scores = np.asarray(scores) 98 | # scores = scores[np.argwhere(scores>0)] 99 | ids = np.argsort(scores)[::-1] 100 | self.predictors = [self.predictors[i] for i in ids if scores[i] >= 0] 101 | 102 | def predict(self, field): 103 | for p in self.predictors: 104 | # if not p.is_available(sample): 105 | # continue 106 | try: 107 | for v in p.predict(field): 108 | yield v 109 | except Exception as e: 110 | if self.verbose: 111 | print(e) 112 | # continue 113 | # for p in self.predictors: 114 | # try: 115 | # v = next(p.predict(field)) 116 | # except: 117 | # continue 118 | # yield v 119 | 120 | def __str__(self): 121 | s = ";".join([str(p) for p in self.predictors]) 122 | return f"ComplexPredictor({s})" 123 | 124 | 125 | class DefaultComplexPredictor(ComplexPredictor): 126 | def __init__(self): 127 | predictor_args = [ 128 | IdPredictor, 129 | ZerosPredictor, 130 | ColorCountingPredictor, 131 | RepeatingPredictor, 132 | FractalPredictor, 133 | ResizingPredictor, 134 | GraphBoostingTreePredictor, # no impact 135 | GraphBoostingTreePredictor3, 136 | ConstantShaper, 137 | # BoostingTreePredictor, 138 | # BoostingTreePredictor2, 139 | PointConnectorPredictor, 140 | BoostingTreePredictor3, 141 | SubpatternMatcherPredictor, 142 | # AugmentedPredictor 143 | FillPredictor, 144 | MirrorPredictor, 145 | SimpleSummarizePredictor, 146 | # (ConvolutionPredictor, [], {'loss': 'mse'}), 147 | # (ConvolutionPredictor, [], {'loss': 'dice'}) 148 | ] 149 | super(DefaultComplexPredictor, self).__init__(predictor_args) 150 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/convolution.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | from itertools import product 9 | 10 | import skimage.measure as sk_measure 11 | 12 | from base.iodata import * 13 | from base.field import * 14 | from predictors.boosting_tree import BTFeatureExtractor 15 | from predictors.basic import * 16 | 17 | 18 | def DBA(inp_size, out_size, activation=torch.nn.LeakyReLU()): 19 | return nn.Sequential( 20 | nn.Conv2d(inp_size, out_size, kernel_size=3, padding=1), 21 | nn.BatchNorm2d(out_size), 22 | activation, 23 | ) 24 | 25 | 26 | class NLU(nn.Module): 27 | def __init__(self, inp_size, out_size=None, activation=torch.nn.LeakyReLU()): 
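        """DenseNet-style unit: `forward` returns torch.cat([y, self.block2(y)], 1),
        so with the default out_size = inp_size // 2 the concatenated output has
        2 * (inp_size // 2) channels, i.e. the channel count is preserved for even
        inp_size. This is what lets StackedUnit chain several NLU blocks without
        changing the feature-map width.
        """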
28 | super().__init__() 29 | if out_size is None: 30 | out_size = inp_size // 2 31 | self.block1 = DBA(inp_size, out_size, activation) 32 | self.block2 = DBA(out_size, out_size, activation) 33 | 34 | def forward(self, x): 35 | y = self.block1(x) 36 | return torch.cat([y, self.block2(y)], 1) 37 | 38 | 39 | class StackedUnit(nn.Module): 40 | def __init__( 41 | self, 42 | input_size=2, 43 | out_size=10, 44 | n=3, 45 | activation=torch.nn.LeakyReLU(), 46 | last_activation=torch.nn.Softmax(dim=1), 47 | ): 48 | super().__init__() 49 | self.blocks = nn.ModuleList([NLU(input_size) for i in range(n)]) 50 | self.last_block = DBA(input_size, out_size, activation) 51 | 52 | def forward(self, x): 53 | y = x 54 | for block in self.blocks: 55 | y = block(y) 56 | return self.last_block(y) 57 | 58 | 59 | def filter_ones(col, split_count=1): 60 | coords = np.argwhere(col > 0).flatten() 61 | if len(coords) == 0: 62 | return col, col 63 | split0 = [] 64 | split1 = [] 65 | last_seq = [] 66 | for c in coords: 67 | if len(last_seq) < 1: 68 | last_seq.append(c) 69 | continue 70 | if last_seq[-1] + 1 == c: 71 | last_seq.append(c) 72 | continue 73 | if len(last_seq) <= split_count: 74 | split0.extend(last_seq) 75 | else: 76 | split1.extend(last_seq) 77 | last_seq = [c] 78 | if len(last_seq) > 0: 79 | if len(last_seq) <= split_count: 80 | split0.extend(last_seq) 81 | else: 82 | split1.extend(last_seq) 83 | s0 = np.zeros(col.shape) 84 | s0[split0] = 1 85 | s1 = np.zeros(col.shape) 86 | s1[split1] = 1 87 | return s0 * col, s1 * col 88 | 89 | 90 | def split_coords(data, color, split_count=1): 91 | col = np.sum(d.data == color, 0) 92 | row = np.sum(d.data == color, 1) 93 | 94 | col0, col1 = filter_ones(col, split_count=split_count) 95 | row0, row1 = filter_ones(row, split_count=split_count) 96 | return col0 * row0.reshape(-1, 1), col1 * row1.reshape(-1, 1) 97 | 98 | 99 | def dice_loss(pred, gt): 100 | def binary_dice(a, b, eps=1.0): 101 | # print(a.shape) 102 | s = torch.sum(a) + torch.sum(b) + eps 103 | if s != 0: 104 | return 2 * torch.sum(a * b) / s 105 | return None # torch.tensor() 106 | 107 | # print(pred.shape, gt.shape) 108 | res = [binary_dice(pred[:, i], gt[:, i]) for i in range(10)] 109 | res = [r for r in res if r is not None] 110 | 111 | return torch.sum(torch.stack(res)) 112 | 113 | 114 | def make_conv_features(field, nfeat=13, local_neighb=5): 115 | nrows, ncols = field.shape 116 | # feat = np.zeros((nrows*ncols, nfeat)) 117 | all_features = [] 118 | cur_idx = 0 119 | for i in range(nrows): 120 | feature_list = [] 121 | for j in range(ncols): 122 | color = field.data[i, j] 123 | features = [i, j, field.data[i, j]] 124 | features.extend( 125 | BTFeatureExtractor.get_moore_neighbours(field, i, j, nrows, ncols) 126 | ) 127 | features.extend(BTFeatureExtractor.get_tl_tr(field, i, j, nrows, ncols)) 128 | features.extend( 129 | [ 130 | len(np.unique(field.data[i, :])), 131 | len(np.unique(field.data[:, j])), 132 | # next goes count of non-zero points 133 | np.sum(field.data[i, :] > 0), 134 | np.sum(field.data[:, j] > 0), 135 | (i + j), 136 | len( 137 | np.unique( 138 | field.data[ 139 | i - local_neighb : i + local_neighb, 140 | j - local_neighb : j + local_neighb, 141 | ] 142 | ) 143 | ), 144 | ] 145 | ) 146 | 147 | # feat[cur_idx,13] 148 | features.extend( 149 | [ 150 | (i + ncols - j - 1), 151 | (i + j) % 2, 152 | (i + j + 1) % 2, 153 | # (i + ncols - j - 1) % 2 154 | # (nrows - 1 - i + ncols - j - 1), 155 | # (nrows - 1 - i + j) 156 | ] 157 | ) 158 | features.extend( 159 | [field.get(i + k, j + v) for k, v 
in product([-1, 0, 1], [-1, 0, 1])] 160 | ) 161 | features.extend( 162 | [ 163 | field.data[nrows - 1 - i, j], 164 | field.data[nrows - 1 - i, ncols - 1 - j], 165 | field.data[i, ncols - 1 - j], 166 | ] 167 | ) 168 | features.extend( 169 | [ 170 | field.data[i, j] != 0, 171 | np.sum( 172 | [ 173 | field.get(i + k, j + v) == color 174 | for k, v in product([-1, 1], [-1, 1]) 175 | ] 176 | ), 177 | np.sum( 178 | [ 179 | field.get(i + 1, j) == color, 180 | field.get(i - 1, j) == color, 181 | field.get(i, j + 1) == color, 182 | field.get(i, j - 1) == color, 183 | ] 184 | ), 185 | # np.sum([ field.get(i + k, j + v) == 0 186 | # for k, v in product([-1, 1], [-1, 1])]), 187 | # np.sum([ 188 | # field.get(i + 1, j) == 0, 189 | # field.get(i - 1, j) == 0, 190 | # field.get(i, j + 1) == 0, 191 | # field.get(i, j - 1) == 0 192 | # ]) 193 | ] 194 | ) 195 | feature_list.append(features) 196 | all_features.append(feature_list) 197 | 198 | feat = np.asarray(all_features) 199 | # feat = np.concatenate([ 200 | # feat, 201 | # np.stack([label(field.data==i) for i in range(10)], -1) 202 | # ], -1) 203 | return feat 204 | 205 | 206 | def make_conv_features2(field, nfeat=13, local_neighb=5): 207 | nrows, ncols = field.shape 208 | # feat = np.zeros((nrows*ncols, nfeat)) 209 | all_features = [] 210 | cur_idx = 0 211 | for i in range(nrows): 212 | feature_list = [] 213 | for j in range(ncols): 214 | color = field.data[i, j] 215 | features = [ 216 | # i, 217 | # j, 218 | field.data[i, j] 219 | ] 220 | # features.extend(get_moore_neighbours(field, i, j, nrows, ncols)) 221 | # features.extend(get_tl_tr(field, i, j, nrows, ncols)) 222 | features.extend( 223 | [ 224 | len(np.unique(field.data[i, :])), 225 | len(np.unique(field.data[:, j])), 226 | # next goes count of non-zero points 227 | # np.sum(field.data[i, :] > 0), 228 | # np.sum(field.data[:, j] > 0), 229 | (i + j), 230 | # len(np.unique(field.data[ 231 | # i-local_neighb:i+local_neighb, 232 | # j-local_neighb:j+local_neighb])) 233 | ] 234 | ) 235 | 236 | # feat[cur_idx,13] 237 | # features.extend([ 238 | # (i + ncols - j - 1), 239 | # (i + j) % 2, 240 | # (i + j + 1) % 2, 241 | # (i + ncols - j - 1) % 2, 242 | # (nrows - 1 - i + ncols - j - 1), 243 | # (nrows - 1 - i + j) 244 | # ]) 245 | features.extend( 246 | [ 247 | field.get(i + k, j + v) 248 | for k, v in product([-1, 0, 1], [-1, 0, 1]) 249 | if k != 0 or v != 0 250 | ] 251 | ) 252 | features.extend( 253 | [ 254 | field.data[nrows - 1 - i, j], 255 | field.data[nrows - 1 - i, ncols - 1 - j], 256 | field.data[i, ncols - 1 - j], 257 | ] 258 | ) 259 | features.extend( 260 | [ 261 | field.data[i, j] != 0, 262 | np.sum( 263 | [ 264 | field.get(i + k, j + v) == color 265 | for k, v in product([-1, 1], [-1, 1]) 266 | ] 267 | ), 268 | np.sum( 269 | [ 270 | field.get(i + 1, j) == color, 271 | field.get(i - 1, j) == color, 272 | field.get(i, j + 1) == color, 273 | field.get(i, j - 1) == color, 274 | ] 275 | ), 276 | np.sum( 277 | [ 278 | field.get(i + k, j + v) == 0 279 | for k, v in product([-1, 1], [-1, 1]) 280 | ] 281 | ), 282 | np.sum( 283 | [ 284 | field.get(i + 1, j) == 0, 285 | field.get(i - 1, j) == 0, 286 | field.get(i, j + 1) == 0, 287 | field.get(i, j - 1) == 0, 288 | ] 289 | ), 290 | ] 291 | ) 292 | features.extend( 293 | [ 294 | np.sum(field.data[i, :] == c) + np.sum(field.data[:, j] == c) 295 | for c in range(10) 296 | ] 297 | ) 298 | feature_list.append(features) 299 | all_features.append(feature_list) 300 | 301 | feat = np.asarray(all_features) 302 | feat = np.concatenate( 303 | [feat, 
np.stack([sk_measure.label(field.data == i) for i in range(10)], -1)], -1
304 |     )
305 |     masks = []
306 |     for c in range(10):
307 |         col = np.sum(field.data == c, 0)
308 |         row = np.sum(field.data == c, 1)
309 |         col0, col1 = filter_ones(col, split_count=1)
310 |         row0, row1 = filter_ones(row, split_count=1)
311 |         # return col0*row0.reshape(-1, 1), col1*row1.reshape(-1, 1)
312 |         # keep the full mask for this color plus its two split variants
313 |         masks.extend(
314 |             [
315 |                 col * row.reshape(-1, 1),
316 |                 col0 * row0.reshape(-1, 1),
317 |                 col1 * row1.reshape(-1, 1),
318 |             ]
319 |         )
320 | 
321 |     masks = np.stack(masks, -1)
322 |     # print(masks.shape)
323 |     feat = np.concatenate([feat, masks], -1)
324 |     return feat
325 | 
326 | 
327 | def get_nonzero_ids(iodata_list, make_conv_features=make_conv_features):
328 |     zero_ids = dict()
329 |     max_count = 0
330 |     nfeatures = 0
331 |     max_count += len(iodata_list)
332 |     for iodata in iodata_list:  # [sample.train, sample.test]:
333 |         features = make_conv_features(iodata.input_field)
334 |         nfeatures = max(nfeatures, features.shape[-1])
335 |         features = features.reshape(-1, features.shape[-1])
336 |         for i in np.argwhere(features.sum(0) > 0).flatten():
337 |             if i not in zero_ids:
338 |                 zero_ids[i] = 0
339 |             zero_ids[i] += 1
340 |     return np.asarray(
341 |         [i for i in np.arange(nfeatures) if zero_ids.get(i, 0) < max_count]
342 |     )
343 | 
344 | 
345 | def train_on_sample(sample, cutoff=0.5, debug=False, infeatures=70):
346 |     feature_ids = get_nonzero_ids(sample.train + sample.test)
347 |     model = StackedUnit(len(feature_ids), 10, last_activation=nn.Softmax(dim=1))
348 |     # model = nn.Sequential(
349 |     #     nn.Conv2d(len(feature_ids), 128, 3, padding=1),
350 |     #     nn.LeakyReLU(),
351 |     #     nn.Conv2d(128, 64, 3, padding=1),
352 |     #     nn.LeakyReLU(),
353 |     #     nn.Conv2d(64, 32, 3, padding=1),
354 |     #     nn.LeakyReLU(),
355 |     #     #nn.Sigmoid(),
356 |     #     nn.Conv2d(32, 10, 3, padding=1),
357 |     #     # nn.Sigmoid()
358 |     #     nn.Softmax(dim=1)
359 |     # )
360 |     loss_func = torch.nn.MSELoss()  # dice_loss
361 |     # print(net.parameters())
362 | 
363 |     optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
364 | 
365 |     for epoch in range(20):
366 |         model.train()
367 |         if debug:
368 |             print("Epoch", epoch)
369 |         losses = []
370 |         optimizer.zero_grad()
371 |         # train_x, train_y, result = make_features(iodata_list)
372 |         for iodata in sample.train:
373 |             features = make_conv_features(
374 |                 iodata.input_field
375 |             )  # .reshape(iodata.input_field.shape+(-1,))
376 |             features = features[:, :, feature_ids]
377 |             features = np.moveaxis(features, -1, 0)
378 |             features = features[np.newaxis, ...]
379 |             i = torch.tensor(features).float()
380 | 
381 |             o = iodata.output_field.t_splitted()
382 |             o = torch.unsqueeze(o, dim=0).float()
383 |             p = model.forward(i)
384 |             # print(i.is_leaf, p.is_leaf)
385 |             # print(p.sum(1))
386 |             # print(features.shape)
387 |             # print(o.shape, p.shape)
388 |             loss = loss_func(p, o)
389 |             loss.backward()
390 |             losses.append(loss.item())
391 |         if debug:
392 |             print(losses)
393 |         # if epoch % 10 == 0:
394 |         #     print("zero grad")
395 |         optimizer.step()
396 | 
397 |     if debug:
398 |         print("Validation:")
399 |     val_results = []
400 |     model.eval()
401 |     with torch.no_grad():
402 |         scores = []
403 |         for iodata in sample.test:
404 |             features = make_conv_features(
405 |                 iodata.input_field
406 |             )  # .reshape(iodata.input_field.shape+(-1,))
407 |             features = features[:, :, feature_ids]
408 |             features = np.moveaxis(features, -1, 0)
409 |             features = features[np.newaxis, ...]
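            # `features` is laid out (H, W, C) per grid cell; moving channels
            # first and adding a batch axis gives the (N=1, C, H, W) layout
            # that nn.Conv2d expects, so the whole grid is one "image".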
410 | i = torch.tensor(features).float() 411 | 412 | o = iodata.output_field.t_splitted() 413 | o = torch.unsqueeze(o, dim=0).float() 414 | p = model.forward(i) 415 | p = torch.squeeze(p, dim=0) 416 | p = Field.from_splitted(p) 417 | score = Field.score(p, iodata.output_field) 418 | scores.append(score) 419 | val_results.append((p, iodata.input_field, iodata.output_field)) 420 | if debug: 421 | print(score) 422 | p.show() 423 | iodata.output_field.show() 424 | scores = np.mean(scores) 425 | # print(scores) 426 | if scores < cutoff: 427 | return None 428 | return scores, model, val_results 429 | 430 | 431 | class ConvolutionPredictor(Predictor, mixins.AvailableEqualShape): 432 | def __init__(self, nepochs=40, loss="mse"): 433 | # self.xgb = XGBClassifier(n_estimators=25*2, booster="dart", n_jobs=-1) 434 | if loss == "mse": 435 | self.loss_func = torch.nn.MSELoss() 436 | else: 437 | self.loss_func = dice_loss 438 | # print(net.parameters()) 439 | self.nepochs = nepochs 440 | self.lr = 0.01 441 | self.debug = False 442 | 443 | def build_model(self, feature_ids): 444 | model = nn.Sequential( 445 | nn.Conv2d(len(feature_ids), 128, 3, padding=1), 446 | nn.LeakyReLU(), 447 | nn.Conv2d(128, 64, 3, padding=1), 448 | nn.LeakyReLU(), 449 | nn.Conv2d(64, 32, 3, padding=1), 450 | nn.LeakyReLU(), 451 | # nn.Sigmoid(), 452 | nn.Conv2d(32, 10, 3, padding=1), 453 | # nn.Sigmoid() 454 | nn.Softmax(dim=1), 455 | ) 456 | return model 457 | # 458 | 459 | def train(self, iodata_list): 460 | self.feature_ids = get_nonzero_ids( 461 | iodata_list, make_conv_features=make_conv_features2 462 | ) 463 | self.model = self.build_model(self.feature_ids) 464 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr) 465 | all_losses = [] 466 | for epoch in range(self.nepochs): 467 | self.model.train() 468 | if self.debug: 469 | print("Epoch", epoch) 470 | losses = [] 471 | self.optimizer.zero_grad() 472 | # train_x, train_y, result = make_features(iodata_list) 473 | for iodata in iodata_list: 474 | features = make_conv_features2( 475 | iodata.input_field 476 | ) # .reshape(iodata.input_field.shape+(-1,)) 477 | features = features[:, :, self.feature_ids] 478 | features = np.moveaxis(features, -1, 0) 479 | features = features[np.newaxis, ...] 480 | i = torch.tensor(features).float() 481 | 482 | o = iodata.output_field.t_splitted() 483 | o = torch.unsqueeze(o, dim=0).float() 484 | p = self.model.forward(i) 485 | loss = self.loss_func(p, o) 486 | loss.backward() 487 | losses.append(loss.item()) 488 | if self.debug: 489 | print(losses) 490 | 491 | losses = np.mean(losses) 492 | if len(all_losses) > 0: 493 | if len(all_losses) > 10 and np.mean(all_losses[-10:]) <= losses: 494 | break 495 | all_losses.append(losses) 496 | 497 | # if epoch % 10 == 0: 498 | # print("zero grad") 499 | self.optimizer.step() 500 | 501 | def predict(self, field): 502 | if isinstance(field, IOData): 503 | for v in self.predict(field.input_field): 504 | yield v 505 | return 506 | self.model.eval() 507 | with torch.no_grad(): 508 | features = make_conv_features2(field) 509 | features = features[:, :, self.feature_ids] 510 | features = np.moveaxis(features, -1, 0) 511 | features = features[np.newaxis, ...] 
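            # same (N, C, H, W) packing as at training time; the model emits a
            # (1, 10, H, W) stack of per-color maps that Field.from_splitted
            # decodes back into a single color grid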
512 | i = torch.tensor(features).float() 513 | p = self.model.forward(i) 514 | p = torch.squeeze(p, dim=0).detach().cpu().numpy() 515 | yield Field.from_splitted(p) 516 | 517 | def __str__(self): 518 | return "ConvolutionPredictor()" 519 | 520 | 521 | class Convolution2PointPredictor(Predictor, mixins.AvailableShape2PointOrConstColor): 522 | def __init__(self, nepochs=40, loss="mse"): 523 | # self.xgb = XGBClassifier(n_estimators=25*2, booster="dart", n_jobs=-1) 524 | if loss == "mse": 525 | self.loss_func = torch.nn.MSELoss() 526 | else: 527 | self.loss_func = dice_loss 528 | # print(net.parameters()) 529 | self.nepochs = nepochs 530 | self.lr = 0.01 531 | self.debug = False 532 | 533 | def build_model(self, feature_ids): 534 | model = nn.Sequential( 535 | nn.Conv2d(len(feature_ids), 128, 3, padding=1), 536 | nn.LeakyReLU(), 537 | nn.Conv2d(128, 64, 3, padding=1), 538 | nn.LeakyReLU(), 539 | nn.Conv2d(64, 32, 3, padding=1), 540 | nn.LeakyReLU(), 541 | # nn.Sigmoid(), 542 | nn.Conv2d(32, 10, 3, padding=1), 543 | nn.AvgPool2d(3), 544 | nn.Sigmoid(), 545 | # nn.Softmax(dim=1) 546 | ) 547 | return model 548 | # 549 | 550 | def train(self, iodata_list): 551 | self.feature_ids = get_nonzero_ids( 552 | iodata_list, make_conv_features=make_conv_features2 553 | ) 554 | self.model = self.build_model(self.feature_ids) 555 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr) 556 | all_losses = [] 557 | for epoch in range(self.nepochs): 558 | self.model.train() 559 | if self.debug: 560 | print("Epoch", epoch) 561 | losses = [] 562 | self.optimizer.zero_grad() 563 | # train_x, train_y, result = make_features(iodata_list) 564 | for iodata in iodata_list: 565 | features = make_conv_features2( 566 | iodata.input_field 567 | ) # .reshape(iodata.input_field.shape+(-1,)) 568 | features = features[:, :, self.feature_ids] 569 | features = np.moveaxis(features, -1, 0) 570 | features = features[np.newaxis, ...] 571 | i = torch.tensor(features).float() 572 | 573 | o = iodata.output_field.t_splitted() 574 | o = torch.unsqueeze(o, dim=0).float() 575 | p = self.model.forward(i) 576 | loss = self.loss_func(p, o) 577 | loss.backward() 578 | losses.append(loss.item()) 579 | if self.debug: 580 | print(losses) 581 | 582 | losses = np.mean(losses) 583 | if len(all_losses) > 0: 584 | if len(all_losses) > 10 and np.mean(all_losses[-10:]) <= losses: 585 | break 586 | all_losses.append(losses) 587 | 588 | # if epoch % 10 == 0: 589 | # print("zero grad") 590 | self.optimizer.step() 591 | 592 | def predict(self, field): 593 | if isinstance(field, IOData): 594 | for v in self.predict(field.input_field): 595 | yield v 596 | return 597 | self.model.eval() 598 | with torch.no_grad(): 599 | features = make_conv_features2(field) 600 | features = features[:, :, self.feature_ids] 601 | features = np.moveaxis(features, -1, 0) 602 | features = features[np.newaxis, ...] 
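            # identical feature packing; here the AvgPool2d head shrinks the
            # spatial map, matching this predictor's point-or-constant-color
            # targets (see AvailableShape2PointOrConstColor)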
603 | i = torch.tensor(features).float() 604 | p = self.model.forward(i) 605 | p = torch.squeeze(p, dim=0).detach().cpu().numpy() 606 | yield Field.from_splitted(p) 607 | 608 | def __str__(self): 609 | return "ConvolutionPredictor()" 610 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/decision_tree.py: -------------------------------------------------------------------------------- 1 | """Code in next predictor is based on this kernel 2 | 3 | https://www.kaggle.com/adityaork/decision-tree-smart-data-augmentation/comments 4 | """ 5 | 6 | import rootutils 7 | 8 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 9 | 10 | import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv) 11 | import json 12 | from pathlib import Path 13 | from collections import defaultdict 14 | from itertools import product 15 | from matplotlib import colors 16 | import matplotlib.pyplot as plt 17 | import numpy as np 18 | from itertools import combinations, permutations 19 | 20 | from sklearn.tree import DecisionTreeClassifier 21 | from sklearn.ensemble import BaggingClassifier 22 | import random 23 | import math 24 | 25 | 26 | from predictors.basic import * 27 | import predictors.availability_mixins as mixins 28 | 29 | 30 | class Augmenter: 31 | __slots__ = () 32 | 33 | @staticmethod 34 | def getiorc(iodata): 35 | inp = iodata.input_field 36 | return iodata.input_field, iodata.output_field, inp.shape[0], inp.shape[1] 37 | 38 | @classmethod 39 | def getBkgColor(cls, iodata_list): 40 | color_dict = defaultdict(int) 41 | 42 | for iodata in iodata_list: 43 | inp, oup, r, c = cls.getiorc(iodata) 44 | for i in range(r): 45 | for j in range(c): 46 | color_dict[inp.data[i, j]] += 1 47 | color = -1 48 | max_count = 0 49 | for col, cnt in color_dict.items(): 50 | if cnt > max_count: 51 | color = col 52 | max_count = cnt 53 | return color 54 | 55 | @classmethod 56 | def get_bl_cols(cls, iodata_list): 57 | result = [] 58 | bkg_col = cls.getBkgColor(iodata_list) 59 | result.append(bkg_col) 60 | # num_input,input_cnt,num_output,output_cnt 61 | met_map = {} 62 | for i in range(10): 63 | met_map[i] = [0, 0, 0, 0] 64 | 65 | total_ex = 0 66 | for iodata in iodata_list: 67 | inp, oup = iodata.input_field, iodata.output_field 68 | u, uc = np.unique(inp.data, return_counts=True) 69 | inp_cnt_map = dict(zip(u, uc)) 70 | u, uc = np.unique(oup.data, return_counts=True) 71 | oup_cnt_map = dict(zip(u, uc)) 72 | 73 | for col, cnt in inp_cnt_map.items(): 74 | met_map[col][0] = met_map[col][0] + 1 75 | met_map[col][1] = met_map[col][1] + cnt 76 | for col, cnt in oup_cnt_map.items(): 77 | met_map[col][2] = met_map[col][2] + 1 78 | met_map[col][3] = met_map[col][3] + cnt 79 | total_ex += 1 80 | 81 | for col, met in met_map.items(): 82 | num_input, input_cnt, num_output, output_cnt = met 83 | if num_input == total_ex or num_output == total_ex: 84 | result.append(col) 85 | elif num_input == 0 and num_output > 0: 86 | result.append(col) 87 | 88 | result = np.unique(result).tolist() 89 | if len(result) == 10: 90 | result.append(bkg_col) 91 | return np.unique(result).tolist() 92 | 93 | @staticmethod 94 | def getAround(i, j, inp, size=1): 95 | # v = [-1,-1,-1,-1,-1,-1,-1,-1,-1] 96 | r, c = inp.shape 97 | v = [] 98 | sc = [0] 99 | for q in range(size): 100 | sc.append(q + 1) 101 | sc.append(-(q + 1)) 102 | for idx, (x, y) in enumerate(product(sc, sc)): 103 | ii = i + x 104 | jj = j + y 105 | v.append(-1) 106 | if (0 <= ii < r) and (0 <= jj < c): 107 | 
v[idx] = inp.data[ii, jj] 108 | return v 109 | 110 | @classmethod 111 | def getX(cls, inp, i, j, size): 112 | n_inp = inp.data 113 | z = [i, j] 114 | r, c = inp.shape 115 | for m in range(5): 116 | z.append(i % (m + 1)) 117 | z.append(j % (m + 1)) 118 | z.append(i + j) 119 | z.append(i * j) 120 | # z.append(i%j) 121 | # z.append(j%i) 122 | z.append((i + 1) / (j + 1)) 123 | z.append((j + 1) / (i + 1)) 124 | z.append(r) 125 | z.append(c) 126 | z.append(len(np.unique(n_inp[i, :]))) 127 | z.append(len(np.unique(n_inp[:, j]))) 128 | arnd = cls.getAround(i, j, inp, size) 129 | z.append(len(np.unique(arnd))) 130 | z.extend(arnd) 131 | return z 132 | 133 | @classmethod 134 | def getXy(cls, inp, oup, size): 135 | x = [] 136 | y = [] 137 | r, c = inp.shape 138 | for i in range(r): 139 | for j in range(c): 140 | # print(inp) 141 | x.append(cls.getX(inp, i, j, size)) 142 | y.append(oup.data[i][j]) 143 | return x, y 144 | 145 | @staticmethod 146 | def replace(inp, uni, perm): 147 | # uni = '234' perm = ['5','7','9'] 148 | # print(uni,perm) 149 | # print(uni, perm) 150 | r_map = {int(c): int(s) for c, s in zip(uni, perm)} 151 | r, c = inp.shape 152 | rp = inp.data.tolist() 153 | # print(rp) 154 | for i in range(r): 155 | for j in range(c): 156 | if rp[i][j] in r_map: 157 | rp[i][j] = r_map[rp[i][j]] 158 | return Field(rp) 159 | 160 | @classmethod 161 | def augment(cls, inp, oup, bl_cols): 162 | cols = "0123456789" 163 | npr_map = [1, 9, 72, 3024, 15120, 60480, 181440, 362880, 362880] 164 | uni = "".join([str(x) for x in np.unique(inp.data).tolist()]) 165 | for c in bl_cols: 166 | cols = cols.replace(str(c), "") 167 | uni = uni.replace(str(c), "") 168 | 169 | exp_size = inp.shape[0] * inp.shape[1] * npr_map[len(uni)] 170 | 171 | mod = math.floor(exp_size / 120000) 172 | mod = 1 if mod == 0 else mod 173 | 174 | # print(exp_size,mod,len(uni)) 175 | result = [] 176 | count = 0 177 | for comb in combinations(cols, len(uni)): 178 | for perm in permutations(comb): 179 | count += 1 180 | if count % mod == 0: 181 | # print(uni) 182 | result.append( 183 | (cls.replace(inp, uni, perm), cls.replace(oup, uni, perm)) 184 | ) 185 | return result 186 | 187 | @staticmethod 188 | def get_flips(i, o): 189 | result = [] 190 | # inp = input_field.data 191 | # oup = output_field.data 192 | operations = [ 193 | lambda inp: np.fliplr(inp), 194 | lambda inp: np.rot90(np.fliplr(inp), 1), 195 | lambda inp: np.rot90(np.fliplr(inp), 2), 196 | lambda inp: np.rot90(np.fliplr(inp), 3), 197 | lambda inp: np.flipud(inp), 198 | lambda inp: np.rot90(np.flipud(inp), 1), 199 | lambda inp: np.rot90(np.flipud(inp), 2), 200 | lambda inp: np.rot90(np.flipud(inp), 3), 201 | lambda inp: np.fliplr(np.flipud(inp)), 202 | lambda inp: np.flipud(np.fliplr(inp)), 203 | ] 204 | for op in operations: 205 | yield Field(op(i.data)), Field(op(o.data)) 206 | # result.append((np.fliplr(inp).tolist(),np.fliplr(oup).tolist())) 207 | # result.append((np.rot90(np.fliplr(inp),1).tolist(),np.rot90(np.fliplr(oup),1).tolist())) 208 | # result.append((np.rot90(np.fliplr(inp),2).tolist(),np.rot90(np.fliplr(oup),2).tolist())) 209 | # result.append((np.rot90(np.fliplr(inp),3).tolist(),np.rot90(np.fliplr(oup),3).tolist())) 210 | # result.append((np.flipud(inp).tolist(),np.flipud(oup).tolist())) 211 | # result.append((np.rot90(np.flipud(inp),1).tolist(),np.rot90(np.flipud(oup),1).tolist())) 212 | # result.append((np.rot90(np.flipud(inp),2).tolist(),np.rot90(np.flipud(oup),2).tolist())) 213 | # 
result.append((np.rot90(np.flipud(inp),3).tolist(),np.rot90(np.flipud(oup),3).tolist())) 214 | # result.append((np.fliplr(np.flipud(inp)).tolist(),np.fliplr(np.flipud(oup)).tolist())) 215 | # result.append((np.flipud(np.fliplr(inp)).tolist(),np.flipud(np.fliplr(oup)).tolist())) 216 | # return result 217 | 218 | @classmethod 219 | def gettaskxy(cls, iodata_list, aug, around_size, bl_cols, flip=True): 220 | X = [] 221 | Y = [] 222 | for iodata in iodata_list: 223 | inp, oup = iodata.input_field, iodata.output_field 224 | tx, ty = cls.getXy(inp, oup, around_size) 225 | X.extend(tx) 226 | Y.extend(ty) 227 | if flip: 228 | for ainp, aoup in cls.get_flips(inp, oup): 229 | tx, ty = cls.getXy(ainp, aoup, around_size) 230 | X.extend(tx) 231 | Y.extend(ty) 232 | if aug: 233 | augs = cls.augment(ainp, aoup, bl_cols) 234 | for ainp, aoup in augs: 235 | # print("1", ainp) 236 | tx, ty = cls.getXy(ainp, aoup, around_size) 237 | X.extend(tx) 238 | Y.extend(ty) 239 | if aug: 240 | augs = cls.augment(inp, oup, bl_cols) 241 | for ainp, aoup in augs: 242 | # print("2", ainp) 243 | tx, ty = cls.getXy(ainp, aoup, around_size) 244 | X.extend(tx) 245 | Y.extend(ty) 246 | return X, Y 247 | 248 | 249 | class AugmentedPredictor(Predictor, mixins.AvailableEqualShapeAndMaxNColors): 250 | def __init__(self): 251 | # self.value = value 252 | # self.multiplier = multiplier 253 | pass 254 | 255 | def predict_on_tree_model(self, inp, model, size): 256 | r, c = inp.shape 257 | oup = np.zeros(inp.shape, dtype=int) 258 | for i in range(r): 259 | for j in range(c): 260 | x = Augmenter.getX(inp, i, j, size) 261 | o = int(model.predict([x])) 262 | o = 0 if o < 0 else o 263 | oup[i][j] = o 264 | return Field(oup) 265 | 266 | def train(self, iodata_list): 267 | a_size = 4 # get_a_size(task_json) 268 | bl_cols = Augmenter.get_bl_cols(iodata_list) 269 | 270 | isflip = False 271 | X1, Y1 = Augmenter.gettaskxy(iodata_list, True, 1, bl_cols, isflip) 272 | X3, Y3 = Augmenter.gettaskxy(iodata_list, True, 3, bl_cols, isflip) 273 | X5, Y5 = Augmenter.gettaskxy(iodata_list, True, 5, bl_cols, isflip) 274 | 275 | self.model_1 = BaggingClassifier( 276 | estimator=DecisionTreeClassifier(), n_estimators=100 277 | ).fit(X1, Y1) 278 | self.model_3 = BaggingClassifier( 279 | estimator=DecisionTreeClassifier(), n_estimators=100 280 | ).fit(X3, Y3) 281 | self.model_5 = BaggingClassifier( 282 | estimator=DecisionTreeClassifier(), n_estimators=100 283 | ).fit(X5, Y5) 284 | 285 | def predict(self, field): 286 | if isinstance(field, IOData): 287 | for v in self.predict(field.input_field): 288 | yield v 289 | return 290 | # while True: 291 | # pred_map_1 = submit_predict(task_json,model_1, 1) 292 | pred1 = self.predict_on_tree_model(field, self.model_1, 1) 293 | yield pred1 294 | pred3 = self.predict_on_tree_model(field, self.model_3, 3) 295 | yield pred3 296 | pred5 = self.predict_on_tree_model(field, self.model_5, 5) 297 | yield pred5 298 | 299 | def __str__(self): 300 | return f"AugmentedPredictor()" 301 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/draft_predictors/cam_predictor.py: -------------------------------------------------------------------------------- 1 | ## TODO: this was unfinished and shouldn't be used now 2 | import rootutils 3 | 4 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 5 | 6 | 7 | import torch 8 | from torch import nn 9 | 10 | from base.field import * 11 | import predictors.availability_mixins as mixins 12 | from predictors.basic 
import Predictor 13 | from base.iodata import IOData 14 | 15 | 16 | class CAModel(nn.Module): 17 | def __init__(self, num_states): 18 | super(CAModel, self).__init__() 19 | self.transition = nn.Sequential( 20 | nn.Conv2d(num_states, 128, kernel_size=3, padding=1), 21 | nn.BatchNorm2d(128), 22 | nn.LeakyReLU(), 23 | nn.Conv2d(128, 128, kernel_size=3, padding=1), 24 | nn.BatchNorm2d(128), 25 | nn.ReLU(), 26 | nn.Conv2d(128, num_states, kernel_size=1), 27 | ) 28 | 29 | def forward(self, x, steps=1): 30 | for _ in range(steps): 31 | x = self.transition(torch.softmax(x, dim=1)) 32 | return x 33 | 34 | 35 | def solve_task(iodata_list, max_steps=10, num_epochs=100, device="cpu"): 36 | model = CAModel(10).to(device) 37 | criterion = nn.CrossEntropyLoss() 38 | losses = np.zeros((max_steps - 1) * num_epochs) 39 | 40 | for num_steps in range(1, max_steps): 41 | optimizer = torch.optim.Adam(model.parameters(), lr=(0.1 / (num_steps * 2))) 42 | 43 | for e in range(num_epochs): 44 | optimizer.zero_grad() 45 | loss = 0.0 46 | 47 | for sample in task: 48 | # predict output from input 49 | x = ( 50 | torch.from_numpy(inp2img(sample["input"])) 51 | .unsqueeze(0) 52 | .float() 53 | .to(device) 54 | ) 55 | y = torch.tensor(sample["output"]).long().unsqueeze(0).to(device) 56 | y_pred = model(x, num_steps) 57 | loss += criterion(y_pred, y) 58 | 59 | # predit output from output 60 | # enforces stability after solution is reached 61 | y_in = ( 62 | torch.from_numpy(inp2img(sample["output"])) 63 | .unsqueeze(0) 64 | .float() 65 | .to(device) 66 | ) 67 | y_pred = model(y_in, 1) 68 | loss += criterion(y_pred, y) 69 | 70 | loss.backward() 71 | optimizer.step() 72 | losses[(num_steps - 1) * num_epochs + e] = loss.item() 73 | return model, num_steps, losses 74 | 75 | 76 | class CAMPredictor(Predictor, mixins.AvailableEqualShape): 77 | def __init__(self, max_steps=10, num_epochs=100): 78 | self.max_steps = max_steps 79 | self.num_epochs = num_epochs 80 | self.device = "cpu" 81 | self.model = CAModel(10).to(self.device) 82 | self.criterion = nn.CrossEntropyLoss() 83 | # self.optimizer = torch.optim.Adam(model.parameters(), lr=(0.1 / (max_steps * 2))) 84 | pass 85 | 86 | def train(self, iodata_list): 87 | losses = np.zeros((self.max_steps - 1) * self.num_epochs) 88 | self.model.train() 89 | for num_steps in range(1, self.max_steps): 90 | optimizer = torch.optim.Adam( 91 | self.model.parameters(), lr=(0.1 / (num_steps * 2)) 92 | ) 93 | 94 | for e in range(self.num_epochs): 95 | optimizer.zero_grad() 96 | loss = 0.0 97 | 98 | for iodata in iodata_list: 99 | # predict output from input 100 | x = ( 101 | torch.from_numpy(iodata.input_field.data_splitted) 102 | .unsqueeze(0) 103 | .float() 104 | .to(self.device) 105 | ) 106 | y = ( 107 | torch.from_numpy(iodata.output_field.data) 108 | .long() 109 | .unsqueeze(0) 110 | .to(self.device) 111 | ) 112 | y_pred = self.model(x, num_steps) 113 | loss += self.criterion(y_pred, y) 114 | 115 | # predit output from output 116 | # enforces stability after solution is reached 117 | y_in = ( 118 | torch.from_numpy(iodata.output_field.data_splitted) 119 | .unsqueeze(0) 120 | .float() 121 | .to(self.device) 122 | ) 123 | y_pred = self.model(y_in, 1) 124 | loss += self.criterion(y_pred, y) 125 | 126 | loss.backward() 127 | optimizer.step() 128 | losses[(num_steps - 1) * self.num_epochs + e] = loss.item() 129 | self.losses = losses 130 | # model, num_steps, losses 131 | 132 | def predict(self, field): 133 | if isinstance(field, IOData): 134 | for v in self.predict(field.input_field): 135 | yield 
v
136 |             return
137 |         self.model.eval()
138 |         with torch.no_grad():
139 |             x = (
140 |                 torch.from_numpy(field.data_splitted)
141 |                 .unsqueeze(0)
142 |                 .float()
143 |                 .to(self.device)
144 |             )
145 |             pred = self.model(x, 100).argmax(1).squeeze().detach().cpu().numpy()
146 |             yield Field(pred)
147 | 
148 | 
149 | 
150 | 
151 | class MoverPredictor(Predictor, mixins.AvailableEqualShape):
152 |     def __init__(self):
153 |         pass
154 | 
155 |     def train(self, iodata_list):
156 |         self.transitions = []
157 |         h = []
158 |         w = []
159 |         for iodata in iodata_list:
160 |             i = iodata.input_field
161 |             o = iodata.output_field
162 |             coords = np.argwhere(i.data != o.data)
163 |             if coords.shape[0] > 0:
164 |                 xmin, ymin = np.min(coords, 0)
165 |                 xmax, ymax = np.max(coords, 0)
166 |                 start = i.data[xmin : xmax + 1, ymin : ymax + 1]
167 |                 end = o.data[xmin : xmax + 1, ymin : ymax + 1]
168 |             else:
169 |                 start = i.data.copy()
170 |                 end = o.data.copy()
171 |             self.transitions.append((start, end))
172 |             h.append(start.shape[0])
173 |             w.append(start.shape[1])
174 |         if len(np.unique(h)) == 1 and len(np.unique(w)) == 1:
175 |             self.single_step = True
176 |         else:
177 |             self.single_step = False
178 |         self.minh = np.min(h)
179 |         self.minw = np.min(w)
180 | 
181 |     def is_available(self, iodata_list):
182 |         for iodata in iodata_list:
183 |             if iodata.input_field.shape != iodata.output_field.shape:
184 |                 return False
185 |         return True
186 | 
187 |     def predict(self, field):
188 |         if isinstance(field, IOData):
189 |             for v in self.predict(field.input_field):
190 |                 yield v
191 |             return
192 |         data = field.data.copy()
193 |         offsets = np.ones(data.shape)
194 |         offsets[-self.minh + 1 :] = 0
195 |         offsets[:, -self.minw + 1 :] = 0
196 |         for _ in range(100):
197 |             something_changed = False
198 |             # print(offsets)
199 |             for offset0, offset1 in np.argwhere(offsets == 1):
200 |                 no_changes_with_offset = True
201 |                 for start, end in self.transitions:
202 |                     h, w = start.shape
203 |                     if offset0 + h > data.shape[0] or offset1 + w > data.shape[1]:
204 |                         # offsets[offset0:, offset1:] = 0
205 |                         continue
206 |                     if np.all(
207 |                         data[offset0 : offset0 + h, offset1 : offset1 + w] == start
208 |                     ):
209 |                         data[offset0 : offset0 + h, offset1 : offset1 + w] = end[:, :]
210 |                         offsets[offset0 : offset0 + h, offset1 : offset1 + w] = 1
211 |                         something_changed = True
212 |                         no_changes_with_offset = False
213 |                         if self.single_step:
214 |                             yield Field(data)
215 |                             return
216 |                         break
217 |                 if no_changes_with_offset:
218 |                     offsets[offset0, offset1] = 0
219 |             if not something_changed:
220 |                 break
221 |         yield Field(data)
--------------------------------------------------------------------------------
/kaggle_arc/predictors/draft_predictors/cf_combinator.py:
--------------------------------------------------------------------------------
1 | import rootutils
2 | 
3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
4 | 
5 | 
6 | from base.field import *
7 | from base.iodata import *
8 | from predictors.basic import Predictor
9 | from operations.reversible import *
10 | 
11 | 
12 | class SelectorCFPredictor(Predictor):
13 |     """Selects one of the patterns based on some features and returns it as the result."""
14 | 
15 |     def __init__(self):
16 |         pass
17 | 
18 |     def is_available(self, iodata_list):
19 |         for iodata in iodata_list:
20 |             if isinstance(iodata, IOData):
21 |                 return False
22 |             i, o = iodata
23 |             if not isinstance(i, ComplexField):
24 |                 return False
25 |             if not isinstance(o, ComplexField):
26 |                 return False
27 |             if o.shape != (1, 1):
28 |                 return False
29 |         return True
30 | 
31 |
def train(self, iodata_list): 32 | pass 33 | 34 | def predict(self, complex_field): 35 | yield complex_field 36 | 37 | 38 | class CombinatorCFPredictor(Predictor): 39 | def __init__(self): 40 | self.iopairs = dict() 41 | pass 42 | 43 | def is_available(self, iodata_list): 44 | for iodata in iodata_list: 45 | if isinstance(iodata, IOData): 46 | return False 47 | i, o = iodata 48 | if not isinstance(i, ComplexField): 49 | return False 50 | if not isinstance(o, ComplexField): 51 | return False 52 | if o.shape != (1, 1): 53 | return False 54 | return True 55 | 56 | def train(self, iodata_list): 57 | result = dict() 58 | for i, o in iodata_list: 59 | ifields = [[x for line in d.data for x in line] for d in i.flat_iter()] 60 | ifields = list(zip(*ifields)) 61 | ofields = [x for line in o.data[0][0].data for x in line] 62 | for inp, out in zip(ifields, ofields): 63 | if inp in result: 64 | if result[inp] != out: 65 | continue 66 | # raise Exception("incorrect solution") 67 | else: 68 | result[inp] = out 69 | self.iopairs = result 70 | 71 | def predict(self, complex_field): 72 | inp = [ 73 | [[x for x in line] for line in d.data] for d in complex_field.flat_iter() 74 | ] 75 | inp = list(zip(*inp)) 76 | 77 | # print(self.iopairs) 78 | result = [[self.iopairs.get(x, 0) for x in zip(*line)] for line in inp] 79 | cf = ComplexField([[Field(result)]]) 80 | yield cf 81 | 82 | 83 | class WrappedCFPredictor(Predictor): 84 | def __init__(self): 85 | self.combinator = CombinatorCFPredictor() 86 | self.op = None 87 | 88 | def is_available(self, iodata_list): 89 | for iodata in iodata_list: 90 | i = iodata.input_field 91 | o = iodata.output_field 92 | (oh, ow) = o.shape 93 | (ih, iw) = i.shape 94 | if oh == 1 and ow == 1: 95 | return False 96 | if oh > ih or ow > iw: 97 | return False 98 | if oh == ih and ow == iw: 99 | return False 100 | hparts = 1 101 | wparts = 1 102 | all_parts = [] 103 | for hsep in (0, 1, 2): 104 | for wsep in (0, 1, 2): 105 | for outer_sep in (True, False): 106 | if hsep == 0 and wsep == 0 and outer_sep: 107 | continue 108 | res = [] 109 | for iodata in iodata_list: 110 | if res is None: 111 | break 112 | i = iodata.input_field.data 113 | o = iodata.output_field.data 114 | (oh, ow) = o.shape 115 | (ih, iw) = i.shape 116 | if hsep > 0: 117 | hvalues = set(i[: outer_sep * hsep].flatten()) 118 | for start in range(outer_sep * hsep + oh, ih, hsep + oh): 119 | for x in np.unique(i[start : start + hsep]): 120 | hvalues.add(x) 121 | if len(hvalues) > 1: 122 | res = None 123 | break 124 | # if len(hvalues) > 1: 125 | # return False 126 | if wsep > 0: 127 | wvalues = set(i[:, : outer_sep * wsep].flatten()) 128 | for start in range(outer_sep * wsep + ow, iw, wsep + ow): 129 | for x in np.unique(i[:, start : start + wsep]): 130 | wvalues.add(x) 131 | if len(wvalues) > 1: 132 | res = None 133 | break 134 | # return False 135 | if outer_sep: 136 | ih -= hsep 137 | iw -= wsep 138 | else: 139 | ih += hsep 140 | iw += wsep 141 | h = ih // (oh + hsep) 142 | 143 | if h * (oh + hsep) != ih or h < 1: 144 | res = None 145 | continue 146 | # h -= hsep 147 | w = iw // (ow + wsep) 148 | if w * (ow + wsep) != iw or w < 1: 149 | res = None 150 | break 151 | # print(h, w, ih, oh, iw, ow) 152 | # w -= wsep 153 | res.append((h, w)) 154 | if res is None: 155 | continue 156 | res = set(res) 157 | if len(res) == 1: 158 | all_parts.append([list(res)[0], hsep, wsep, outer_sep]) 159 | if len(all_parts) < 1: 160 | return False 161 | if len(all_parts) > 1: 162 | return False 163 | (h, w), hsep, wsep, outer_sep = 
all_parts[0] 164 | self.shape = (h, w) 165 | self.hsep = hsep 166 | self.wsep = wsep 167 | self.outer_sep = outer_sep 168 | self.op = WrappedOperation( 169 | ReversibleSplit( 170 | (h, w), hsep=hsep, wsep=wsep, outer_sep=outer_sep 171 | ), # , splitter_func=split_by_shape), 172 | ReversibleCombine( 173 | (1, 1), hsep=0, wsep=0, outer_sep=False, sep_color=0 174 | ), # , splitter_func=split_by_shape) 175 | ) 176 | data = [self.op.wrap(iodata) for iodata in iodata_list] 177 | 178 | return self.combinator.is_available(data) 179 | 180 | def train(self, iodata_list): 181 | data = [self.op.wrap(iodata) for iodata in iodata_list] 182 | self.combinator.train(data) 183 | 184 | def predict(self, field): 185 | field_inp, postprocess = self.op.run(field) 186 | for x in self.combinator.predict(field_inp): 187 | yield postprocess(x) 188 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/draft_predictors/cf_filler.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | from predictors.basic import Predictor 6 | import predictors.availability_mixins as mixins 7 | from base.iodata import IOData 8 | from base.field import *  # ComplexField, imported the same way as in cf_combinator.py 9 | class FillerCFPredictor(Predictor): 10 | def __init__(self): 11 | pass 12 | 13 | def is_available(self, iodata_list): 14 | for iodata in iodata_list: 15 | if isinstance(iodata, IOData): 16 | return False 17 | i, o = iodata 18 | if not isinstance(i, ComplexField): 19 | return False 20 | if not isinstance(o, ComplexField): 21 | return False 22 | if i.shape != o.shape: 23 | return False 24 | return True 25 | 26 | def train(self, iodata_list): 27 | pass 28 | 29 | def predict(self, complex_field): 30 | yield complex_field 31 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/draft_predictors/cf_selector.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | from predictors.basic import Predictor 6 | from base.iodata import IOData 7 | from base.field import *  # ComplexField, imported the same way as in cf_combinator.py 8 | class SelectorCFPredictor(Predictor): 9 | def __init__(self): 10 | pass 11 | 12 | def is_available(self, iodata_list): 13 | for iodata in iodata_list: 14 | if isinstance(iodata, IOData): 15 | return False 16 | i, o = iodata 17 | if not isinstance(i, ComplexField): 18 | return False 19 | if not isinstance(o, ComplexField): 20 | return False 21 | if o.shape != (1, 1): 22 | return False 23 | return True 24 | 25 | def train(self, iodata_list): 26 | pass 27 | 28 | def predict(self, complex_field): 29 | yield complex_field 30 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/draft_predictors/cf_sorter.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | from predictors.basic import Predictor 6 | import predictors.availability_mixins as mixins 7 | from base.iodata import IOData 8 | from base.field import *  # ComplexField, imported the same way as in cf_combinator.py 9 | def to_tuple(field): 10 | return tuple([x for line in field.data for x in line]) 11 | 12 | class SorterCFPredictor(Predictor): 13 | def __init__(self): 14 | pass 15 | 16 | def is_available(self, iodata_list): 17 | for iodata in iodata_list: 18 | if isinstance(iodata, IOData): 19 | return False 20 | i, o = iodata 21 | if not isinstance(i, ComplexField): 22 | return 
False 23 | if not isinstance(o, ComplexField): 24 | return False 25 | if i.shape != o.shape: 26 | return False 27 | 28 | it = sorted([to_tuple(f) for f in i.flat_iter()]) 29 | ot = sorted([to_tuple(f) for f in o.flat_iter()]) 30 | if it != ot: 31 | return False 32 | return True 33 | 34 | def train(self, iodata_list): 35 | pass 36 | 37 | def predict(self, complex_field): 38 | yield complex_field 39 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/field2point.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | from predictors.basic import * 6 | from operations.field2point import SimpleSummarizeOperation 7 | import predictors.availability_mixins as mixins 8 | 9 | 10 | class SimpleSummarizePredictor(Predictor): 11 | def __init__(self): 12 | self.op = SimpleSummarizeOperation() 13 | 14 | def is_available(self, iodata_list): 15 | for iodata in iodata_list: 16 | if iodata.output_field.shape != (1, 1): 17 | return False 18 | return True 19 | 20 | def train(self, iodata_list): 21 | self.op.train(iodata_list) 22 | 23 | def predict(self, field): 24 | result = self.op.do(field, bg=self.op.bg) 25 | yield result 26 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/graph_boosting_tree.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | """ 4 | 5 | import rootutils 6 | 7 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 8 | 9 | import networkx as nx 10 | import numpy as np 11 | from xgboost import XGBClassifier 12 | from sklearn.preprocessing import LabelEncoder 13 | 14 | from predictors.basic import Predictor 15 | from predictors.boosting_tree import BTFeatureExtractor 16 | from base.field import Field 17 | from base.iodata import IOData 18 | import predictors.availability_mixins as mixins 19 | 20 | 21 | class GraphFeatureExtractor: 22 | @staticmethod 23 | def compare_components(GI, GO): 24 | if nx.number_connected_components(GI) != nx.number_connected_components(GO): 25 | return False 26 | for x, y in zip(nx.connected_components(GI), nx.connected_components(GO)): 27 | if len(x) != len(y): 28 | return False 29 | return True 30 | 31 | @staticmethod 32 | def get_comp_params(G): 33 | for x in nx.connected_components(G): 34 | gx = G.subgraph(x) 35 | nfeatures = [] 36 | positions = set() 37 | ncolors = [] 38 | props = set() 39 | comp_features = [] 40 | for n in gx.nodes.values(): 41 | ncolors.append(n["neighbour_colors"]) 42 | color = n["color"] 43 | nfeatures.append(n["features"]) 44 | positions.add(n["pos"]) 45 | comp_features = n["component_params"] 46 | if "properties" in n: 47 | props.add(n["properties"]) 48 | data = { 49 | "color": color, 50 | "features": np.stack(nfeatures, 0).sum(0), 51 | "comp_features": comp_features, 52 | "ncolors": np.stack(ncolors, 0).sum(0), 53 | "pos": positions, 54 | "size": len(x), 55 | } 56 | if len(props) > 0: 57 | data["properties"] = list(props) 58 | yield data 59 | 60 | @staticmethod 61 | def reorder(component_params_in, component_params_out): 62 | comp_dict = dict() 63 | for i, comp in enumerate(component_params_out): 64 | for pos in comp["pos"]: 65 | comp_dict[pos] = i 66 | order = [comp_dict.get(list(comp["pos"])[0]) for comp in component_params_in] 67 | component_params_out = [component_params_out[i] for i in order] 68 | 
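# reorder() aligns the two component lists by cell position: the input and
# output fields compared here have the same shape and component structure
# (see compare_components above), so the output component covering the first
# cell of an input component is taken as its counterpart. Zipping the
# reordered lists therefore yields matched (input, output) component pairs.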
return component_params_in, component_params_out 69 | 70 | @staticmethod 71 | def get_data(cpi, cpo=None, use_zeros=True): 72 | if cpo is None: 73 | for gi in cpi: 74 | if not use_zeros and gi["color"] == 0: 75 | continue 76 | # print(gi['features'].shape) 77 | # yield gi['color'], gi['features'], gi['ncolors'], gi['size'] 78 | if "properties" in gi: 79 | yield gi["color"], gi["features"], gi["comp_features"], gi[ 80 | "ncolors" 81 | ], gi["size"], gi["properties"] 82 | else: 83 | yield gi["color"], gi["features"], gi["comp_features"], gi[ 84 | "ncolors" 85 | ], gi["size"] 86 | return 87 | for gi, go in zip(cpi, cpo): 88 | if not use_zeros and gi["color"] == 0: 89 | continue 90 | target = gi["color"] != go["color"] 91 | yield gi["color"], gi["features"], gi["comp_features"], gi["ncolors"], gi[ 92 | "size" 93 | ], target * 1.0, go["color"] 94 | 95 | @staticmethod 96 | def collect_graph_data(cpi, cpo=None, use_zeros=True): 97 | if cpo is None: 98 | colors, features, comp_features, ncolors, sizes = list( 99 | zip(*GraphFeatureExtractor.get_data(cpi, use_zeros=use_zeros)) 100 | ) 101 | else: 102 | ( 103 | colors, 104 | features, 105 | comp_features, 106 | ncolors, 107 | sizes, 108 | targets_bin, 109 | targets_color, 110 | ) = list( 111 | zip(*GraphFeatureExtractor.get_data(cpi, cpo, use_zeros=use_zeros)) 112 | ) 113 | 114 | colors = np.asarray([[i == c for i in range(10)] for c in colors]).astype( 115 | float  # np.float is removed in newer numpy versions 116 | ) 117 | features = (np.stack(features, 0) > 0) * 1.0 118 | comp_features = np.stack(comp_features, 0) 119 | ncolors = (np.stack(ncolors, 0) > 0).astype(float) 120 | sizes = np.asarray(sizes).reshape(-1, 1) 121 | inputs = np.concatenate([colors, features, comp_features, ncolors, sizes], 1) 122 | if cpo is None: 123 | return inputs 124 | targets = np.asarray(targets_bin) # .reshape(-1, 1) 125 | targets_color = np.asarray(targets_color) 126 | # targets_color = np.asarray([[(c == i)*1.0 for i in range(10)]for c in targets_color]) 127 | return inputs, targets, targets_color 128 | 129 | @staticmethod 130 | def prepare_graph_features(iodata, use_zeros=True): 131 | GI = iodata.input_field.build_nxgraph(connectivity={i: 4 for i in range(10)}) 132 | GO = iodata.output_field.build_nxgraph(connectivity={i: 4 for i in range(10)}) 133 | component_params_in, component_params_out = GraphFeatureExtractor.reorder( 134 | list(GraphFeatureExtractor.get_comp_params(GI)), 135 | list(GraphFeatureExtractor.get_comp_params(GO)), 136 | ) 137 | 138 | inputs, targets, targets_color = GraphFeatureExtractor.collect_graph_data( 139 | component_params_in, component_params_out, use_zeros=use_zeros 140 | ) 141 | 142 | return inputs, targets, targets_color 143 | 
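# Rough sketch of the per-component feature rows assembled by
# collect_graph_data() above (layout follows the np.concatenate order; the
# widths of the two middle blocks depend on what Field.build_nxgraph stores
# per node):
#
#   [ one-hot color (10) | summed node features | component params |
#     neighbour-color indicators | component size (1) ]
#
# For example, a component of five same-colored cells contributes one row
# whose first ten entries one-hot encode its color and whose last entry is 5.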
144 | @staticmethod 145 | def prepare_graph_features_diff(iodata, use_zeros=False): 146 | GI = iodata.input_field.build_nxgraph( 147 | connectivity={i: 4 for i in range(10)}, 148 | properties=iodata.input_field.data != iodata.output_field.data, 149 | ) 150 | graph_data = list(GraphFeatureExtractor.get_comp_params(GI)) 151 | colors, features, comp_features, ncolors, sizes, properties = list( 152 | zip(*GraphFeatureExtractor.get_data(graph_data)) 153 | ) 154 | 155 | colors = np.asarray([[i == c for i in range(10)] for c in colors]).astype( 156 | float  # np.float is removed in newer numpy versions 157 | ) 158 | features = (np.stack(features, 0) > 0) * 1.0 159 | comp_features = np.stack(comp_features, 0) 160 | ncolors = (np.stack(ncolors, 0) > 0).astype(float) 161 | sizes = np.asarray(sizes).reshape(-1, 1) 162 | targets = np.asarray( 163 | [np.any(list(p)) * 1.0 for p in properties] 164 | ) # np.asarray(targets_bin)#.reshape(-1, 1) 165 | 166 | inputs = np.concatenate([colors, features, comp_features, ncolors, sizes], 1) 167 | 168 | return inputs, targets # , targets_color 169 | 170 | @staticmethod 171 | def prepare_graph_features_for_eval(field, use_zeros=True): 172 | GI = field.build_nxgraph(connectivity={i: 4 for i in range(10)}) 173 | graph_data = list(GraphFeatureExtractor.get_comp_params(GI)) 174 | if not use_zeros: 175 | graph_data = [g for g in graph_data if g["color"] != 0] 176 | inputs = GraphFeatureExtractor.collect_graph_data( 177 | graph_data, use_zeros=use_zeros 178 | ) 179 | 180 | return graph_data, inputs # , targets, targets_color 181 | 182 | 183 | class AvailableEqualShapeAndComponents: 184 | def is_available(self, iodata_list): 185 | for iodata in iodata_list: 186 | if iodata.input_field.shape != iodata.output_field.shape: 187 | return False 188 | for iodata in iodata_list: 189 | GI = iodata.input_field.build_nxgraph( 190 | connectivity={i: 4 for i in range(10)} 191 | ) 192 | GO = iodata.output_field.build_nxgraph( 193 | connectivity={i: 4 for i in range(10)} 194 | ) 195 | equal_shapes_of_components = GraphFeatureExtractor.compare_components( 196 | GI, GO 197 | ) 198 | if not equal_shapes_of_components: 199 | return False 200 | return True 201 | 202 | 203 | class GraphBoostingTreePredictor(Predictor, AvailableEqualShapeAndComponents): 204 | def __init__(self, n_estimators=10): 205 | self.xgb_binary = XGBClassifier( 206 | n_estimators=n_estimators, booster="dart", n_jobs=-1 207 | ) 208 | self.xgb = XGBClassifier( 209 | n_estimators=n_estimators, 210 | booster="dart", 211 | n_jobs=-1, 212 | objective="multi:softmax", 213 | num_class=10, 214 | ) 215 | self.target_encoder = LabelEncoder() 216 | 217 | def train(self, iodata_list): 218 | train_x, train_y_bin, train_y = list( 219 | zip( 220 | *[ 221 | GraphFeatureExtractor.prepare_graph_features(iodata) 222 | for iodata in iodata_list 223 | ] 224 | ) 225 | ) 226 | train_x = np.concatenate(train_x, 0) 227 | train_y_bin = np.concatenate(train_y_bin, 0) 228 | train_y = np.concatenate(train_y, 0) 229 | train_y_encoded = self.target_encoder.fit_transform(train_y) 230 | # print(train_y_bin, train_y) 231 | # feat, target, _ = GraphFeatureExtractor.prepare_graph_features(iodata_list) 232 | self.xgb_binary.fit(train_x, train_y_bin, verbose=0) 233 | self.xgb.fit(train_x, train_y_encoded, verbose=0) 234 | 235 | def predict(self, field): 236 | if isinstance(field, IOData): 237 | for v in self.predict(field.input_field): 238 | yield v 239 | return 240 | # repainter = Repaint(field.data) 241 | prediction_data = np.zeros(field.shape) 242 | graph_data, inputs = GraphFeatureExtractor.prepare_graph_features_for_eval( 243 | field 244 | ) 245 | preds_binary = self.xgb_binary.predict(inputs) 246 | preds_colors_encoded = self.xgb.predict(inputs) # .tolist() 247 | preds_colors = self.target_encoder.inverse_transform(preds_colors_encoded) 248 | # result = repainter(preds).tolist() 249 | for comp, cbin, new_col in zip(graph_data, preds_binary, preds_colors): 250 | color = int(new_col) if cbin > 0.5 else comp["color"] 251 | # if cbin > 0.5: 252 | # print("new color", new_col, "old_color", comp['color']) 253 | for i, j in comp["pos"]: 254 | prediction_data[i, j] = color 255 | 256 | yield Field(prediction_data) 257 | 258 | def __str__(self): 259 | return "GraphBoostingTreePredictor()" 260 | 261 | 
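# Typical usage of the predictor above (a sketch; `sample` stands for a
# base.iodata.Sample whose .train and .test lists hold IOData pairs):
#
#   p = GraphBoostingTreePredictor()
#   if p.is_available(sample.train):
#       p.train(sample.train)
#       for prediction in p.predict(sample.test[0]):
#           ...  # `prediction` is a Field with per-component recoloring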
262 | class GraphBoostingTreePredictor2(Predictor): 263 | def __init__(self, n_estimators=10): 264 | # self.xgb_binary = XGBClassifier(n_estimators=n_estimators, booster="dart", n_jobs=-1) 265 | # self.xgb = XGBClassifier(n_estimators=n_estimators, booster="dart", n_jobs=-1, 266 | # objective="multi:softmax", num_class=10) 267 | self.xgb_classifiers = [] 268 | self.target_encoders = [] 269 | 270 | def is_available(self, iodata_list): 271 | for iodata in iodata_list: 272 | if iodata.input_field.shape != iodata.output_field.shape: 273 | return False 274 | components = [] 275 | components_nonzero = [] 276 | for iodata in iodata_list: 277 | GI = iodata.input_field.build_nxgraph( 278 | connectivity={i: 4 for i in range(10)} 279 | ) 280 | GO = iodata.output_field.build_nxgraph( 281 | connectivity={i: 4 for i in range(10)} 282 | ) 283 | equal_shapes_of_components = GraphFeatureExtractor.compare_components( 284 | GI, GO 285 | ) 286 | if not equal_shapes_of_components: 287 | return False 288 | compdata = list(GraphFeatureExtractor.get_comp_params(GI)) 289 | components.append(len(compdata)) 290 | components_nonzero.append(len([gi for gi in compdata if gi["color"] != 0])) 291 | self.ncomponents = -1 292 | # print(components, components_nonzero) 293 | if len(components) < 1: 294 | return False 295 | if len(np.unique(components)) == 1: 296 | self.use_zeros = True 297 | self.ncomponents = np.unique(components)[0] 298 | if len(components_nonzero) > 0: 299 | if len(np.unique(components_nonzero)) == 1: 300 | self.use_zeros = False 301 | self.ncomponents = np.unique(components_nonzero)[0] 302 | # [GraphFeatureExtractor.prepare_graph_features(iodata) 303 | # for iodata in iodata_list])) 304 | if self.ncomponents < 1: 305 | return False 306 | return True 307 | 308 | def train(self, iodata_list, n_estimators=20): 309 | # train_x, train_y_bin, train_y = list( 310 | train_sets = [[] for i in range(self.ncomponents)] 311 | for iodata in iodata_list: 312 | features, target_binary, target = ( 313 | GraphFeatureExtractor.prepare_graph_features(iodata, self.use_zeros) 314 | ) 315 | for i in range(min(features.shape[0], self.ncomponents)): 316 | train_sets[i].append((features[i], target_binary[i], target[i])) 317 | # print(len(train_sets)) 318 | for ts in train_sets: 319 | features, target_binary, target = list(zip(*ts)) 320 | features = np.stack(features, 0) 321 | target = np.stack(target, 0) 322 | xgb = XGBClassifier( 323 | n_estimators=n_estimators, 324 | booster="dart", 325 | n_jobs=-1, 326 | objective="multi:softmax", 327 | num_class=10, 328 | ) 329 | te = LabelEncoder() 330 | encoded_target = te.fit_transform(target) 331 | xgb.fit(features, encoded_target) 332 | self.xgb_classifiers.append(xgb) 333 | self.target_encoders.append(te) 334 | 335 | def predict(self, field): 336 | if isinstance(field, IOData): 337 | for v in self.predict(field.input_field): 338 | yield v 339 | return 340 | # repainter = Repaint(field.data) 341 | prediction_data = np.zeros(field.shape) 342 | # print(self.use_zeros) 343 | graph_data, inputs = GraphFeatureExtractor.prepare_graph_features_for_eval( 344 | field, self.use_zeros 345 | ) 346 | if inputs.shape[0] < 1: 347 | yield field; return  # a generator cannot return a value to the caller, so yield the field unchanged 348 | all_predictions = [] 349 | # print(inputs.shape, len(graph_data), len(self.xgb_classifiers)) 350 | # print(len(self.xgb_classifiers), inputs.shape) 351 | for i in range(min(inputs.shape[0], self.ncomponents)): 352 | xgb = self.xgb_classifiers[i] 353 | encoded_predictions = xgb.predict([inputs[i]]) 354 | predictions = self.target_encoders[i].inverse_transform(encoded_predictions) 355 | all_predictions.append(predictions) 356 | # result = repainter(preds).tolist() 357 | # 
TODO: check dimensions 358 | for comp, color in zip(graph_data, all_predictions): 359 | for i, j in comp["pos"]: 360 | prediction_data[i, j] = color 361 | 362 | yield Field(prediction_data) 363 | 364 | def __str__(self): 365 | return "GraphBoostingTreePredictor2()" 366 | 367 | 368 | class GraphBoostingTreePredictor3(Predictor, mixins.AvailableEqualShapeAndLessThanNComponents): 369 | def __init__(self, n_estimators=100, max_components=10): 370 | # self.xgb_binary = XGBClassifier(n_estimators=n_estimators, booster="dart", n_jobs=-1) 371 | # self.xgb = XGBClassifier(n_estimators=n_estimators, booster="dart", n_jobs=-1, 372 | # objective="multi:softmax", num_class=10) 373 | self.n_estimators = n_estimators 374 | self.max_components = max_components 375 | 376 | self.xgb_binary = XGBClassifier( 377 | n_estimators=n_estimators, booster="dart", n_jobs=-1 378 | ) 379 | self.xgb = XGBClassifier( 380 | n_estimators=n_estimators, 381 | booster="dart", 382 | n_jobs=-1, 383 | objective="multi:softmax", 384 | num_class=10, 385 | ) 386 | self.use_zeros = True 387 | self.target_encoder = LabelEncoder() 388 | 389 | def is_available(self, iodata_list): 390 | return super().is_available(iodata_list, n_components=self.max_components) 391 | 392 | def _make_train_binary_features(self, iodata_list): 393 | train_x_binary, train_y_binary = list( 394 | zip( 395 | *[ 396 | GraphFeatureExtractor.prepare_graph_features_diff( 397 | iodata, self.use_zeros 398 | ) 399 | for iodata in iodata_list 400 | ] 401 | ) 402 | ) 403 | train_x_binary = np.concatenate(train_x_binary, 0) 404 | train_y_binary = np.concatenate(train_y_binary, 0) 405 | return train_x_binary, train_y_binary 406 | 407 | def train(self, iodata_list, n_estimators=20): 408 | # train_x, train_y_bin, train_y = list( 409 | train_x_binary, train_y_binary = self._make_train_binary_features(iodata_list) 410 | # feat, target, _ = GraphFeatureExtractor.prepare_graph_features(iodata_list) 411 | self.xgb_binary.fit(train_x_binary, train_y_binary, verbose=0) 412 | # print("binary",train_y_binary) 413 | 414 | feat, target, _ = BTFeatureExtractor.get_features( 415 | iodata_list, features_maker=BTFeatureExtractor.make_features_v2 416 | ) 417 | encoded_target = self.target_encoder.fit_transform(target) 418 | # print(target) 419 | self.xgb.fit(feat, encoded_target, verbose=0) 420 | # next - train xgboost 421 | 422 | def validate_binary(self, iodata_list): 423 | train_x_binary, train_y_binary = self._make_train_binary_features(iodata_list) 424 | return self.xgb_binary.predict(train_x_binary), train_y_binary 425 | 426 | def predict(self, field, return_binary=False): 427 | if isinstance(field, IOData): 428 | for v in self.predict(field.input_field): 429 | yield v 430 | return 431 | # repainter = Repaint(field.data) 432 | nrows, ncols = field.shape 433 | prediction_data = np.zeros(field.shape, dtype=np.uint8) 434 | # print(self.use_zeros) 435 | graph_data, inputs = GraphFeatureExtractor.prepare_graph_features_for_eval( 436 | field, self.use_zeros 437 | ) 438 | if inputs.shape[0] < 1: 439 | # return field 440 | preds_binary = [] 441 | else: 442 | preds_binary = self.xgb_binary.predict(inputs) 443 | 444 | feat = BTFeatureExtractor.make_features_v2(field) 445 | preds = self.xgb.predict(feat) 446 | preds = self.target_encoder.inverse_transform(preds) 447 | preds = preds.reshape(nrows, ncols) 448 | preds = preds.astype(int) # .tolist() 449 | # result = repainter(preds).tolist() 450 | prediction_data = preds 451 | if len(preds) > 0 and np.sum(preds) > 0: 452 | for comp, cbin in 
zip(graph_data, preds_binary): 453 | # color = int(new_col) if cbin > 0.5 else comp['color'] 454 | # if cbin > 0.5: 455 | # print("new color", new_col, "old_color", comp['color']) 456 | for i, j in comp["pos"]: 457 | if cbin > 0.5: 458 | prediction_data[i, j] = preds[i, j] 459 | else: 460 | prediction_data[i, j] = comp["color"] 461 | if return_binary: 462 | yield preds_binary, graph_data, Field(prediction_data) 463 | else: 464 | yield Field(prediction_data) 465 | 466 | def __str__(self): 467 | return "GraphBoostingTreePredictor3()" 468 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/shapes.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | import numpy as np 6 | 7 | from base.field import Field 8 | from base.iodata import IOData 9 | from base.transformers import resize_output, crop_data 10 | 11 | from predictors.basic import * 12 | 13 | # Predictor, AvailableAll, AvailableWithIntMultiplier, AvailableMirror 14 | from predictors.boosting_tree import BoostingTreePredictor 15 | 16 | from operations.basic import Repaint 17 | from operations.resizing import Repeater, Resizer, Fractal, Mirror 18 | # from utils import check_if_can_be_mirrored 19 | import predictors.availability_mixins as mixins 20 | 21 | 22 | class RepeatingPredictor(Predictor, mixins.AvailableWithIntMultiplier): 23 | def __init__(self, args=[], kwargs=dict()): 24 | # self.predictor = predictor_class(*args, **kwargs) 25 | pass 26 | 27 | def train(self, iodata_list): 28 | # self.predictor.train(iodata_list) 29 | pass 30 | 31 | def predict(self, field): 32 | if isinstance(field, IOData): 33 | for v in self.predict(field.input_field): 34 | yield v 35 | return 36 | repeater = Repeater(self.m1, self.m2) 37 | result = repeater(field.data) 38 | yield Field(result) 39 | 40 | def __str__(self): 41 | return f"RepeatingPredictor(m1={self.m1}, m2={self.m2})" 42 | 43 | 44 | class MirrorPredictor(Predictor, mixins.AvailableMirror): 45 | def __init__(self, predictor=BoostingTreePredictor): 46 | self.predictor = predictor() 47 | 48 | def train(self, iodata_list): 49 | self.mirror = Mirror( 50 | self.m1, self.m2, vertical=self.vertical, horizontal=self.horizontal 51 | ) 52 | self.predictor.train(resize_output(iodata_list)) 53 | # train_ds[i].show(predictor=predictor) 54 | 55 | def freeze_by_score(self, iodata_list): 56 | self.predictor.freeze_by_score(resize_output(iodata_list)) 57 | 58 | def predict(self, field): 59 | if isinstance(field, IOData): 60 | for v in self.predict(field.input_field): 61 | yield v 62 | return 63 | repainter = Repaint(field.data) 64 | for prediction in self.predictor.predict(field): 65 | result = self.mirror(prediction.data) 66 | result = repainter(result) 67 | yield Field(result) 68 | 69 | def __str__(self): 70 | return f"MirrorPredictor(m1={self.m1}, m2={self.m2})" 71 | 72 | 73 | class ResizingPredictor(Predictor, mixins.AvailableWithIntMultiplier): 74 | def __init__(self): 75 | pass 76 | 77 | def train(self, iodata_list): 78 | pass 79 | 80 | def predict(self, field): 81 | if isinstance(field, IOData): 82 | for v in self.predict(field.input_field): 83 | yield v 84 | return 85 | resizer = Resizer(self.m1, self.m2) 86 | result = resizer(field.data) 87 | yield Field(result) 88 | 89 | def __str__(self): 90 | return f"ResizingPredictor(m1={self.m1}, m2={self.m2})" 91 | 92 | 
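# RepeatingPredictor and ResizingPredictor above, and FractalPredictor below,
# differ only in the operations.resizing transform they apply. None of them
# assign self.m1/self.m2 directly, so mixins.AvailableWithIntMultiplier
# presumably derives these integer height/width multipliers between input and
# output shapes inside is_available(); see predictors/availability_mixins.py.
93 | class 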
FractalPredictor(Predictor, mixins.AvailableWithIntMultiplier): 94 | def __init__(self): 95 | pass 96 | 97 | def train(self, iodata_list): 98 | pass 99 | 100 | def predict(self, field): 101 | if isinstance(field, IOData): 102 | for v in self.predict(field.input_field): 103 | yield v 104 | return 105 | fractal = Fractal(self.m1, self.m2) 106 | result = fractal(field.data) 107 | yield Field(result) 108 | 109 | def __str__(self): 110 | return f"FractalPredictor(m1={self.m1}, m2={self.m2})" 111 | 112 | 113 | def change_colors(data, background_colors=[]): 114 | colormap = {c: 0 for c in background_colors} 115 | # colormap = dict() 116 | if len(np.unique(data)) == len(background_colors): 117 | colormap = dict() 118 | 119 | current_id = 1 120 | for line in data: 121 | if len(colormap) > 10: 122 | break 123 | for c in line: 124 | if not c in colormap: 125 | colormap[c] = current_id 126 | current_id += 1 # chr(ord(current_id) + 1) 127 | # print(line) 128 | # redraw 129 | # print(colormap) 130 | data_modified = np.asarray([[colormap[c] for c in line] for line in data]) 131 | return data_modified 132 | 133 | 134 | def process_iodata_input(iodata, pattern, crop_func=crop_data): 135 | i = iodata.input_field 136 | o = iodata.output_field 137 | # bg = {c: 0 for c in o.data[np.where(pattern == 0)]} 138 | bg = dict(list(zip(*np.stack([o.data, pattern], 0).reshape(2, -1)))) 139 | # current_id = 1 140 | cropped_data = crop_func(i.data) 141 | # #print(cropped_data, id(i.data)) 142 | # for line in cropped_data: 143 | # for x in line: 144 | # if x in bg: 145 | # continue 146 | # bg[x] = current_id 147 | # current_id += 1 148 | # return np.asarray([ [ bg[x] for x in line ] for line in cropped_data ]) 149 | data = [[bg.get(x, 0) for x in line] for line in cropped_data] 150 | data = Field(data) 151 | return iodata.reconstruct(data).data 152 | 153 | 154 | class ConstantShaper(Predictor): 155 | def __init__(self): 156 | self.pattern = None 157 | 158 | def is_available(self, iodata_list): 159 | colormaps = [ 160 | change_colors( 161 | iodata.output_field.data, 162 | background_colors=[ 163 | c 164 | for c in range(10) 165 | if np.sum(iodata.input_field.data == c) 166 | >= np.sum(iodata.output_field.data == c) 167 | ], 168 | ) 169 | for iodata in iodata_list 170 | ] 171 | if len(colormaps) < 1: 172 | return False 173 | shapes = {c.shape for c in colormaps} 174 | if len(shapes) != 1: 175 | return False 176 | # print(colormaps, np.stack(colormaps).std()) 177 | # print((np.unique(np.stack(colormaps)))) 178 | if ( 179 | np.stack(colormaps).std(0).max() > 0 180 | or len(np.unique(np.stack(colormaps))) == 1 181 | ): 182 | for background_colors in [[]] + [[i] for i in range(10)]: 183 | colormaps = [ 184 | change_colors( 185 | iodata.output_field.data, background_colors=background_colors 186 | ) 187 | for iodata in iodata_list 188 | ] 189 | # print(background_colors, colormaps) 190 | if len(colormaps) < 1: 191 | return False 192 | shapes = {c.shape for c in colormaps} 193 | if len(shapes) != 1: 194 | return False 195 | if ( 196 | np.stack(colormaps).std(0).max() <= 0 197 | and np.stack(colormaps).std() > 0 198 | ): 199 | break 200 | # print(np.stack(colormaps).std(0)) 201 | if np.stack(colormaps).std(0).max() > 0: 202 | return False 203 | self.pattern = colormaps[0] 204 | # if self.pattern.std() == 0: 205 | # return False 206 | return True 207 | 208 | def train(self, iodata_list): 209 | self.input_pattern = None 210 | self.crop_func = lambda x: x 211 | if self.pattern.std() == 0: 212 | return 213 | 214 | for crop_func in 
[lambda x: x, crop_data]: 215 | colormap = [ 216 | process_iodata_input(iodata, self.pattern, crop_func) 217 | for iodata in iodata_list 218 | ] 219 | # print(colormap) 220 | if len({x.shape for x in colormap}) == 1: 221 | break 222 | # print({ x.shape for x in colormap }, colormap) 223 | if len({x.shape for x in colormap}) == 1: 224 | self.crop_func = crop_func 225 | 226 | if np.stack(colormap, 0).std(0).max() <= 0.0: 227 | self.input_pattern = colormap[0] 228 | # self.input_pattern = None 229 | # # actual training is done in is_available method 230 | # for iodata in iodata_list: 231 | # i = iodata.input_field 232 | # o = iodata.output_field 233 | # np.stack([o.data, self.pattern], 0) 234 | # pass 235 | pass 236 | 237 | def predict(self, field): 238 | if isinstance(field, IOData): 239 | for v in self.predict(field.input_field): 240 | yield v 241 | # yield field.reconstruct(v) 242 | return 243 | if self.input_pattern is not None: 244 | color_convertor = dict( 245 | set( 246 | list( 247 | zip( 248 | *np.stack( 249 | [self.input_pattern, self.crop_func(field.data)], 0 250 | ).reshape(2, -1) 251 | ) 252 | ) 253 | ) 254 | ) 255 | result = np.asarray( 256 | [[color_convertor.get(x, x) for x in line] for line in self.pattern] 257 | ) 258 | result = Field(result) 259 | yield result # field.reconstruct(result) 260 | # return 261 | data = self.crop_func(field.data) 262 | h, w = data.shape 263 | h = min(self.pattern.shape[0], h) 264 | w = min(self.pattern.shape[1], w) 265 | ss = self.pattern[:h, :w] 266 | background_colors = np.unique(data[np.where(ss == 0)]) 267 | colormap = { 268 | i: [ 269 | c 270 | for c in np.unique(data[np.where(ss == i)]) 271 | if i == 0 or c not in background_colors 272 | ] 273 | for i in np.unique(ss) 274 | } 275 | result = np.zeros(self.pattern.shape, dtype=np.uint8) 276 | result[:h, :w] = data[:h, :w] 277 | for key in colormap: 278 | if key == 0: 279 | continue 280 | value = colormap[key] 281 | if len(value) < 1: 282 | continue 283 | coords = np.where(self.pattern == key) 284 | result[coords] = value[0] 285 | yield Field(result) # field.reconstruct(Field(result)) 286 | 287 | def __str__(self): 288 | return "ConstantShaper" 289 | -------------------------------------------------------------------------------- /kaggle_arc/predictors/subpattern.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) 4 | 5 | from xgboost import XGBClassifier 6 | 7 | from operations.reversible import * 8 | 9 | from predictors.basic import * # BTFeatureExtractor, BoostingTreePredictor2 10 | from predictors.boosting_tree import * 11 | import predictors.availability_mixins as mixins 12 | 13 | 14 | class SubpatternMatcher: 15 | @staticmethod 16 | def get_separator_length(sequence, size): 17 | if len(sequence) == 0: 18 | return None 19 | if len(sequence) == 1 and size // 2 == sequence[0]: 20 | return sequence[0] 21 | w = sequence[0] 22 | if w <= 1 or w > size // 2: 23 | return None 24 | xlast = w 25 | for x in sequence[1:]: 26 | if x != xlast + w + 1: 27 | return None 28 | xlast = x  # advance so consecutive separators must sit w + 1 apart 29 | return w 30 | @staticmethod 31 | def get_separating_lines(i): 32 | for c in np.unique(i.flatten()): 33 | bmap = i == c 34 | s0 = bmap.std(0) == 0 35 | s1 = bmap.std(1) == 0 36 | s0 = bmap.all(0) * s0 37 | s1 = bmap.all(1) * s1 38 | 39 | # print(bmap[:, s0==0]) 40 | s0 = np.argwhere(s0).flatten() 41 | s1 = np.argwhere(s1).flatten() 42 | 
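# At this point s0/s1 hold the indices of the columns/rows that consist
# entirely of color c; get_separator_length() then checks that these indices
# are evenly spaced (w, 2w + 1, 3w + 2, ...), i.e. that they form one-cell
# separator lines between tiles of size w, and returns that tile size.
r0 = SubpatternMatcher.get_separator_length(s0, 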
i.shape[1]) 43 | r1 = SubpatternMatcher.get_separator_length(s1, i.shape[0]) 44 | if r0 is None and r1 is None: 45 | # yield c, i.shape[0], i.shape[1] 46 | continue 47 | if r0 is None: 48 | yield c, r1, i.shape[1] 49 | continue 50 | if r1 is None: 51 | yield c, i.shape[0], r0 52 | continue 53 | yield c, r1, r0 54 | 55 | @staticmethod 56 | def get_availability_param(iodata): 57 | i = iodata.input_field.data 58 | o = iodata.output_field.data 59 | isep = {(h, w): c for c, h, w in SubpatternMatcher.get_separating_lines(i)} 60 | osep = {(h, w): c for c, h, w in SubpatternMatcher.get_separating_lines(o)} 61 | 62 | common_areas = isep.keys() & osep.keys() 63 | if len(common_areas) < 1: 64 | return None 65 | return {k: (isep[k], osep[k]) for k in common_areas} 66 | 67 | @staticmethod 68 | def process_iodata_list(iodata_list): 69 | all_params = [] 70 | total = set() 71 | for t in iodata_list: 72 | param = SubpatternMatcher.get_availability_param(t) 73 | if param is None: 74 | return set(), [] 75 | for k in param.keys(): 76 | total.add(k) 77 | all_params.append(param) 78 | sizes = {k for k in total if np.all([k in x for x in all_params])} 79 | all_params = [{k: x[k] for k in sizes} for x in all_params] 80 | return sizes, all_params 81 | 82 | 83 | class SubpatternMatcherPredictor(Predictor): 84 | 85 | def __init__(self): 86 | self.xgb = XGBClassifier( 87 | n_estimators=10, 88 | booster="dart", 89 | n_jobs=-1, 90 | objective="multi:softmax", 91 | num_class=10, 92 | ) 93 | pass 94 | 95 | def is_available(self, iodata_list): 96 | for iodata in iodata_list: 97 | if iodata.input_field.shape != iodata.output_field.shape: 98 | return False 99 | # m1 = iodata.output_field.shape # iodata.input_field.height // iodata.output_field.height 100 | # m2 = iodata.output_field.width # iodata.input_field.width // iodata.output_field.width 101 | # all_sizes.add((m1, m2)) 102 | sizes, params = SubpatternMatcher.process_iodata_list(iodata_list) 103 | if len(sizes) < 1: 104 | return False 105 | self.sizes = sizes 106 | self.params = params 107 | (h, w) = list(sizes)[0] 108 | self.op = WrappedOperation( 109 | ReversibleSplit( 110 | (h, w), hsep=1, wsep=1, outer_sep=False, splitter_func=split_by_shape 111 | ), 112 | ReversibleCombine( 113 | (h, w), 114 | hsep=1, 115 | wsep=1, 116 | outer_sep=False, 117 | sep_color=5, 118 | splitter_func=split_by_shape, 119 | ), 120 | ) 121 | # self.op.train(iodata_list) 122 | return True 123 | 124 | def train(self, iodata_list): 125 | all_samples = [] 126 | self.op.train(iodata_list) 127 | for iodata in iodata_list: 128 | i, o = self.op.wrap(iodata) 129 | all_samples.append((i, o)) 130 | all_samples = [ 131 | (xi, xo) 132 | for (i, o) in all_samples 133 | for xi, xo in zip(i.flat_iter(), o.flat_iter()) 134 | # for li, lo in zip(i, o) 135 | # for xi, xo in zip(li, lo) 136 | ] 137 | # print(all_samples) 138 | feat, target, _ = BTFeatureExtractor.get_features( 139 | all_samples, features_maker=BTFeatureExtractor.make_features_v3 140 | ) 141 | # print(feat.shape, target.shape) 142 | self.xgb.fit(feat, target, verbose=-1) 143 | 144 | def predict(self, field): 145 | if isinstance(field, IOData): 146 | for v in self.predict(field.input_field): 147 | yield v 148 | return 149 | # repainter = Repaint(field.data) 150 | 151 | feature_field, postprocess = self.op.run(field) 152 | 153 | # print(feature_field) 154 | def predict_on_subfield(x): 155 | nrows, ncols = x.shape 156 | feat = BTFeatureExtractor.make_features_v3(x) 157 | preds = self.xgb.predict(feat).reshape(nrows, ncols) 158 | preds = 
preds.astype(int) # .tolist() 159 | # print(x.data) 160 | return Field(preds) 161 | 162 | lines = feature_field.map(predict_on_subfield) 163 | result = postprocess(lines) 164 | yield result 165 | 166 | -------------------------------------------------------------------------------- /kaggle_arc/scripts/0_idpredictor_on_test_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | from constants import * 5 | from base.field import Field 6 | 7 | from utils import * 8 | from base.field import * 9 | 10 | from predictors.basic import IdPredictor 11 | 12 | 13 | datasets = read_datasets(DATADIR) 14 | train_ds, eval_ds, test_ds = [convert2samples(x) for x in datasets] 15 | 16 | # predictor = IdPredictor() 17 | 18 | save_predictions(IdPredictor, test_ds, TEST_SAVEPATH) 19 | -------------------------------------------------------------------------------- /kaggle_arc/scripts/1_complexpredictor_on_test_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | from constants import * 5 | from base.field import Field 6 | 7 | from utils import * 8 | from base.field import * 9 | 10 | from predictors.basic import IdPredictor, ZerosPredictor, ConstPredictor 11 | from predictors.complex import ComplexPredictor 12 | 13 | 14 | datasets = read_datasets(DATADIR) 15 | train_ds, eval_ds, test_ds = [convert2samples(x) for x in datasets] 16 | 17 | # predictor = IdPredictor() 18 | predictor_args = [IdPredictor, ZerosPredictor] 19 | for i in range(1, 10): 20 | predictor_args.append((ConstPredictor, [], {"value": i})) 21 | 22 | save_predictions(ComplexPredictor, test_ds, TEST_SAVEPATH, k=3, args=[predictor_args]) 23 | -------------------------------------------------------------------------------- /kaggle_arc/scripts/2_complexpredictor_coloring.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | from constants import * 5 | from base.field import Field 6 | 7 | from utils import * 8 | from base.field import * 9 | 10 | from predictors.basic import IdPredictor, ZerosPredictor, ConstPredictor, FillPredictor 11 | from predictors.complex import ComplexPredictor 12 | from predictors.color_counting import ColorCountingPredictor 13 | from predictors.shapes import ( 14 | RepeatingPredictor, 15 | FractalPredictor, 16 | ResizingPredictor, 17 | MirrorPredictor, 18 | ConstantShaper, 19 | ) 20 | from predictors.boosting_tree import ( 21 | BoostingTreePredictor, 22 | BoostingTreePredictor2, 23 | BoostingTreePredictor3, 24 | ) 25 | from predictors.convolution import ConvolutionPredictor 26 | from predictors.graph_boosting_tree import ( 27 | GraphBoostingTreePredictor, 28 | GraphBoostingTreePredictor2, 29 | GraphBoostingTreePredictor3, 30 | ) 31 | from predictors.decision_tree import AugmentedPredictor 32 | from predictors.subpattern import SubpatternMatcherPredictor 33 | from predictors.connector import PointConnectorPredictor 34 | from predictors.draft_predictors.cam_predictor import *  # these modules live under draft_predictors/ 35 | from predictors.draft_predictors.cf_combinator import WrappedCFPredictor 36 | 37 | 38 | datasets = read_datasets(DATADIR) 39 | train_ds, eval_ds, test_ds = [convert2samples(x) for x in datasets] 40 | 41 | # predictor = IdPredictor() 42 | predictor_args = [ 43 | IdPredictor, 44 | ZerosPredictor, 45 | # ColorCountingPredictor, 46 | RepeatingPredictor, 47 | FractalPredictor, 48 | 
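# Entries may be plain predictor classes or (class, args, kwargs) tuples, as
# in the commented-out ConvolutionPredictor lines below; ComplexPredictor
# presumably instantiates every available candidate per task and keeps the
# best-scoring predictions (k=3 in save_predictions at the bottom).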
ResizingPredictor, 49 | # GraphBoostingTreePredictor,#no impact 50 | # GraphBoostingTreePredictor3, 51 | # ConstantShaper, 52 | # BoostingTreePredictor, 53 | # BoostingTreePredictor2, 54 | BoostingTreePredictor3, 55 | SubpatternMatcherPredictor, 56 | # GraphBoostingTreePredictor2, 57 | # PointConnectorPredictor, 58 | # AugmentedPredictor, 59 | # FillPredictor, 60 | WrappedCFPredictor, 61 | MirrorPredictor, 62 | # (ConvolutionPredictor, [], {'loss': 'mse'}), 63 | # (ConvolutionPredictor, [], {'loss': 'dice'}) 64 | ] 65 | # for i in range(1, 10): 66 | # predictor_args.append((ConstPredictor, [], {'value': i})) 67 | 68 | save_predictions(ComplexPredictor, test_ds, TEST_SAVEPATH, k=3, args=[predictor_args]) 69 | -------------------------------------------------------------------------------- /kaggle_arc/scripts/convert2ipynb.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import sys 5 | import networkx as nx 6 | import subprocess 7 | import matplotlib 8 | 9 | try: 10 | if not matplotlib.is_interactive(): 11 | matplotlib.use("svg") 12 | except: 13 | pass 14 | 15 | import matplotlib.pyplot as plt 16 | 17 | TEMPLATE = { 18 | "cells": [], 19 | "metadata": { 20 | "kernelspec": { 21 | "display_name": "Python 3", 22 | "language": "python", 23 | "name": "python3", 24 | }, 25 | "language_info": { 26 | "codemirror_mode": {"name": "ipython", "version": 3}, 27 | "file_extension": ".py", 28 | "mimetype": "text/x-python", 29 | "name": "python", 30 | "nbconvert_exporter": "python", 31 | "pygments_lexer": "ipython3", 32 | "version": "3.7.3", 33 | }, 34 | }, 35 | "nbformat": 4, 36 | "nbformat_minor": 2, 37 | } 38 | 39 | 40 | def filter_imports(lines, debug=False): 41 | ids = dict() 42 | for i, l in enumerate(lines): 43 | if l.startswith("import "): 44 | ids[i] = l[6:].strip().split()[0] 45 | continue 46 | import_pos = l.find(" import ") 47 | if import_pos < 0: 48 | continue 49 | package = l[:import_pos].split()[1] 50 | ids[i] = package 51 | if debug: 52 | print(ids) 53 | return ids 54 | 55 | 56 | def filter_local(ids, local_dirs=[], basedir="..", debug=False): 57 | result = dict() 58 | 59 | for k, v in ids.items(): 60 | path = os.path.join(basedir, v.replace(".", os.sep)) 61 | if not (path + ".py" in local_dirs): 62 | continue 63 | 64 | # path = os.path.join(basedir, s) 65 | if os.path.exists(path + ".py"): 66 | result[k] = (v, path + ".py") 67 | if os.path.exists(path) and os.path.isdir(path): 68 | if os.path.exists(path + "/__init__.py"): 69 | result[k] = (v, path + "/__init__.py") 70 | return result 71 | 72 | 73 | def filter_local_and_remote(ids, local_dirs=[], basedir="..", debug=False): 74 | local_imports = dict() 75 | remote_imports = dict() 76 | 77 | for k, v in ids.items(): 78 | path = os.path.join(basedir, v.replace(".", os.sep)) 79 | if not (path + ".py" in local_dirs): 80 | remote_imports[k] = (v, path) 81 | continue 82 | 83 | # path = os.path.join(basedir, s) 84 | if os.path.exists(path + ".py"): 85 | local_imports[k] = (v, path + ".py") 86 | if os.path.exists(path) and os.path.isdir(path): 87 | if os.path.exists(path + "/__init__.py"): 88 | local_imports[k] = (v, path + "/__init__.py") 89 | return local_imports, remote_imports 90 | 91 | 92 | def walk_deps( 93 | filename, 94 | processed=set(), 95 | local_dirs=[], 96 | basedir="..", 97 | debug=False, 98 | split_header=True, 99 | ): 100 | with open(filename) as f: 101 | lines = f.readlines() 102 | # local_dirs = [ 103 | # os.path.join(base, f) for base, dirs, 
files in os.walk(basedir) 104 | # for f in files 105 | # if os.path.splitext(f)[-1]==".py"] 106 | header_lines = [filename] 107 | if split_header: 108 | if lines[0].strip() == '"""': 109 | for i in range(1, len(lines)): 110 | pos = lines[i].strip().find('"""') 111 | if pos >= 0: 112 | break 113 | if pos >= 0: 114 | header_lines = lines[1:i] 115 | header_lines.append(lines[i][:pos]) 116 | lines = lines[i + 1 :] 117 | 118 | # print(local_files) 119 | ids = filter_imports(lines, debug=debug) 120 | local_imports, remote_imports = filter_local_and_remote( 121 | ids, local_dirs=local_dirs, basedir=basedir, debug=debug 122 | ) 123 | # ids_ = { k: (package, path) 124 | # for k, (package, path) in ids.items() 125 | # #if not path in processed 126 | # } 127 | # if len(ids_) < 1: 128 | remote_import_lines = [lines[k] for k in remote_imports] 129 | lines = [l for i, l in enumerate(lines) if not i in ids] 130 | yield filename, header_lines, lines, local_imports, remote_import_lines 131 | 132 | # lines = [ l for i, l in enumerate(lines) if not i in ids ] 133 | paths = set() 134 | for k, (package, path) in local_imports.items(): 135 | if path in processed: 136 | continue 137 | paths.add(path) 138 | new_processed = set() 139 | for path in paths: 140 | new_processed.add(path) 141 | for w in walk_deps( 142 | path, {*processed, *new_processed}, local_dirs=local_dirs, basedir=basedir 143 | ): 144 | yield w 145 | 146 | 147 | def make_graph(start_file="../scripts/runner.py", basedir=".."): 148 | data = [] 149 | nodes = dict() 150 | node_names = [] 151 | local_dirs = [ 152 | os.path.join(base, f) 153 | for base, dirs, files in os.walk(basedir) 154 | for f in files 155 | if os.path.splitext(f)[-1] == ".py" 156 | ] 157 | # print(local_dirs) 158 | all_remote_imports = set() 159 | G = nx.DiGraph() 160 | for file, header_lines, lines, deps, remote_import_lines in walk_deps( 161 | start_file, local_dirs=local_dirs, basedir=basedir, debug=False 162 | ): 163 | 164 | all_remote_imports.update(remote_import_lines) 165 | 166 | dependencies = set([dep for i, (package, dep) in deps.items()]) 167 | # print("-"*10) 168 | # print(file, dependencies, deps) 169 | if file not in nodes: 170 | nodes[file] = len(nodes) 171 | node_names.append(file) 172 | for d in dependencies: 173 | if not d in node_names: 174 | nodes[d] = len(nodes) 175 | node_names.append(d) 176 | if nodes[file] in G.nodes: 177 | G.nodes[nodes[file]]["lines"] = lines 178 | G.nodes[nodes[file]]["name"] = file 179 | G.nodes[nodes[file]]["header"] = header_lines 180 | else: 181 | G.add_node(nodes[file], lines=lines, name=file, header=header_lines) 182 | for d in dependencies: 183 | if not nodes[d] in G.nodes: 184 | G.add_node(nodes[d], name=d) 185 | e = (nodes[d], nodes[file]) 186 | if not e in G.edges: 187 | G.add_edge(*e) 188 | index = len(G.nodes) 189 | 190 | G.add_node(index, name="All imports", lines=sorted(all_remote_imports)) 191 | for i in range(index): 192 | G.add_edge(index, i) 193 | return G 194 | 195 | 196 | class DepGraph: 197 | def __init__(self, mainpy, basedir="."): 198 | self.graph = make_graph(mainpy, basedir=basedir) 199 | 200 | def sorted_files(self): 201 | for i in nx.topological_sort(self.graph): 202 | header = self.graph.nodes[i].get("header", []) 203 | lines = self.graph.nodes[i].get("lines", []) 204 | name = self.graph.nodes[i].get("name", []) 205 | if len(lines) < 1: 206 | continue 207 | yield header, name, lines 208 | 209 | def draw(self): 210 | pos = nx.spring_layout(self.graph) 211 | nx.draw(self.graph, pos=pos) 212 | labels = {i: 
self.graph.nodes[i]["name"] for i in self.graph.nodes} 213 | nx.draw_networkx_labels(self.graph, pos=pos, labels=labels) 214 | plt.savefig("filename.png") 215 | 216 | 217 | def read_file(file_path): 218 | with open(file_path) as f: 219 | lines = f.readlines() 220 | return lines 221 | 222 | 223 | def strip_lines(lines): 224 | i = 0 225 | j = 0 226 | for i in range(len(lines)): 227 | if len(lines[i].strip()) > 0: 228 | break 229 | for j in range(len(lines) - 1, i, -1): 230 | if len(lines[j].strip()) > 0: 231 | break 232 | return lines[i : j + 1] 233 | 234 | 235 | def wrap2cell(data, ctype="code"): 236 | code_params = {"code": {"execution_count": 0, "outputs": []}} 237 | base = {"cell_type": ctype, "metadata": {}, "source": data} 238 | return {**base, **code_params.get(ctype, {})} 239 | 240 | 241 | if __name__ == "__main__": 242 | parser = argparse.ArgumentParser() 243 | parser.add_argument( 244 | "mainpy", help="path or name of main file which contains runnable code" 245 | ) 246 | parser.add_argument("savepath", help="path where .ipynb notebook should be saved") 247 | parser.add_argument( 248 | "--draw", 249 | action="store_true", 250 | help="use this flag to indicate that graph should be saved", 251 | ) 252 | args = parser.parse_args() 253 | 254 | graph = DepGraph(args.mainpy) 255 | if args.draw: 256 | graph.draw() 257 | git_hash = ( 258 | subprocess.check_output(["git", "rev-parse", "HEAD"]).decode().strip() 259 | ) # , stdout=subprocess.PIPE) 260 | cmd = " ".join(sys.argv) 261 | header_text = f""" 262 | This file was autogenerated from code at my github repo. 263 | - Main script: `{args.mainpy}` 264 | - Commit hash: {git_hash} 265 | 266 | I generated this with the command (using python 3.7): 267 | ``` 268 | python {cmd} 269 | ``` 270 | """ 271 | # header_text = [linefor line in header_text.split("\n")] 272 | TEMPLATE["cells"].append(wrap2cell([header_text], ctype="markdown")) 273 | for header, name, lines in graph.sorted_files(): 274 | stripped_header = strip_lines(header) 275 | if len(stripped_header) > 0: 276 | TEMPLATE["cells"].append(wrap2cell(stripped_header, ctype="markdown")) 277 | lines = strip_lines(lines) 278 | if len(lines) > 0: 279 | TEMPLATE["cells"].append(wrap2cell(lines)) 280 | 281 | with open(args.savepath, "w") as f: 282 | json.dump(TEMPLATE, f, indent=2) 283 | -------------------------------------------------------------------------------- /kaggle_arc/scripts/predictor_validator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import pandas as pd 4 | import json 5 | import argparse 6 | 7 | from constants import * 8 | from base.field import Field 9 | 10 | from utils import * 11 | from base.field import * 12 | 13 | from predictors.basic import ( 14 | IdPredictor, 15 | ZerosPredictor, 16 | ConstPredictor, 17 | FillPredictor, 18 | Predictor, 19 | ) 20 | from predictors.complex import ComplexPredictor 21 | from predictors.color_counting import ColorCountingPredictor 22 | from predictors.shapes import ( 23 | RepeatingPredictor, 24 | FractalPredictor, 25 | ResizingPredictor, 26 | MirrorPredictor, 27 | ConstantShaper, 28 | ) 29 | from predictors.boosting_tree import ( 30 | BoostingTreePredictor, 31 | BoostingTreePredictor2, 32 | BoostingTreePredictor3, 33 | ) 34 | from predictors.convolution import ConvolutionPredictor 35 | from predictors.graph_boosting_tree import ( 36 | GraphBoostingTreePredictor, 37 | GraphBoostingTreePredictor2, 38 | GraphBoostingTreePredictor3, 39 | ) 40 | from predictors.decision_tree 
import AugmentedPredictor 41 | from predictors.subpattern import SubpatternMatcherPredictor 42 | from predictors.field2point import * 43 | from predictors.complex import DefaultComplexPredictor 44 | from predictors.draft_predictors.cam_predictor import *  # these modules live under draft_predictors/ 45 | from predictors.connector import * 46 | from predictors.draft_predictors.cf_combinator import * 47 | 48 | 49 | datasets = read_datasets(DATADIR) 50 | train_ds, eval_ds, test_ds = [convert2samples(x) for x in datasets] 51 | 52 | 53 | def evaluate_on_dataset( 54 | predictor_class, 55 | ds, 56 | cutoff=1.0, 57 | draw_results=True, 58 | imagedir="../temp/images", 59 | dataset_id=0, 60 | ): 61 | nsamples = 0 62 | train1 = 0 63 | test1 = 0 64 | params = {} 65 | params["total"] = len(ds) 66 | params["train_score"] = dict() 67 | params["test_score"] = dict() 68 | for i, sample in enumerate(ds): 69 | predictor = predictor_class() 70 | if not predictor.is_available(sample.train): 71 | continue 72 | nsamples += 1 73 | predictor.train(sample.train) 74 | predictor.freeze_by_score(sample.train) 75 | score_train = predictor.validate(sample.train) 76 | # print(score_train) 77 | params["train_score"][i] = score_train 78 | score_test = predictor.validate(sample.test) 79 | params["test_score"][i] = score_test 80 | if score_train >= cutoff: 81 | train1 += 1 82 | if score_test >= cutoff: 83 | test1 += 1 84 | if draw_results: # and score_train == 1 and score_test < 1: 85 | title = f"Image {i}: train={score_train:2.2f}, test={score_test:2.2f}\n" 86 | sample.show(predictor=predictor, title=title) 87 | # bbox_inches='tight', 88 | # plt.tight_layout() 89 | plt.savefig( 90 | os.path.join( 91 | imagedir, 92 | f"image_{dataset_id}_{score_train:0.2f}_{score_test:0.2f}_{i:03d}.png", 93 | ) 94 | ) 95 | plt.close("all") 96 | 97 | return train1, test1, nsamples, params 98 | 99 | 100 | if len(sys.argv) < 2: 101 | print("no predictor classes were provided") 102 | 103 | names = sys.argv[1:] + [ 104 | n + "Predictor" for n in sys.argv[1:] if n.find("Predictor") < 0 105 | ] 106 | savedir = "../temp/eval" 107 | if not os.path.exists(savedir): 108 | os.makedirs(savedir) 109 | 110 | for name in names: 111 | if name not in globals(): 112 | # print(f"{name} predictor not found") 113 | continue 114 | predictor_class = globals()[name] 115 | imagedir = os.path.join("../temp/images", name) 116 | if not os.path.exists(imagedir): 117 | os.makedirs(imagedir) 118 | # if not isinstance(predictor_class, Predictor): 119 | # print(f"{name} is not a predictor") 120 | # continue 121 | all_results = [name] 122 | for i, ds in enumerate([train_ds, eval_ds]): 123 | result = evaluate_on_dataset( 124 | predictor_class, ds, cutoff=1.0, imagedir=imagedir, dataset_id=i 125 | ) 126 | params = result[-1] 127 | with open(os.path.join(savedir, f"{name}_{i}.json"), "w") as f: 128 | json.dump(params, f) 129 | result = result[:-1] 130 | result = " / ".join(([f"{r:d}" for r in result])) 131 | all_results.append(result) 132 | all_results = " | ".join(all_results) 133 | print(all_results) 134 | -------------------------------------------------------------------------------- /kaggle_arc/utils.py: -------------------------------------------------------------------------------- 1 | import rootutils 2 | 3 | root = rootutils.setup_root( 4 | __file__, indicator=".project-root", pythonpath=True, cwd=False 5 | ) 6 | 7 | import json 8 | import os 9 | from collections import OrderedDict 10 | 11 | import pandas as pd 12 | import numpy as np 13 | from pathlib import Path 14 | 15 | 
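# The helpers in this module read both supported dataset layouts (the old
# training/evaluation/test directories and the newer arc-agi_*_challenges /
# _solutions file pairs), wrap tasks into Sample objects, and write top-k
# predictions in the submission csv format.
from base.iodata 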
import Sample 16 | from base.field import Field 17 | 18 | 19 | def read_single_dataset(basedir, prefix): 20 | if isinstance(basedir, str): 21 | basedir = Path(basedir) 22 | challenges_file = basedir / f"{prefix}_challenges.json" 23 | solutions_file = basedir / f"{prefix}_solutions.json" 24 | if not challenges_file.exists(): 25 | return None 26 | with open(challenges_file.as_posix()) as f: 27 | puzzles = json.load(f) 28 | solutions = {} 29 | if solutions_file.exists(): 30 | with open(solutions_file.as_posix()) as f: 31 | solutions = json.load(f) 32 | # we are interested in puzzles with corresponding solutions, so we'll ignore solutions 33 | # which are not present in the puzzles file 34 | puzzle_id_list = sorted(puzzles) 35 | records = [] 36 | for puzzle_id in puzzle_id_list: 37 | puzzle = puzzles[puzzle_id] 38 | solution = solutions.get(puzzle_id) 39 | records.append((puzzle_id, (puzzle, solution))) 40 | return records 41 | 42 | 43 | def read_datasets_old(basedir="../data"): 44 | train_dir = os.path.join(basedir, "training") 45 | train_data = OrderedDict( 46 | (os.path.splitext(x)[0], os.path.join(train_dir, x)) 47 | for x in os.listdir(train_dir) 48 | ) 49 | eval_dir = os.path.join(basedir, "evaluation") 50 | eval_data = OrderedDict( 51 | (os.path.splitext(x)[0], os.path.join(eval_dir, x)) 52 | for x in os.listdir(eval_dir) 53 | ) 54 | test_dir = os.path.join(basedir, "test") 55 | test_data = OrderedDict( 56 | (os.path.splitext(x)[0], os.path.join(test_dir, x)) 57 | for x in os.listdir(test_dir) 58 | ) 59 | return train_data, eval_data, test_data 60 | 61 | 62 | def read_datasets( 63 | basedir="../data", 64 | train_prefix="arc-agi_training", 65 | eval_prefix="arc-agi_evaluation", 66 | test_prefix="arc-agi_test", 67 | ): 68 | checked_path = Path(basedir) / "training" 69 | if checked_path.exists(): 70 | return read_datasets_old(basedir) 71 | 72 | train_data = read_single_dataset(basedir, prefix=train_prefix) 73 | eval_data = read_single_dataset(basedir, prefix=eval_prefix) 74 | test_data = read_single_dataset(basedir, prefix=test_prefix) 75 | return train_data, eval_data, test_data 76 | 77 | 78 | def convert2samples(data): 79 | if isinstance(data, OrderedDict): 80 | return [Sample(name, path) for name, path in data.items()] 81 | return [Sample(name, puzzle_data) for name, puzzle_data in data] 82 | 83 | 84 | def save_predictions( 85 | predictor, ds, savepath, k=3, args=[], kwargs=dict(), verbose=True 86 | ): 87 | all_data = [] 88 | for name, i, prediction in predictor.predict_on( 89 | ds, k=k, args=args, kwargs=kwargs, verbose=verbose 90 | ): 91 | if isinstance(prediction, Field): 92 | preds = [str(prediction)] * k 93 | if isinstance(prediction, list): 94 | preds = [str(p) for p in prediction] 95 | if len(preds) < k: 96 | preds = (preds * k)[:k] 97 | preds = " ".join(preds) 98 | all_data.append({"output_id": f"{name}_{i}", "output": preds}) 99 | pd.DataFrame(all_data, columns=["output_id", "output"]).to_csv(savepath, index=None) 100 | 101 | 102 | def check_if_can_be_mirrored(data, h=14, w=9): 103 | # w, h = iodata.input_field.shape 104 | sample = data[:h, :w] 105 | h1, w1 = data.shape 106 | m1, m2 = h1 // h, w1 // w 107 | buf = dict() 108 | buf[(0, 0)] = sample 109 | for i in range(m1): 110 | for j in range(m2): 111 | if i == 0 and j == 0: 112 | continue 113 | current = data[i * h : i * h + h, j * w : j * w + w] 114 | p = (i % 2, j % 2) 115 | # print(p, h, w) 116 | if p in buf: 117 | if not np.all(buf[p] == current): 118 | return None 119 | else: 120 | buf[p] = current 121 | a1 = 
np.all(sample == buf[0, 1]) 122 | a2 = np.all(sample == buf[1, 0]) 123 | a3 = np.all(sample == buf[1, 1]) 124 | if a1 and a2 and a3: 125 | return (False, False) 126 | b1 = np.all(sample[:, ::-1] == buf[0, 1]) 127 | b2 = np.all(sample[::-1, :] == buf[1, 0]) 128 | b3 = np.all(buf[1, 1] == buf[1, 0]) 129 | b4 = np.all(buf[1, 1] == buf[0, 1]) 130 | b5 = np.all(sample[::-1, ::-1] == buf[1, 1]) 131 | if b1 and b2 and b5: 132 | return (True, True) 133 | if b1 and a2 and b4: 134 | return (False, True) 135 | if b2 and a1 and b3: 136 | return (True, False) 137 | return None 138 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "kaggle_arc" 7 | authors = [ 8 | {name = "Tatiana Malygina"}, 9 | ] 10 | version = "0.0.1" 11 | dependencies = [ 12 | "numpy", 13 | "pandas", 14 | "rootutils", 15 | "scikit-learn", 16 | "scikit-image", 17 | "networkx", 18 | "xgboost", 19 | "torch" 20 | ] 21 | readme = "readme.md" 22 | requires-python = ">=3.8" 23 | 24 | [tool.setuptools.packages] 25 | find = {} 26 | 27 | [tool.setuptools.package-data] 28 | "*" = [".project-root"] -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | 2 | ![Project image](https://github.com/latticetower/kaggle-arc/blob/main/images/ministry_of_predictors.png) 3 | 4 | # Description 5 | 6 | Legacy code with my solutions from https://www.kaggle.com/c/abstraction-and-reasoning-challenge. 7 | 8 | I'm refactoring it for reuse in the ongoing Kaggle ARC Prize competition. 9 | 10 | There is no guarantee that the code is free of typos or missing pieces. If you use it, do so at your own risk, especially the predictor classes that rely on xgboost (I've noticed that they crash the Kaggle notebook on some data samples, but I haven't figured out how to fix this yet). 11 | 12 | ## Installation & usage 13 | ```bash 14 | git clone https://github.com/latticetower/kaggle-arc.git kaggle-arc 15 | pip install ./kaggle-arc 16 | ``` 17 | 18 | ```mermaid 19 | --- 20 | title: Main project classes 21 | --- 22 | classDiagram 23 | Field --o IOData 24 | IOData --o Sample 25 | class Field["kaggle_arc.base.Field"]{ 26 | numpy.array data 27 | show(ax=None, label=None) 28 | } 29 | class IOData["kaggle_arc.iodata.IOData"]{ 30 | Field input_field 31 | Field output_field 32 | show(predictor=None, npredictions=1, ...) 33 | } 34 | class Sample["kaggle_arc.iodata.Sample"]{ 35 | String name 36 | List[IOData] train 37 | List[IOData] test 38 | show(predictor=None, npredictions=3, ...) 39 | } 40 | Predictor <|-- ComplexPredictor 41 | AvailableAll <|-- ComplexPredictor 42 | note for Predictor "has many descendant classes" 43 | class Predictor["kaggle_arc.predictors.basic.Predictor"]{ 44 | <<abstract>> 45 | train(iodata_list) 46 | predict(field) 47 | validate(iodata_list, k=3) 48 | predict_on(predictor_class, ds, ...) 
49 | } 50 | class ComplexPredictor["kaggle_arc.predictors.complex.ComplexPredictor"]{ 51 | List[Predictor] predictors 52 | train(iodata_list) 53 | predict(field) 54 | validate(iodata_list, k=3) 55 | freeze_by_score(iodata_list, k=3) 56 | } 57 | note for AvailableAll "mixin class\nhas many descendant classes" 58 | class AvailableAll["kaggle_arc.predictors.basic.AvailableAll"]{ 59 | <<mixin>> 60 | is_available(iodata_list) 61 | } 62 | ```
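
A minimal end-to-end sketch of the API above (untested; it assumes `DATADIR` and `TEST_SAVEPATH` are defined in `kaggle_arc/constants.py`, as the scripts in `kaggle_arc/scripts` do, and that it is run from the `scripts` folder with the repo root on `PYTHONPATH`):

```python
from constants import DATADIR, TEST_SAVEPATH
from utils import read_datasets, convert2samples, save_predictions
from predictors.basic import IdPredictor, ZerosPredictor
from predictors.complex import ComplexPredictor

# Read the three dataset splits and wrap them into Sample objects.
train_ds, eval_ds, test_ds = [convert2samples(x) for x in read_datasets(DATADIR)]

# ComplexPredictor tries each candidate predictor per task and writes the
# top-k (here 3) predictions for the test split to a csv file.
candidates = [IdPredictor, ZerosPredictor]
save_predictions(ComplexPredictor, test_ds, TEST_SAVEPATH, k=3, args=[candidates])
```
--------------------------------------------------------------------------------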