├── .gitignore
├── README.md
├── example_classification.py
├── example_regression.py
├── example_sklearn_classification.py
├── example_sklearn_regression.py
├── img
│   ├── classification_forest_AxisAligned_hard.png
│   ├── classification_forest_AxisAligned_soft.png
│   ├── classification_forest_Conic_hard.png
│   ├── classification_forest_Conic_soft.png
│   ├── classification_forest_Linear_hard.png
│   ├── classification_forest_Linear_soft.png
│   ├── classification_forest_Parabola_hard.png
│   ├── classification_forest_Parabola_soft.png
│   ├── classification_forest_sklearn_AxisAligned_hard.png
│   ├── classification_forest_sklearn_AxisAligned_soft.png
│   ├── classification_forest_sklearn_Conic_hard.png
│   ├── classification_forest_sklearn_Conic_soft.png
│   ├── classification_forest_sklearn_Linear_hard.png
│   ├── classification_forest_sklearn_Linear_soft.png
│   ├── classification_forest_sklearn_Parabola_hard.png
│   ├── classification_forest_sklearn_Parabola_soft.png
│   ├── classification_tree_AxisAligned.png
│   ├── classification_tree_Conic.png
│   ├── classification_tree_Linear.png
│   ├── classification_tree_Parabola.png
│   ├── regression_forest_AxisAligned.png
│   ├── regression_forest_Conic.png
│   ├── regression_forest_Linear.png
│   ├── regression_forest_Parabola.png
│   ├── regression_forest_sklearn_AxisAligned.png
│   ├── regression_forest_sklearn_Conic.png
│   ├── regression_forest_sklearn_Linear.png
│   ├── regression_forest_sklearn_Parabola.png
│   ├── regression_tree_AxisAligned.png
│   ├── regression_tree_Conic.png
│   ├── regression_tree_Linear.png
│   └── regression_tree_Parabola.png
└── randomforest
    ├── __init__.py
    ├── classification_forest.py
    ├── classification_tree.py
    ├── forest.py
    ├── regression_forest.py
    ├── regression_tree.py
    ├── tree.py
    └── weakLearner.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[cod]
2 |
3 | # C extensions
4 | *.so
5 |
6 | # Packages
7 | *.egg
8 | *.egg-info
9 | dist/
10 | build/
11 | eggs/
12 | parts/
13 | bin/
14 | var/
15 | sdist/
16 | develop-eggs/
17 | .installed.cfg
18 | lib/
19 | lib64/
20 | tmp/
21 | __pycache__
22 |
23 | # Installer logs
24 | pip-log.txt
25 |
26 | # Unit test / coverage reports
27 | .coverage
28 | .tox
29 | nosetests.xml
30 |
31 | # Translations
32 | *.mo
33 |
34 | # Mr Developer
35 | .mr.developer.cfg
36 | .project
37 | .pydevproject
38 |
39 | # latex
40 | auto/
41 | *.aux
42 | *.glo
43 | *.idx
44 | *.log
45 | *.toc
46 | *.ist
47 | *.acn
48 | *.acr
49 | *.alg
50 | *.bbl
51 | *.blg
52 | *.dvi
53 | *.glg
54 | *.gls
55 | *.ilg
56 | *.ind
57 | *.lof
58 | *.lot
59 | *.maf
60 | *.mtc
61 | *.mtc1
62 | *.out
63 | *.synctex.gz
64 | *.nav
65 | *.pyg2
66 | *.snm
67 | *.vrb
68 |
69 | # emacs
70 | *~
71 |
72 | # Mac OS X
73 | .DS_Store
74 | .idea/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Random Forests in Python
2 | ========================
3 |
4 | This module is a basic implementation of Random Forests that lets users
5 | define their own weak learners (the tests performed at each node). It was
6 | written as a prototype for a C++ version with templates. It is slow, but pure
7 | Python and easy to play with. Anyone looking for a Random Forest usable on
8 | real problems in Python should start with [scikit-learn](http://scikit-learn.org/).
9 |
10 | It is written for Python 3.
11 |
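Basic usage follows a fit/predict pattern. Below is a minimal sketch; the
toy data and parameter values are made up for illustration:

```python
import numpy as np
from randomforest import ClassificationForest, AxisAligned

# two labelled point clouds
points = np.vstack([np.random.randn(50, 2),
                    np.random.randn(50, 2) + 5])
responses = np.array([0.0] * 50 + [1.0] * 50)

params = {'max_depth': 10,              # maximum tree depth
          'min_sample_count': 5,        # do not split smaller nodes
          'test_count': 100,            # candidate tests per node
          'test_class': AxisAligned()}  # the weak learner

forest = ClassificationForest(10, params)   # 10 trees
forest.fit(points, responses)
print(forest.predict([0.0, 0.0]))           # hard label
print(forest.predict_proba([0.0, 0.0]))     # fraction of tree votes per label
```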
12 |
13 | Classification example
14 | ----------------------
15 |
16 | These examples train on three spirals (without noise) and predict the whole
17 | plane. They try four different weak learners: axis-aligned, linear, conic and parabola.
18 |
19 | ``python example_classification.py``
20 |
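Each weak learner listed in ``weakLearner.__all__`` is tried in turn, as in
this self-contained sketch (tiny made-up data and smaller parameters than the
actual example script):

```python
import numpy as np
from randomforest import ClassificationTree, weakLearner

points = np.array([[0., 0.], [1., 0.], [0., 1.], [5., 5.], [6., 5.]])
responses = np.array([0., 0., 0., 1., 1.])

for learner in weakLearner.__all__:  # AxisAligned, Linear, Conic, Parabola
    params = {'max_depth': 5,
              'min_sample_count': 1,
              'test_count': 20,
              'test_class': getattr(weakLearner, learner)()}
    tree = ClassificationTree(params)
    tree.fit(points, responses)
    print(learner, tree.predict([5.5, 5.0]))
```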
21 | Using one single tree:
22 |
23 | Axis aligned:
24 |
25 |
26 | Linear:
27 |
28 |
29 | Conic:
30 |
31 |
32 | Parabola:
33 |
34 |
35 |
36 | Using a forest of 10 trees, with soft (``predict_proba``) or hard (``predict``) decision boundaries:
37 |
38 | Axis aligned:
39 |
40 |
41 | Linear:
42 |
43 |
44 | Conic:
45 |
46 |
47 | Parabola:
48 |
49 |
50 | Regression example
51 | ------------------
52 |
53 | These examples train on two circles and predict the center of the bottom-right
54 | quadrant (predicting the center of the image would be too easy!).
55 | They try four different weak learners: axis-aligned, linear, conic and parabola.
56 |
57 | ``python example_regression.py``
58 |
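Here, the responses are not class labels but 2-D offsets from each training
point to the target location; a forest prediction is the average of its trees'
leaf offsets. A minimal sketch with made-up data:

```python
import numpy as np
from randomforest import RegressionForest, AxisAligned

# points on a circle; each response is the offset to a common target
t = np.linspace(0, 2 * np.pi, num=50)
points = np.stack([30 * np.cos(t), 30 * np.sin(t)], axis=1)
target = np.array([10.0, 10.0])
responses = target[np.newaxis, :] - points

params = {'max_depth': 10,
          'min_sample_count': 5,
          'test_count': 100,
          'test_class': AxisAligned()}

forest = RegressionForest(10, params)
forest.fit(points, responses)
print(points[0] + forest.predict(points[0]))  # should land near (10, 10)
```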
59 | Using one single tree:
60 |
61 | Axis aligned:
62 |
63 |
64 | Linear:
65 |
66 |
67 | Conic:
68 |
69 |
70 | Parabola:
71 |
72 |
73 |
74 | Using a forest of 10 trees:
75 |
76 | Axis aligned:
77 |
78 |
79 | Linear:
80 |
81 |
82 | Conic:
83 |
84 |
85 | Parabola:
86 |
87 |
88 | Reference
89 | ---------
90 |
91 | A. Criminisi, J. Shotton, and E. Konukoglu, "Decision Forests for Classification,
92 | Regression, Density Estimation, Manifold Learning and Semi-Supervised Learning",
93 | no. MSR-TR-2011-114, 28 October 2011.
94 | http://research.microsoft.com/en-us/projects/decisionforests/
95 |
--------------------------------------------------------------------------------
/example_classification.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import numpy as np
4 | import cv2 # OpenCV
5 |
6 | from randomforest import *
7 | from randomforest import weakLearner
8 |
9 |
10 | def img_test(forest, points, colors, filename, size=512, radius=3, proba=False):
11 | img = np.zeros((size, size, 3))
12 | v_min = points.min()
13 | v_max = points.max()
14 | step = float(v_max - v_min) / img.shape[0]
15 | grid = np.arange(v_min, v_max, step)
16 |
17 | for x in grid:
18 | for y in grid:
19 | if proba:
20 | r = forest.predict_proba([x, y])
21 | col = np.zeros(3, dtype=float)
22 | for c in forest.labels:
23 | col += r[int(c)] * np.array(colors[int(c)])
24 | col = tuple(col.astype('int'))
25 | else:
26 | r = forest.predict([x, y])
27 | col = colors[int(r)]
28 | img[int((y - v_min) / step),
29 | int((x - v_min) / step), :] = col
30 |
31 | points = ((points - v_min) / step).astype('int')
32 |     for p, r in zip(points, responses):  # 'responses' is the module-level array
33 | cv2.circle(img, tuple(p), radius + 1, (0, 0, 0), thickness=-1)
34 | cv2.circle(img, tuple(p), radius, colors[int(r)], thickness=-1)
35 |
36 | cv2.imwrite(filename, img)
37 |
38 |
39 | t = np.arange(0, 10, 0.1)
40 |
41 | theta = [0, 30, 60]  # phase offsets (radians) between the three spirals
42 | colors = [(255, 0, 0),
43 | (0, 255, 0),
44 | (0, 0, 255)]
45 |
46 | points = np.zeros((len(t) * len(theta), 2))
47 | responses = np.zeros(len(t) * len(theta))
48 | for c in range(len(theta)):
49 | points[c * len(t):(c + 1) * len(t), 0] = t ** 2 * np.cos(t + theta[c]) # x
50 | points[c * len(t):(c + 1) * len(t), 1] = t ** 2 * np.sin(t + theta[c]) # y
51 | responses[c * len(t):(c + 1) * len(t)] = c
52 |
53 | for learner in weakLearner.__all__:
54 | params = {'max_depth': 10,
55 | 'min_sample_count': 5,
56 | 'test_count': 100,
57 | 'test_class': getattr(weakLearner, learner)()}
58 |
59 | tree = ClassificationTree(params)
60 | tree.fit(points, responses)
61 |
62 | # save tree to a text file
63 | tree.save('tree.txt')
64 | tree = ClassificationTree()
65 | tree.load('tree.txt', test=params['test_class'])
66 |
67 | for i in range(len(points)):
68 | print(responses[i], tree.predict(points[i]))
69 |
70 | img_test(tree, points, colors,
71 | 'img/classification_tree_' + str(learner) + '.png',
72 | proba=False)
73 |
74 | forest = ClassificationForest(10, params)
75 | forest.fit(points, responses)
76 |
77 | # save forest to a directory of text files
78 | forest.save('saved_model')
79 | forest = ClassificationForest()
80 | forest.load('saved_model', test=params['test_class'])
81 |
82 | for i in range(len(points)):
83 | print(responses[i], forest.predict_proba(points[i]))
84 |
85 | img_test(forest, points, colors,
86 | 'img/classification_forest_' + str(learner) + '_soft.png',
87 | proba=True)
88 | img_test(forest, points, colors,
89 | 'img/classification_forest_' + str(learner) + '_hard.png',
90 | proba=False)
91 |
--------------------------------------------------------------------------------
/example_regression.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import numpy as np
4 | import cv2
5 | import itertools
6 |
7 | from randomforest.regression_tree import RegressionTree
8 | from randomforest.regression_forest import RegressionForest
9 | from randomforest import weakLearner
10 |
11 |
12 | def img_test(tree, points, colors, filename, size=512, radius=3):
13 | img = np.zeros((size, size, 3), dtype='float')
14 | v_min = points.min()
15 | v_max = points.max()
16 | step = float(v_max - v_min) / img.shape[0]
17 | grid = np.arange(v_min, v_max, step)
18 |
19 | xy = np.array(list(itertools.product(grid, grid)))
20 |
21 | for x in grid:
22 | for y in grid:
23 | prediction = np.array([x, y]) + tree.predict([x, y])
24 | x0, y0 = np.round((prediction - v_min) / step).astype('int32')
25 | if 0 <= x0 < size and 0 <= y0 < size:
26 | img[y0, x0, :] += 1
27 |
28 | img *= 255 / img.max()
29 |
30 | points = ((points - v_min) / step).astype('int')
31 | for p, r in zip(points, responses):
32 | cv2.circle(img, tuple(p), radius + 1, (0, 0, 0), thickness=-1)
33 | cv2.circle(img, tuple(p), radius, (0, 255, 0), thickness=-1)
34 |
35 | cv2.imwrite(filename, img.astype('uint8'))
36 |
37 |
38 | t = np.linspace(0, 2 * np.pi, num=50)
39 |
40 | radius = [30, 60]
41 | colors = np.array([[255, 0, 0],
42 | [0, 255, 0],
43 | [0, 0, 255]], dtype='float32')
44 |
45 | points = np.zeros((len(t) * len(radius), 2))
46 | for r in range(len(radius)):
47 | points[r * len(t):(r + 1) * len(t), 0] = radius[r] * np.cos(t) # x
48 | points[r * len(t):(r + 1) * len(t), 1] = radius[r] * np.sin(t) # y
49 | center = points.mean(axis=0) + 45 * np.ones((2)) / np.sqrt(2)
50 | responses = center[np.newaxis, ...] - points
51 |
52 | for learner in weakLearner.__all__:
53 | print(learner)
54 | params = {'max_depth': 10,
55 | 'min_sample_count': 5,
56 | 'test_count': 100,
57 | 'test_class': getattr(weakLearner, learner)()}
58 | tree = RegressionTree(params)
59 | tree.fit(points, responses)
60 |
61 | # save tree to a text file
62 | tree.save('tree.txt')
63 | tree = RegressionTree()
64 | tree.load('tree.txt', test=params['test_class'])
65 |
66 | img_test(tree, points, colors,
67 | 'img/regression_tree_' + str(learner) + '.png')
68 |
69 | forest = RegressionForest(10, params)
70 | forest.fit(points, responses)
71 |
72 | # save forest to a directory of text files
73 | forest.save('saved_model')
74 | forest = RegressionForest()
75 | forest.load('saved_model', test=params['test_class'])
76 |
77 | img_test(forest, points, colors,
78 | 'img/regression_forest_' + str(learner) + '.png')
79 |
--------------------------------------------------------------------------------
/example_sklearn_classification.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import numpy as np
4 | import cv2 # OpenCV
5 | import itertools
6 |
7 | from randomforest import weakLearner
8 | from randomforest.weakLearner import FeatureExtractor
9 |
10 | from sklearn import ensemble
11 |
12 |
13 | def img_test(forest, feature_extractor, points, colors, filename, size=512,
14 | radius=3, proba=True):
15 | img = np.zeros((size, size, 3))
16 | v_min = points.min()
17 | v_max = points.max()
18 | step = float(v_max - v_min) / img.shape[0]
19 | grid = np.arange(v_min, v_max, step)
20 |
21 | xy = np.array(list(itertools.product(grid, grid)))
22 | features = feature_extractor.apply_all(xy)
23 |
24 | if proba:
25 | r = forest.predict_proba(features)
26 | col = np.dot(r, colors)
27 | else:
28 | r = forest.predict(features).astype('int32')
29 | col = colors[r]
30 | img[((xy[:, 1] - v_min) / step).astype('int32'),
31 | ((xy[:, 0] - v_min) / step).astype('int32')] = col
32 |
33 | points = ((points - v_min) / step).astype('int')
34 | for p, r in zip(points, responses):
35 | col = tuple(colors[int(r)])
36 | cv2.circle(img, tuple(p), radius + 1, (0, 0, 0), thickness=-1)
37 | cv2.circle(img, tuple(p), radius, col, thickness=-1)
38 |
39 | cv2.imwrite(filename, img)
40 |
41 |
42 | t = np.arange(0, 10, 0.1)
43 |
44 | theta = [0, 30, 60]
45 | colors = np.array([[255, 0, 0],
46 | [0, 255, 0],
47 | [0, 0, 255]], dtype='float')
48 |
49 | points = np.zeros((len(t) * len(theta), 2))
50 | responses = np.zeros(len(t) * len(theta))
51 | for c in range(len(theta)):
52 | points[c * len(t):(c + 1) * len(t), 0] = t ** 2 * np.cos(t + theta[c]) # x
53 | points[c * len(t):(c + 1) * len(t), 1] = t ** 2 * np.sin(t + theta[c]) # y
54 | responses[c * len(t):(c + 1) * len(t)] = c
55 |
56 | for learner in weakLearner.__all__:
57 | test_class = getattr(weakLearner, learner)()
58 | params = {'max_depth': None,
59 | 'min_samples_split': 2,
60 | 'n_jobs': 1,
61 | 'n_estimators': 100}
62 |
63 | print(str(learner))
64 |
65 | forest = ensemble.RandomForestClassifier(**params)
66 | feature_extractor = FeatureExtractor(test_class, n_features=1000)
67 | features = feature_extractor.fit_transform(points)
68 | forest.fit(features, responses)
69 |
70 | img_test(forest, feature_extractor, points, colors,
71 | 'img/classification_forest_sklearn_' + str(learner) + '_soft.png',
72 | proba=True)
73 | img_test(forest, feature_extractor, points, colors,
74 | 'img/classification_forest_sklearn_' + str(learner) + '_hard.png',
75 | proba=False)
76 |
--------------------------------------------------------------------------------
/example_sklearn_regression.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import numpy as np
4 | import cv2 # OpenCV
5 | import itertools
6 |
7 | from randomforest import weakLearner
8 | from randomforest.weakLearner import FeatureExtractor
9 |
10 | from sklearn import ensemble
11 |
12 |
13 | def img_test(tree, feature_extractor, points, colors, filename, size=512,
14 | radius=3):
15 | img = np.zeros((size, size, 3), dtype='float')
16 | v_min = points.min()
17 | v_max = points.max()
18 | step = float(v_max - v_min) / img.shape[0]
19 | grid = np.arange(v_min, v_max, step)
20 |
21 | xy = np.array(list(itertools.product(grid, grid)))
22 | features = feature_extractor.apply_all(xy)
23 |
24 | predictions = xy + tree.predict(features)
25 | predictions = np.round((predictions - v_min) / step).astype('int32')
26 |
27 | flat_indices = np.ravel_multi_index(np.transpose(predictions),
28 | img.shape[:2], mode='clip')
29 | bins = np.bincount(flat_indices, minlength=np.prod(img.shape[:2]))
30 | img += bins.reshape(img.shape[:2])[..., np.newaxis]
31 |
32 | # artefacts of clipping
33 | img[0] = 0
34 | img[-1] = 0
35 | img[:, 0] = 0
36 | img[:, -1] = 0
37 |
38 | img *= 255 / img.max()
39 |
40 | points = ((points - v_min) / step).astype('int')
41 | for p, r in zip(points, responses):
42 | cv2.circle(img, tuple(p), radius + 1, (0, 0, 0), thickness=-1)
43 | cv2.circle(img, tuple(p), radius, (0, 255, 0), thickness=-1)
44 |
45 | cv2.imwrite(filename, img.astype('uint8'))
46 |
47 |
48 | t = np.linspace(0, 2 * np.pi, num=50)
49 |
50 | radius = [30, 60]
51 | colors = np.array([[255, 0, 0],
52 | [0, 255, 0],
53 | [0, 0, 255]], dtype='float32')
54 |
55 | points = np.zeros((len(t) * len(radius), 2))
56 | for r in range(len(radius)):
57 | points[r * len(t):(r + 1) * len(t), 0] = radius[r] * np.cos(t) # x
58 | points[r * len(t):(r + 1) * len(t), 1] = radius[r] * np.sin(t) # y
59 | center = points.mean(axis=0) + 45 * np.ones((2)) / np.sqrt(2)
60 | responses = center[np.newaxis, ...] - points
61 |
62 | for learner in weakLearner.__all__:
63 | test_class = getattr(weakLearner, learner)()
64 | params = {'max_depth': None,
65 | 'min_samples_split': 2,
66 | 'n_jobs': 1,
67 | 'n_estimators': 100}
68 |
69 | print(str(learner))
70 |
71 | forest = ensemble.RandomForestRegressor(**params)
72 | feature_extractor = FeatureExtractor(test_class, n_features=1000)
73 | features = feature_extractor.fit_transform(points)
74 | forest.fit(features, responses)
75 |
76 | img_test(forest, feature_extractor, points, colors,
77 | 'img/regression_forest_sklearn_' + str(learner) + '.png')
78 |
--------------------------------------------------------------------------------
/img/classification_forest_AxisAligned_hard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_AxisAligned_hard.png
--------------------------------------------------------------------------------
/img/classification_forest_AxisAligned_soft.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_AxisAligned_soft.png
--------------------------------------------------------------------------------
/img/classification_forest_Conic_hard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_Conic_hard.png
--------------------------------------------------------------------------------
/img/classification_forest_Conic_soft.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_Conic_soft.png
--------------------------------------------------------------------------------
/img/classification_forest_Linear_hard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_Linear_hard.png
--------------------------------------------------------------------------------
/img/classification_forest_Linear_soft.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_Linear_soft.png
--------------------------------------------------------------------------------
/img/classification_forest_Parabola_hard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_Parabola_hard.png
--------------------------------------------------------------------------------
/img/classification_forest_Parabola_soft.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_Parabola_soft.png
--------------------------------------------------------------------------------
/img/classification_forest_sklearn_AxisAligned_hard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_AxisAligned_hard.png
--------------------------------------------------------------------------------
/img/classification_forest_sklearn_AxisAligned_soft.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_AxisAligned_soft.png
--------------------------------------------------------------------------------
/img/classification_forest_sklearn_Conic_hard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_Conic_hard.png
--------------------------------------------------------------------------------
/img/classification_forest_sklearn_Conic_soft.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_Conic_soft.png
--------------------------------------------------------------------------------
/img/classification_forest_sklearn_Linear_hard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_Linear_hard.png
--------------------------------------------------------------------------------
/img/classification_forest_sklearn_Linear_soft.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_Linear_soft.png
--------------------------------------------------------------------------------
/img/classification_forest_sklearn_Parabola_hard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_Parabola_hard.png
--------------------------------------------------------------------------------
/img/classification_forest_sklearn_Parabola_soft.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_Parabola_soft.png
--------------------------------------------------------------------------------
/img/classification_tree_AxisAligned.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_tree_AxisAligned.png
--------------------------------------------------------------------------------
/img/classification_tree_Conic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_tree_Conic.png
--------------------------------------------------------------------------------
/img/classification_tree_Linear.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_tree_Linear.png
--------------------------------------------------------------------------------
/img/classification_tree_Parabola.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_tree_Parabola.png
--------------------------------------------------------------------------------
/img/regression_forest_AxisAligned.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_AxisAligned.png
--------------------------------------------------------------------------------
/img/regression_forest_Conic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_Conic.png
--------------------------------------------------------------------------------
/img/regression_forest_Linear.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_Linear.png
--------------------------------------------------------------------------------
/img/regression_forest_Parabola.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_Parabola.png
--------------------------------------------------------------------------------
/img/regression_forest_sklearn_AxisAligned.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_sklearn_AxisAligned.png
--------------------------------------------------------------------------------
/img/regression_forest_sklearn_Conic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_sklearn_Conic.png
--------------------------------------------------------------------------------
/img/regression_forest_sklearn_Linear.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_sklearn_Linear.png
--------------------------------------------------------------------------------
/img/regression_forest_sklearn_Parabola.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_sklearn_Parabola.png
--------------------------------------------------------------------------------
/img/regression_tree_AxisAligned.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_tree_AxisAligned.png
--------------------------------------------------------------------------------
/img/regression_tree_Conic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_tree_Conic.png
--------------------------------------------------------------------------------
/img/regression_tree_Linear.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_tree_Linear.png
--------------------------------------------------------------------------------
/img/regression_tree_Parabola.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_tree_Parabola.png
--------------------------------------------------------------------------------
/randomforest/__init__.py:
--------------------------------------------------------------------------------
1 | from .classification_tree import ClassificationTree
2 | from .classification_forest import ClassificationForest
3 | from .regression_tree import RegressionTree
4 | from .regression_forest import RegressionForest
5 | from .weakLearner import *
6 |
7 | __all__ = ["ClassificationTree",
8 | "ClassificationForest",
9 | "RegressionTree",
10 | "RegressionForest"]
11 |
12 | from . import weakLearner
13 |
14 | __all__.extend(weakLearner.__all__)
15 |
--------------------------------------------------------------------------------
/randomforest/classification_forest.py:
--------------------------------------------------------------------------------
1 | from .forest import Forest
2 | from .classification_tree import ClassificationTree
3 |
4 |
5 | class ClassificationForest(Forest):
6 | tree_class = ClassificationTree
7 | labels = []
8 |
9 | def fit(self, points, responses):
10 | self.labels = []
11 | for r in responses:
12 | if r not in self.labels:
13 | self.labels.append(r)
14 | for i in range(self.ntrees):
15 | self.trees.append(ClassificationTree(self.tree_params))
16 | self.trees[i].fit(points, responses, self.labels)
17 |
18 | def predict(self, point):
19 | r = {}
20 | for c in self.labels:
21 | r[c] = 0.0
22 | for i in range(self.ntrees):
23 | response = int(self.trees[i].predict(point))
24 | r[response] += 1
25 |
26 | response = None
27 | max_count = -1
28 | for c in self.labels:
29 | if r[c] > max_count:
30 | response = c
31 | max_count = r[c]
32 | return response
33 |
34 | def predict_proba(self, point):
35 | r = {}
36 | for c in self.labels:
37 | r[c] = 0.0
38 | for i in range(self.ntrees):
39 | response = int(self.trees[i].predict(point))
40 | r[response] += 1
41 |
42 | for c in self.labels:
43 | r[c] /= self.ntrees
44 | return r
45 |
--------------------------------------------------------------------------------
/randomforest/classification_tree.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from .tree import Tree
4 |
5 |
6 | class ClassificationTree(Tree):
7 | labels = []
8 |
9 | def entropy(self, d):
10 | E = 0.0
11 |         for r in d.keys():  # 'count' itself gives proba 1 and adds 0 to E
12 | if d['count'] > 0 and d[r] > 0:
13 | proba = float(d[r]) / d['count']
14 | E -= proba * np.log(proba)
15 | return E
16 |
17 | def split_points(self, points, responses, test):
18 | left = {'count': 0.0}
19 | right = {'count': 0.0}
20 | for c in self.labels:
21 | right[c] = 0.0
22 | left[c] = 0.0
23 | for p, r in zip(points, responses):
24 | if self.params['test_class'].run(p, test):
25 | right[r] += 1
26 | right['count'] += 1
27 | else:
28 | left[r] += 1
29 | left['count'] += 1
30 | return left, right
31 |
32 | def make_leaf(self, all_points):
33 | response = None
34 | max_count = -1
35 | for c in self.labels:
36 | if all_points[c] > max_count:
37 | response = c
38 | max_count = all_points[c]
39 | self.leaf = response
40 |
41 | def fit(self, points, responses, labels=None, depth=0):
42 | if labels is None:
43 | self.labels = []
44 | for r in responses:
45 | if r not in self.labels:
46 | self.labels.append(r)
47 | else:
48 | self.labels = labels
49 |
50 | print("Number of points:", len(points))
51 |
52 | all_points = {'count': len(points)}
53 | for c in self.labels:
54 | all_points[c] = 0.0
55 | for p, r in zip(points, responses):
56 | all_points[r] += 1
57 |
58 | H = self.entropy(all_points)
59 | print("Current entropy:", H)
60 |
61 | if (depth == self.params['max_depth']
62 | or len(points) <= self.params['min_sample_count']
63 | or H == 0):
64 | self.make_leaf(all_points)
65 | return
66 |
67 |         all_tests = self.params['test_class'].generate_all(
68 |             points, self.params['test_count'])
69 |
70 | best_gain = 0
71 | best_i = None
72 | for i, test in enumerate(all_tests):
73 | left, right = self.split_points(points, responses, test)
74 |             I = H - (left['count'] / all_points['count'] * self.entropy(left)
75 |                      + right['count'] / all_points['count']
76 |                      * self.entropy(right))
77 | if I > best_gain:
78 | best_gain = I
79 | best_i = i
80 |
81 | print("Information gain:", best_gain)
82 |
83 | if best_i is None:
84 | print("no best split found: creating a leaf")
85 | self.make_leaf(all_points)
86 | return
87 |
88 | self.test = all_tests[best_i]
89 | print("TEST:", self.test)
90 | left_points = []
91 | left_responses = []
92 | right_points = []
93 | right_responses = []
94 | for p, r in zip(points, responses):
95 | if self.params['test_class'].run(p, self.test):
96 | right_points.append(p)
97 | right_responses.append(r)
98 | else:
99 | left_points.append(p)
100 | left_responses.append(r)
101 | self.left = ClassificationTree(self.params)
102 | self.right = ClassificationTree(self.params)
103 |
104 | self.left.fit(np.array(left_points), left_responses, self.labels,
105 | depth + 1)
106 | self.right.fit(np.array(right_points), right_responses, self.labels,
107 | depth + 1)
108 |
--------------------------------------------------------------------------------
/randomforest/forest.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | from glob import glob
4 | import shutil
5 |
6 | from .weakLearner import WeakLearner, AxisAligned
7 |
8 |
9 | class Forest(object):
10 | def __init__(self,
11 | ntrees=20,
12 | tree_params={'max_depth': 10,
13 | 'min_sample_count': 5,
14 | 'test_count': 100,
15 | 'test_class': AxisAligned()}):
16 | self.ntrees = ntrees
17 | self.tree_params = tree_params
18 | self.trees = []
19 |
20 | def __len__(self):
21 | return self.ntrees
22 |
23 | def save(self, folder):
24 | if os.path.exists(folder):
25 | shutil.rmtree(folder)
26 | os.makedirs(folder)
27 |         template = '%0' + str(len(str(self.ntrees - 1))) + 'd.data'  # zero-pad to the widest index
28 | for i in range(self.ntrees):
29 | filename = template % i
30 | self.trees[i].save(folder + '/' + filename)
31 |
32 | def load(self, folder, test=WeakLearner()):
33 | self.trees = []
34 |         for f in sorted(glob(folder + '/*')):  # deterministic order
35 | self.trees.append(self.tree_class())
36 | self.trees[-1].load(f, test)
37 | self.ntrees = len(self.trees)
38 | if 'labels' in dir(self):
39 | self.labels = self.trees[0].labels
40 |
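# Usage sketch (mirroring the example scripts): a forest is saved as one text
# file per tree inside a folder, and the same weak learner must be passed
# back to load(), which forwards it to each tree:
#
#     forest.save('saved_model')
#     forest = ClassificationForest()
#     forest.load('saved_model', test=params['test_class'])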
--------------------------------------------------------------------------------
/randomforest/regression_forest.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from .regression_tree import RegressionTree
4 | from .forest import Forest
5 |
6 |
7 | class RegressionForest(Forest):
8 | tree_class = RegressionTree
9 |
10 | def fit(self, points, responses):
11 | for i in range(self.ntrees):
12 | self.trees.append(RegressionTree(self.tree_params))
13 | self.trees[i].fit(points, responses)
14 |
15 | def predict(self, point):
16 | response = []
17 | for i in range(self.ntrees):
18 | response.append(self.trees[i].predict(point))
19 | return np.mean(response, axis=0)
20 |
--------------------------------------------------------------------------------
/randomforest/regression_tree.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from .tree import Tree
4 |
5 |
6 | class RegressionTree(Tree):
7 | def MSE(self, responses):
8 | mean = np.mean(responses, axis=0)
9 | return np.mean((responses - mean) ** 2)
10 |
11 | def split_points(self, points, responses, test):
12 | left = []
13 | right = []
14 | for p, r in zip(points, responses):
15 | if self.params['test_class'].run(p, test):
16 | right.append(r)
17 | else:
18 | left.append(r)
19 | return left, right
20 |
21 | def make_leaf(self, responses):
22 | self.leaf = np.mean(responses, axis=0)
23 |
24 | def fit(self, points, responses, depth=0):
25 |
26 | print("Number of points:", len(points))
27 |
28 | error = self.MSE(responses)
29 | print("Current MSE:", error)
30 |
31 | if (depth == self.params['max_depth']
32 | or len(points) <= self.params['min_sample_count']
33 | or error == 0):
34 | self.make_leaf(responses)
35 | return
36 |
37 |         all_tests = self.params['test_class'].generate_all(
38 |             points, self.params['test_count'])
39 |
40 | best_error = np.inf
41 | best_i = None
42 | for i, test in enumerate(all_tests):
43 | left, right = self.split_points(points, responses, test)
44 | error = (len(left) / len(points) * self.MSE(left)
45 | + len(right) / len(points) * self.MSE(right))
46 | if error < best_error:
47 | best_error = error
48 | best_i = i
49 |
50 | print("Best error:", best_error)
51 |
52 | if best_i is None:
53 | print("no best split found: creating a leaf")
54 | self.make_leaf(responses)
55 | return
56 |
57 | self.test = all_tests[best_i]
58 | print("TEST:", self.test)
59 | left_points = []
60 | left_responses = []
61 | right_points = []
62 | right_responses = []
63 | for p, r in zip(points, responses):
64 | if self.params['test_class'].run(p, self.test):
65 | right_points.append(p)
66 | right_responses.append(r)
67 | else:
68 | left_points.append(p)
69 | left_responses.append(r)
70 | self.left = RegressionTree(self.params)
71 | self.right = RegressionTree(self.params)
72 |
73 | self.left.fit(np.array(left_points), left_responses, depth + 1)
74 | self.right.fit(np.array(right_points), right_responses, depth + 1)
75 |
--------------------------------------------------------------------------------
/randomforest/tree.py:
--------------------------------------------------------------------------------
1 | # Format for saving tree:
2 | # for an internal node:
3 | # left right test
4 | # for a leaf
5 | # -1 -1 class
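#
# Overall layout of a saved file (as written by save() below):
#   line 0: params as tab-separated key/value pairs
#   line 1: tab-separated class labels (classification), empty for regression
#   line 2: the root node; 'left' and 'right' are line indices in this file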
6 |
7 | import numpy as np
8 | from .weakLearner import WeakLearner, AxisAligned
9 |
10 |
11 | class Tree(object):
12 | def __init__(self,
13 | params={'max_depth': 10,
14 | 'min_sample_count': 5,
15 | 'test_count': 100,
16 | 'test_class': AxisAligned()}):
17 | self.params = params
18 | self.leaf = None
19 | self.left = None
20 | self.right = None
21 | self.test = None
22 |
23 | def __len__(self):
24 | if self.leaf is not None:
25 | return 1
26 | else:
27 | return 1 + len(self.left) + len(self.right)
28 |
29 | def write_node(self, line_index, file_buffer, ref_next_available_id):
30 | if self.leaf is not None:
31 | file_buffer[line_index] = '-1\t-1\t'
32 | if isinstance(self.leaf, np.ndarray):
33 | file_buffer[line_index] += '\t'.join(map(str, self.leaf))
34 | else:
35 | file_buffer[line_index] += str(self.leaf)
36 | return
37 |
38 | else:
39 | left_index = ref_next_available_id[0]
40 | right_index = ref_next_available_id[0] + 1
41 | ref_next_available_id[0] += 2
42 |
43 | file_buffer[line_index] += (str(left_index)
44 | + '\t'
45 | + str(right_index)
46 | + '\t'
47 | + '\t'.join(map(str, self.test))
48 | )
49 |
50 | self.left.write_node(left_index, file_buffer, ref_next_available_id)
51 | self.right.write_node(right_index, file_buffer,
52 | ref_next_available_id)
53 |
54 | def save(self, filename):
55 | file_buffer = ['' for i in range(2 + len(self))]
56 |
57 | # save params
58 | keys = list(self.params.keys())
59 | for i in range(len(keys)):
60 | file_buffer[0] += keys[i] + '\t' + str(self.params[keys[i]])
61 | if i < len(keys) - 1:
62 | file_buffer[0] += '\t'
63 |
64 | # save labels
65 | if 'labels' in dir(self):
66 | for i in range(len(self.labels)):
67 | file_buffer[1] += str(int(self.labels[i]))
68 | if i < len(self.labels) - 1:
69 | file_buffer[1] += '\t'
70 |
71 | # save nodes
72 | line_index = 2
73 | ref_next_available_id = [line_index + 1]
74 | self.write_node(line_index, file_buffer, ref_next_available_id)
75 | f = open(filename, 'wt')
76 | for line in file_buffer:
77 | f.write(line + '\n')
78 | f.close()
79 |
80 | def load_node(self, id, lines):
81 | line = lines[id].split('\t')
82 | line = list(map(float, line))
83 | if line[0] == -1:
84 | if 'labels' in dir(self):
85 | self.leaf = line[2]
86 | else:
87 | self.leaf = np.array(line[2:])
88 | return
89 | else:
90 |             self.left = self.__class__(self.params)   # keep the subclass type
91 |             self.right = self.__class__(self.params)  # so leaves are parsed correctly
92 | self.test = line[2:]
93 | self.left.load_node(int(line[0]), lines)
94 | self.right.load_node(int(line[1]), lines)
95 | return
96 |
97 | def load(self, filename, test=WeakLearner()):
98 | f = open(filename, 'rt')
99 |
100 | lines = f.readlines()
101 | lines = list(map(lambda x: x.rstrip(), lines))
102 |
103 | # read params
104 | params = lines[0].split('\t')
105 | self.params = dict(zip(params[::2], params[1::2]))
106 | self.params['max_depth'] = int(self.params['max_depth'])
107 | self.params['min_sample_count'] = int(self.params['min_sample_count'])
108 | self.params['test_count'] = int(self.params['test_count'])
109 |
110 | assert self.params['test_class'] == str(test), "expected %s, got %s" % (
111 | self.params['test_class'],
112 | str(test))
113 |
114 | self.params['test_class'] = test
115 |
116 | # read labels
117 | if 'labels' in dir(self):
118 | self.labels = list(map(int, lines[1].split('\t')))
119 |
120 | self.load_node(2, lines)
121 |
122 | def predict(self, point):
123 | if self.leaf is not None:
124 | return self.leaf
125 | else:
126 | if self.params['test_class'].run(point, self.test):
127 | return self.right.predict(point)
128 | else:
129 | return self.left.predict(point)
130 |
--------------------------------------------------------------------------------
/randomforest/weakLearner.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy.random import uniform, randint  # random_integers was removed from recent NumPy
3 |
4 | __all__ = ["AxisAligned",
5 | "Linear",
6 | "Conic",
7 | "Parabola"]
8 |
9 |
10 | class WeakLearner(object):
11 |     def generate_all(self, points, count):
12 |         raise NotImplementedError
13 | 
14 |     def __str__(self):
15 |         return self.__class__.__name__
16 | 
17 |     def run(self, point, test):
18 |         raise NotImplementedError
19 |
20 |
21 | class AxisAligned(WeakLearner):
22 | """Axis aligned"""
23 |
24 | def __str__(self):
25 | return "AxisAligned"
26 |
27 | def generate_all(self, points, count):
28 | x_min = points.min(0)[0]
29 | y_min = points.min(0)[1]
30 | x_max = points.max(0)[0]
31 | y_max = points.max(0)[1]
32 | tests = []
33 | tests.extend(zip(np.zeros(count, dtype=int),
34 | uniform(x_min, x_max, count)))
35 | tests.extend(zip(np.ones(count, dtype=int),
36 | uniform(y_min, y_max, count)))
37 | return np.array(tests)
38 |
39 | def run(self, point, test):
40 | return point[int(test[0])] > test[1]
41 |
42 |     def run_all(self, points, tests):
43 |         # cast the feature index to int: generate_all returns a float array
44 |         return np.array(list(map(lambda t: points[:, int(t[0])] > t[1],
45 |                                  tests))).T
45 |
46 |
47 | class Linear(WeakLearner):
48 | """Linear"""
49 |
50 | def __str__(self):
51 | return "Linear"
52 |
53 | def generate_all(self, points, count):
54 | x_min = points.min(0)[0]
55 | y_min = points.min(0)[1]
56 | x_max = points.max(0)[0]
57 | y_max = points.max(0)[1]
58 | tests = []
59 | tests.extend(zip(uniform(x_min, x_max, count),
60 | uniform(y_min, y_max, count),
61 | uniform(0, 360, count)))
62 | return tests
63 |
64 | def run(self, point, test):
65 | theta = test[2] * np.pi / 180
66 | return (np.cos(theta) * (point[0] - test[0]) +
67 | np.sin(theta) * (point[1] - test[1])) > 0
68 |
69 | def run_all(self, points, tests):
70 | def _run(test):
71 | theta = test[2] * np.pi / 180
72 | return (np.cos(theta) * (points[:, 0] - test[0]) +
73 | np.sin(theta) * (points[:, 1] - test[1])) > 0
74 |
75 | return np.array(list(map(_run, tests))).T
76 |
77 |
78 | class Conic(WeakLearner):
79 | """Non-linear: conic"""
80 |
81 | def __str__(self):
82 | return "Conic"
83 |
84 | def generate_all(self, points, count):
85 | x_min = points.min(0)[0]
86 | y_min = points.min(0)[1]
87 | x_max = points.max(0)[0]
88 | y_max = points.max(0)[1]
89 | scale = max(points.max(), abs(points.min()))
90 | tests = []
91 |         tests.extend(zip(uniform(x_min, x_max, count),
92 |                          uniform(y_min, y_max, count),
93 |                          uniform(-scale, scale,
94 |                                  count) * randint(0, 2, count),
95 |                          uniform(-scale, scale,
96 |                                  count) * randint(0, 2, count),
97 |                          uniform(-scale, scale,
98 |                                  count) * randint(0, 2, count),
99 |                          uniform(-scale, scale,
100 |                                  count) * randint(0, 2, count),
101 |                          uniform(-scale, scale,
102 |                                  count) * randint(0, 2, count),
103 |                          uniform(-scale, scale,
104 |                                  count) * randint(0, 2, count)))
105 |
106 | return tests
107 |
108 | def run(self, point, test):
109 | x = (point[0] - test[0])
110 | y = (point[1] - test[1])
111 | A, B, C, D, E, F = test[2:]
112 |         return (A * x * x + B * y * y + C * x * y + D * x + E * y + F) > 0  # C weights the cross term
113 |
114 | def run_all(self, points, tests):
115 | def _run(test):
116 | x = (points[:, 0] - test[0])
117 | y = (points[:, 1] - test[1])
118 | A, B, C, D, E, F = test[2:]
119 |             return (A * x * x + B * y * y + C * x * y + D * x + E * y + F) > 0
120 |
121 | return np.array(list(map(_run, tests))).T
122 |
123 |
124 | class Parabola(WeakLearner):
125 | """Non-linear: parabola"""
126 |
127 | def __str__(self):
128 | return "Parabola"
129 |
130 | def generate_all(self, points, count):
131 | x_min = points.min(0)[0]
132 | y_min = points.min(0)[1]
133 | x_max = points.max(0)[0]
134 | y_max = points.max(0)[1]
135 | scale = abs(points.max() - points.min())
136 | tests = []
137 | tests.extend(zip(uniform(2 * x_min, 2 * x_max, count),
138 | uniform(2 * y_min, 2 * y_max, count),
139 | uniform(-scale, scale, count),
140 |                          randint(0, 2, count)))
141 |
142 | return tests
143 |
144 | def run(self, point, test):
145 | x = (point[0] - test[0])
146 | y = (point[1] - test[1])
147 | p, axis = test[2:]
148 | if axis == 0:
149 | return x * x < p * y
150 | else:
151 | return y * y < p * x
152 |
153 | def run_all(self, points, tests):
154 | def _run(test):
155 | x = (points[:, 0] - test[0])
156 | y = (points[:, 1] - test[1])
157 | p, axis = test[2:]
158 | if axis == 0:
159 | return x * x < p * y
160 | else:
161 | return y * y < p * x
162 |
163 | return np.array(list(map(_run, tests))).T
164 |
165 |
166 | class FeatureExtractor(object):
167 | def __init__(self, learner, n_features):
168 | self.learner = learner
169 | self.n_features = n_features
170 | self.tests = []
171 |
172 | def fit_transform(self, points):
173 | self.tests = self.learner.generate_all(points, self.n_features)
174 | return self.apply_all(points)
175 |
176 | def apply(self, point):
177 | return np.array(list(map(lambda t: self.learner.run(point, t),
178 | self.tests)))
179 |
180 | def apply_all(self, points):
181 | return self.learner.run_all(points, self.tests)
182 |
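# Usage sketch for FeatureExtractor (as in the example_sklearn_*.py scripts):
# it maps each 2-D point to a vector of n_features boolean test responses,
# which any scikit-learn estimator can then consume:
#
#     extractor = FeatureExtractor(Conic(), n_features=1000)
#     features = extractor.fit_transform(points)  # shape (n_points, n_features)
#     clf = sklearn.ensemble.RandomForestClassifier()
#     clf.fit(features, responses)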
--------------------------------------------------------------------------------