├── .gitignore ├── README.md ├── example_classification.py ├── example_regression.py ├── example_sklearn_classification.py ├── example_sklearn_regression.py ├── img ├── classification_forest_AxisAligned_hard.png ├── classification_forest_AxisAligned_soft.png ├── classification_forest_Conic_hard.png ├── classification_forest_Conic_soft.png ├── classification_forest_Linear_hard.png ├── classification_forest_Linear_soft.png ├── classification_forest_Parabola_hard.png ├── classification_forest_Parabola_soft.png ├── classification_forest_sklearn_AxisAligned_hard.png ├── classification_forest_sklearn_AxisAligned_soft.png ├── classification_forest_sklearn_Conic_hard.png ├── classification_forest_sklearn_Conic_soft.png ├── classification_forest_sklearn_Linear_hard.png ├── classification_forest_sklearn_Linear_soft.png ├── classification_forest_sklearn_Parabola_hard.png ├── classification_forest_sklearn_Parabola_soft.png ├── classification_tree_AxisAligned.png ├── classification_tree_Conic.png ├── classification_tree_Linear.png ├── classification_tree_Parabola.png ├── regression_forest_AxisAligned.png ├── regression_forest_Conic.png ├── regression_forest_Linear.png ├── regression_forest_Parabola.png ├── regression_forest_sklearn_AxisAligned.png ├── regression_forest_sklearn_Conic.png ├── regression_forest_sklearn_Linear.png ├── regression_forest_sklearn_Parabola.png ├── regression_tree_AxisAligned.png ├── regression_tree_Conic.png ├── regression_tree_Linear.png └── regression_tree_Parabola.png └── randomforest ├── __init__.py ├── classification_forest.py ├── classification_tree.py ├── forest.py ├── regression_forest.py ├── regression_tree.py ├── tree.py └── weakLearner.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist/ 10 | build/ 11 | eggs/ 12 | parts/ 13 | bin/ 14 | var/ 15 | sdist/ 16 | develop-eggs/ 17 | .installed.cfg 18 | lib/ 19 | lib64/ 20 | tmp/ 21 | __pycache__ 22 | 23 | # Installer logs 24 | pip-log.txt 25 | 26 | # Unit test / coverage reports 27 | .coverage 28 | .tox 29 | nosetests.xml 30 | 31 | # Translations 32 | *.mo 33 | 34 | # Mr Developer 35 | .mr.developer.cfg 36 | .project 37 | .pydevproject 38 | 39 | # latex 40 | auto/ 41 | *.aux 42 | *.glo 43 | *.idx 44 | *.log 45 | *.toc 46 | *.ist 47 | *.acn 48 | *.acr 49 | *.alg 50 | *.bbl 51 | *.blg 52 | *.dvi 53 | *.glg 54 | *.gls 55 | *.ilg 56 | *.ind 57 | *.lof 58 | *.lot 59 | *.maf 60 | *.mtc 61 | *.mtc1 62 | *.out 63 | *.synctex.gz 64 | *.nav 65 | *.pyg2 66 | *.snm 67 | *.vrb 68 | 69 | # emacs 70 | *~ 71 | 72 | # Mac OS X 73 | .DS_Store 74 | .idea/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Random Forests in Python 2 | ======================== 3 | 4 | This module is a basic implementation of Random Forests which allows users to 5 | define their own weak learners (the tests performed at each node). It was 6 | written as a prototype for a C++ version with templates. It is slow, but pure 7 | Python and easy to play with. People looking for a Python Random Forest usable in real 8 | problems should start with [scikit-learn](http://scikit-learn.org/). 9 | 10 | It is written for Python 3. 11 | 12 | 13 | Classification example 14 | ---------------------- 15 | 16 | These examples train on three spirals (without noise) and predict the whole 17 | plane. 
They try 4 different weak learners: axis aligned, linear, conic and parabolas. 18 | 19 | ``python example_classification.py`` 20 | 21 | Using one single tree: 22 | 23 | Axis aligned: 24 | ![](img/classification_tree_AxisAligned.png) 25 | 26 | Linear: 27 | ![](img/classification_tree_Linear.png) 28 | 29 | Conic: 30 | ![](img/classification_tree_Conic.png) 31 | 32 | Parabola: 33 | ![](img/classification_tree_Parabola.png) 34 | 35 | 36 | Using a forest of 10 trees, with soft or hard decision boundaries: 37 | 38 | Axis aligned: 39 | ![](img/classification_forest_AxisAligned_soft.png) ![](img/classification_forest_AxisAligned_hard.png) 40 | 41 | Linear: 42 | ![](img/classification_forest_Linear_soft.png) ![](img/classification_forest_Linear_hard.png) 43 | 44 | Conic: 45 | ![](img/classification_forest_Conic_soft.png) ![](img/classification_forest_Conic_hard.png) 46 | 47 | Parabola: 48 | ![](img/classification_forest_Parabola_soft.png) ![](img/classification_forest_Parabola_hard.png) 49 | 50 | Regression example 51 | ------------------ 52 | 53 | These examples train on two circles and predict the center of the bottom-right quadrant 54 | (predicting the center of the image would be too easy!). 55 | They try 4 different weak learners: axis aligned, linear, conic and parabolas. 56 | 57 | ``python example_regression.py`` 58 | 59 | Using one single tree: 60 | 61 | Axis aligned: 62 | ![](img/regression_tree_AxisAligned.png) 63 | 64 | Linear: 65 | ![](img/regression_tree_Linear.png) 66 | 67 | Conic: 68 | ![](img/regression_tree_Conic.png) 69 | 70 | Parabola: 71 | ![](img/regression_tree_Parabola.png) 72 | 73 | 74 | Using a forest of 10 trees: 75 | 76 | Axis aligned: 77 | ![](img/regression_forest_AxisAligned.png) 78 | 79 | Linear: 80 | ![](img/regression_forest_Linear.png) 81 | 82 | Conic: 83 | ![](img/regression_forest_Conic.png) 84 | 85 | Parabola: 86 | ![](img/regression_forest_Parabola.png) 87 | 88 | Reference 89 | --------- 90 | 91 | A. Criminisi, J. Shotton, and E. Konukoglu, "Decision Forests for Classification, 92 | Regression, Density Estimation, Manifold Learning and Semi-Supervised Learning", 93 | no. MSR-TR-2011-114, 28 October 2011. 94 | http://research.microsoft.com/en-us/projects/decisionforests/ 95 | -------------------------------------------------------------------------------- /example_classification.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import numpy as np 4 | import cv2 # OpenCV 5 | 6 | from randomforest import * 7 | from randomforest import weakLearner 8 | 9 | 10 | def img_test(forest, points, colors, filename, size=512, radius=3, proba=False): 11 | img = np.zeros((size, size, 3)) 12 | v_min = points.min() 13 | v_max = points.max() 14 | step = float(v_max - v_min) / img.shape[0] 15 | grid = np.arange(v_min, v_max, step) 16 | 17 | for x in grid: 18 | for y in grid: 19 | if proba: 20 | r = forest.predict_proba([x, y]) 21 | col = np.zeros(3, dtype=float) 22 | for c in forest.labels: 23 | col += r[int(c)] * np.array(colors[int(c)]) 24 | col = tuple(col.astype('int')) 25 | else: 26 | r = forest.predict([x, y]) 27 | col = colors[int(r)] 28 | img[int((y - v_min) / step), 29 | int((x - v_min) / step), :] = col 30 | 31 | points = ((points - v_min) / step).astype('int') 32 | for p, r in zip(points, responses): 33 | cv2.circle(img, tuple(p), radius + 1, (0, 0, 0), thickness=-1) 34 | cv2.circle(img, tuple(p), radius, colors[int(r)], thickness=-1) 35 | 36 | cv2.imwrite(filename, img) 37 | 38 | 39 | t = np.arange(0, 10, 0.1) 40 | 41 | theta = [0, 30, 60] 42 | colors = [(255, 0, 0), 43 | (0, 255, 0), 44 | (0, 0, 255)] 45 | 46 | points = np.zeros((len(t) * len(theta), 2)) 47 | responses = np.zeros(len(t) * len(theta)) 48 | for c in range(len(theta)): 49 | points[c * len(t):(c + 1) * len(t), 0] = t ** 2 * np.cos(t + theta[c]) # x 50 | points[c * len(t):(c + 1) * len(t), 1] = t ** 2 * np.sin(t + theta[c]) # y 51 | responses[c * len(t):(c + 1) * len(t)] = c 52 | 53 | for learner in weakLearner.__all__: 54 | params = {'max_depth': 10, 55 | 'min_sample_count': 5, 56 | 'test_count': 100, 57 | 'test_class': getattr(weakLearner, learner)()} 58 | 59 | tree = ClassificationTree(params) 60 | tree.fit(points, responses) 61 | 62 | # save tree to a text file 63 | tree.save('tree.txt') 64 | tree = ClassificationTree() 65 | tree.load('tree.txt', test=params['test_class']) 66 
| 67 | for i in range(len(points)): 68 | print(responses[i], tree.predict(points[i])) 69 | 70 | img_test(tree, points, colors, 71 | 'img/classification_tree_' + str(learner) + '.png', 72 | proba=False) 73 | 74 | forest = ClassificationForest(10, params) 75 | forest.fit(points, responses) 76 | 77 | # save forest to a directory of text files 78 | forest.save('saved_model') 79 | forest = ClassificationForest() 80 | forest.load('saved_model', test=params['test_class']) 81 | 82 | for i in range(len(points)): 83 | print(responses[i], forest.predict_proba(points[i])) 84 | 85 | img_test(forest, points, colors, 86 | 'img/classification_forest_' + str(learner) + '_soft.png', 87 | proba=True) 88 | img_test(forest, points, colors, 89 | 'img/classification_forest_' + str(learner) + '_hard.png', 90 | proba=False) 91 | -------------------------------------------------------------------------------- /example_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import numpy as np 4 | import cv2 5 | import itertools 6 | 7 | from randomforest.regression_tree import RegressionTree 8 | from randomforest.regression_forest import RegressionForest 9 | from randomforest import weakLearner 10 | 11 | 12 | def img_test(tree, points, colors, filename, size=512, radius=3): 13 | img = np.zeros((size, size, 3), dtype='float') 14 | v_min = points.min() 15 | v_max = points.max() 16 | step = float(v_max - v_min) / img.shape[0] 17 | grid = np.arange(v_min, v_max, step) 18 | 19 | xy = np.array(list(itertools.product(grid, grid))) 20 | 21 | for x in grid: 22 | for y in grid: 23 | prediction = np.array([x, y]) + tree.predict([x, y]) 24 | x0, y0 = np.round((prediction - v_min) / step).astype('int32') 25 | if 0 <= x0 < size and 0 <= y0 < size: 26 | img[y0, x0, :] += 1 27 | 28 | img *= 255 / img.max() 29 | 30 | points = ((points - v_min) / step).astype('int') 31 | for p, r in zip(points, responses): 32 | cv2.circle(img, tuple(p), radius + 1, (0, 0, 0), thickness=-1) 33 | cv2.circle(img, tuple(p), radius, (0, 255, 0), thickness=-1) 34 | 35 | cv2.imwrite(filename, img.astype('uint8')) 36 | 37 | 38 | t = np.linspace(0, 2 * np.pi, num=50) 39 | 40 | radius = [30, 60] 41 | colors = np.array([[255, 0, 0], 42 | [0, 255, 0], 43 | [0, 0, 255]], dtype='float32') 44 | 45 | points = np.zeros((len(t) * len(radius), 2)) 46 | for r in range(len(radius)): 47 | points[r * len(t):(r + 1) * len(t), 0] = radius[r] * np.cos(t) # x 48 | points[r * len(t):(r + 1) * len(t), 1] = radius[r] * np.sin(t) # y 49 | center = points.mean(axis=0) + 45 * np.ones((2)) / np.sqrt(2) 50 | responses = center[np.newaxis, ...] 
- points 51 | 52 | for learner in weakLearner.__all__: 53 | print(learner) 54 | params = {'max_depth': 10, 55 | 'min_sample_count': 5, 56 | 'test_count': 100, 57 | 'test_class': getattr(weakLearner, learner)()} 58 | tree = RegressionTree(params) 59 | tree.fit(points, responses) 60 | 61 | # save tree to a text file 62 | tree.save('tree.txt') 63 | tree = RegressionTree() 64 | tree.load('tree.txt', test=params['test_class']) 65 | 66 | img_test(tree, points, colors, 67 | 'img/regression_tree_' + str(learner) + '.png') 68 | 69 | forest = RegressionForest(10, params) 70 | forest.fit(points, responses) 71 | 72 | # save forest to a directory of text files 73 | forest.save('saved_model') 74 | forest = RegressionForest() 75 | forest.load('saved_model', test=params['test_class']) 76 | 77 | img_test(forest, points, colors, 78 | 'img/regression_forest_' + str(learner) + '.png') 79 | -------------------------------------------------------------------------------- /example_sklearn_classification.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import numpy as np 4 | import cv2 # OpenCV 5 | import itertools 6 | 7 | from randomforest import weakLearner 8 | from randomforest.weakLearner import FeatureExtractor 9 | 10 | from sklearn import ensemble 11 | 12 | 13 | def img_test(forest, feature_extractor, points, colors, filename, size=512, 14 | radius=3, proba=True): 15 | img = np.zeros((size, size, 3)) 16 | v_min = points.min() 17 | v_max = points.max() 18 | step = float(v_max - v_min) / img.shape[0] 19 | grid = np.arange(v_min, v_max, step) 20 | 21 | xy = np.array(list(itertools.product(grid, grid))) 22 | features = feature_extractor.apply_all(xy) 23 | 24 | if proba: 25 | r = forest.predict_proba(features) 26 | col = np.dot(r, colors) 27 | else: 28 | r = forest.predict(features).astype('int32') 29 | col = colors[r] 30 | img[((xy[:, 1] - v_min) / step).astype('int32'), 31 | ((xy[:, 0] - v_min) / step).astype('int32')] = col 32 | 33 | points = ((points - v_min) / step).astype('int') 34 | for p, r in zip(points, responses): 35 | col = tuple(colors[int(r)]) 36 | cv2.circle(img, tuple(p), radius + 1, (0, 0, 0), thickness=-1) 37 | cv2.circle(img, tuple(p), radius, col, thickness=-1) 38 | 39 | cv2.imwrite(filename, img) 40 | 41 | 42 | t = np.arange(0, 10, 0.1) 43 | 44 | theta = [0, 30, 60] 45 | colors = np.array([[255, 0, 0], 46 | [0, 255, 0], 47 | [0, 0, 255]], dtype='float') 48 | 49 | points = np.zeros((len(t) * len(theta), 2)) 50 | responses = np.zeros(len(t) * len(theta)) 51 | for c in range(len(theta)): 52 | points[c * len(t):(c + 1) * len(t), 0] = t ** 2 * np.cos(t + theta[c]) # x 53 | points[c * len(t):(c + 1) * len(t), 1] = t ** 2 * np.sin(t + theta[c]) # y 54 | responses[c * len(t):(c + 1) * len(t)] = c 55 | 56 | for learner in weakLearner.__all__: 57 | test_class = getattr(weakLearner, learner)() 58 | params = {'max_depth': None, 59 | 'min_samples_split': 2, 60 | 'n_jobs': 1, 61 | 'n_estimators': 100} 62 | 63 | print(str(learner)) 64 | 65 | forest = ensemble.RandomForestClassifier(**params) 66 | feature_extractor = FeatureExtractor(test_class, n_features=1000) 67 | features = feature_extractor.fit_transform(points) 68 | forest.fit(features, responses) 69 | 70 | img_test(forest, feature_extractor, points, colors, 71 | 'img/classification_forest_sklearn_' + str(learner) + '_soft.png', 72 | proba=True) 73 | img_test(forest, feature_extractor, points, colors, 74 | 'img/classification_forest_sklearn_' + str(learner) + '_hard.png', 75 | 
proba=False) 76 | -------------------------------------------------------------------------------- /example_sklearn_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import numpy as np 4 | import cv2 # OpenCV 5 | import itertools 6 | 7 | from randomforest import weakLearner 8 | from randomforest.weakLearner import FeatureExtractor 9 | 10 | from sklearn import ensemble 11 | 12 | 13 | def img_test(tree, feature_extractor, points, colors, filename, size=512, 14 | radius=3): 15 | img = np.zeros((size, size, 3), dtype='float') 16 | v_min = points.min() 17 | v_max = points.max() 18 | step = float(v_max - v_min) / img.shape[0] 19 | grid = np.arange(v_min, v_max, step) 20 | 21 | xy = np.array(list(itertools.product(grid, grid))) 22 | features = feature_extractor.apply_all(xy) 23 | 24 | predictions = xy + tree.predict(features) 25 | predictions = np.round((predictions - v_min) / step).astype('int32') 26 | 27 | flat_indices = np.ravel_multi_index((predictions[:, 1], predictions[:, 0]), 28 | img.shape[:2], mode='clip') 29 | bins = np.bincount(flat_indices, minlength=np.prod(img.shape[:2])) 30 | img += bins.reshape(img.shape[:2])[..., np.newaxis] 31 | 32 | # artefacts of clipping 33 | img[0] = 0 34 | img[-1] = 0 35 | img[:, 0] = 0 36 | img[:, -1] = 0 37 | 38 | img *= 255 / img.max() 39 | 40 | points = ((points - v_min) / step).astype('int') 41 | for p, r in zip(points, responses): 42 | cv2.circle(img, tuple(p), radius + 1, (0, 0, 0), thickness=-1) 43 | cv2.circle(img, tuple(p), radius, (0, 255, 0), thickness=-1) 44 | 45 | cv2.imwrite(filename, img.astype('uint8')) 46 | 47 | 48 | t = np.linspace(0, 2 * np.pi, num=50) 49 | 50 | radius = [30, 60] 51 | colors = np.array([[255, 0, 0], 52 | [0, 255, 0], 53 | [0, 0, 255]], dtype='float32') 54 | 55 | points = np.zeros((len(t) * len(radius), 2)) 56 | for r in range(len(radius)): 57 | points[r * len(t):(r + 1) * len(t), 0] = radius[r] * np.cos(t) # x 58 | points[r * len(t):(r + 1) * len(t), 1] = radius[r] * np.sin(t) # y 59 | center = points.mean(axis=0) + 45 * np.ones((2)) / np.sqrt(2) 60 | responses = center[np.newaxis, ...] 
- points 61 | 62 | for learner in weakLearner.__all__: 63 | test_class = getattr(weakLearner, learner)() 64 | params = {'max_depth': None, 65 | 'min_samples_split': 2, 66 | 'n_jobs': 1, 67 | 'n_estimators': 100} 68 | 69 | print(str(learner)) 70 | 71 | forest = ensemble.RandomForestRegressor(**params) 72 | feature_extractor = FeatureExtractor(test_class, n_features=1000) 73 | features = feature_extractor.fit_transform(points) 74 | forest.fit(features, responses) 75 | 76 | img_test(forest, feature_extractor, points, colors, 77 | 'img/regression_forest_sklearn_' + str(learner) + '.png') 78 | -------------------------------------------------------------------------------- /img/classification_forest_AxisAligned_hard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_AxisAligned_hard.png -------------------------------------------------------------------------------- /img/classification_forest_AxisAligned_soft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_AxisAligned_soft.png -------------------------------------------------------------------------------- /img/classification_forest_Conic_hard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_Conic_hard.png -------------------------------------------------------------------------------- /img/classification_forest_Conic_soft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_Conic_soft.png -------------------------------------------------------------------------------- /img/classification_forest_Linear_hard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_Linear_hard.png -------------------------------------------------------------------------------- /img/classification_forest_Linear_soft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_Linear_soft.png -------------------------------------------------------------------------------- /img/classification_forest_Parabola_hard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_Parabola_hard.png -------------------------------------------------------------------------------- /img/classification_forest_Parabola_soft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_Parabola_soft.png 
-------------------------------------------------------------------------------- /img/classification_forest_sklearn_AxisAligned_hard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_AxisAligned_hard.png -------------------------------------------------------------------------------- /img/classification_forest_sklearn_AxisAligned_soft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_AxisAligned_soft.png -------------------------------------------------------------------------------- /img/classification_forest_sklearn_Conic_hard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_Conic_hard.png -------------------------------------------------------------------------------- /img/classification_forest_sklearn_Conic_soft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_Conic_soft.png -------------------------------------------------------------------------------- /img/classification_forest_sklearn_Linear_hard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_Linear_hard.png -------------------------------------------------------------------------------- /img/classification_forest_sklearn_Linear_soft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_Linear_soft.png -------------------------------------------------------------------------------- /img/classification_forest_sklearn_Parabola_hard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_Parabola_hard.png -------------------------------------------------------------------------------- /img/classification_forest_sklearn_Parabola_soft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_forest_sklearn_Parabola_soft.png -------------------------------------------------------------------------------- /img/classification_tree_AxisAligned.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_tree_AxisAligned.png -------------------------------------------------------------------------------- /img/classification_tree_Conic.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_tree_Conic.png -------------------------------------------------------------------------------- /img/classification_tree_Linear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_tree_Linear.png -------------------------------------------------------------------------------- /img/classification_tree_Parabola.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/classification_tree_Parabola.png -------------------------------------------------------------------------------- /img/regression_forest_AxisAligned.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_AxisAligned.png -------------------------------------------------------------------------------- /img/regression_forest_Conic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_Conic.png -------------------------------------------------------------------------------- /img/regression_forest_Linear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_Linear.png -------------------------------------------------------------------------------- /img/regression_forest_Parabola.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_Parabola.png -------------------------------------------------------------------------------- /img/regression_forest_sklearn_AxisAligned.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_sklearn_AxisAligned.png -------------------------------------------------------------------------------- /img/regression_forest_sklearn_Conic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_sklearn_Conic.png -------------------------------------------------------------------------------- /img/regression_forest_sklearn_Linear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_sklearn_Linear.png -------------------------------------------------------------------------------- /img/regression_forest_sklearn_Parabola.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_forest_sklearn_Parabola.png -------------------------------------------------------------------------------- /img/regression_tree_AxisAligned.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_tree_AxisAligned.png -------------------------------------------------------------------------------- /img/regression_tree_Conic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_tree_Conic.png -------------------------------------------------------------------------------- /img/regression_tree_Linear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_tree_Linear.png -------------------------------------------------------------------------------- /img/regression_tree_Parabola.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevin-keraudren/randomforest-python/fc8267f226ae51cfe17965ce8b9f8651941b57bf/img/regression_tree_Parabola.png -------------------------------------------------------------------------------- /randomforest/__init__.py: -------------------------------------------------------------------------------- 1 | from .classification_tree import ClassificationTree 2 | from .classification_forest import ClassificationForest 3 | from .regression_tree import RegressionTree 4 | from .regression_forest import RegressionForest 5 | from .weakLearner import * 6 | 7 | __all__ = ["ClassificationTree", 8 | "ClassificationForest", 9 | "RegressionTree", 10 | "RegressionForest"] 11 | 12 | from . 
import weakLearner 13 | 14 | __all__.extend(weakLearner.__all__) 15 | -------------------------------------------------------------------------------- /randomforest/classification_forest.py: -------------------------------------------------------------------------------- 1 | from .forest import Forest 2 | from .classification_tree import ClassificationTree 3 | 4 | 5 | class ClassificationForest(Forest): 6 | tree_class = ClassificationTree 7 | labels = [] 8 | 9 | def fit(self, points, responses): 10 | self.labels = [] 11 | for r in responses: 12 | if r not in self.labels: 13 | self.labels.append(r) 14 | for i in range(self.ntrees): 15 | self.trees.append(ClassificationTree(self.tree_params)) 16 | self.trees[i].fit(points, responses, self.labels) 17 | 18 | def predict(self, point): 19 | r = {} 20 | for c in self.labels: 21 | r[c] = 0.0 22 | for i in range(self.ntrees): 23 | response = int(self.trees[i].predict(point)) 24 | r[response] += 1 25 | 26 | response = None 27 | max_count = -1 28 | for c in self.labels: 29 | if r[c] > max_count: 30 | response = c 31 | max_count = r[c] 32 | return response 33 | 34 | def predict_proba(self, point): 35 | r = {} 36 | for c in self.labels: 37 | r[c] = 0.0 38 | for i in range(self.ntrees): 39 | response = int(self.trees[i].predict(point)) 40 | r[response] += 1 41 | 42 | for c in self.labels: 43 | r[c] /= self.ntrees 44 | return r 45 | -------------------------------------------------------------------------------- /randomforest/classification_tree.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .tree import Tree 4 | 5 | 6 | class ClassificationTree(Tree): 7 | labels = [] 8 | 9 | def entropy(self, d): 10 | E = 0.0 11 | for r in d.keys(): 12 | if d['count'] > 0 and d[r] > 0: 13 | proba = float(d[r]) / d['count'] 14 | E -= proba * np.log(proba) 15 | return E 16 | 17 | def split_points(self, points, responses, test): 18 | left = {'count': 0.0} 19 | right = {'count': 0.0} 20 | for c in self.labels: 21 | right[c] = 0.0 22 | left[c] = 0.0 23 | for p, r in zip(points, responses): 24 | if self.params['test_class'].run(p, test): 25 | right[r] += 1 26 | right['count'] += 1 27 | else: 28 | left[r] += 1 29 | left['count'] += 1 30 | return left, right 31 | 32 | def make_leaf(self, all_points): 33 | response = None 34 | max_count = -1 35 | for c in self.labels: 36 | if all_points[c] > max_count: 37 | response = c 38 | max_count = all_points[c] 39 | self.leaf = response 40 | 41 | def fit(self, points, responses, labels=None, depth=0): 42 | if labels is None: 43 | self.labels = [] 44 | for r in responses: 45 | if r not in self.labels: 46 | self.labels.append(r) 47 | else: 48 | self.labels = labels 49 | 50 | print("Number of points:", len(points)) 51 | 52 | all_points = {'count': len(points)} 53 | for c in self.labels: 54 | all_points[c] = 0.0 55 | for p, r in zip(points, responses): 56 | all_points[r] += 1 57 | 58 | H = self.entropy(all_points) 59 | print("Current entropy:", H) 60 | 61 | if (depth == self.params['max_depth'] 62 | or len(points) <= self.params['min_sample_count'] 63 | or H == 0): 64 | self.make_leaf(all_points) 65 | return 66 | 67 | all_tests = self.params['test_class'].generate_all(points, self.params[ 68 | 'test_count']) 69 | 70 | best_gain = 0 71 | best_i = None 72 | for i, test in enumerate(all_tests): 73 | left, right = self.split_points(points, responses, test) 74 | I = H - (left['count'] / all_points['count'] * self.entropy(left) 75 | + right['count'] / all_points['count'] * 
self.entropy( 76 | right)) 77 | if I > best_gain: 78 | best_gain = I 79 | best_i = i 80 | 81 | print("Information gain:", best_gain) 82 | 83 | if best_i is None: 84 | print("no best split found: creating a leaf") 85 | self.make_leaf(all_points) 86 | return 87 | 88 | self.test = all_tests[best_i] 89 | print("TEST:", self.test) 90 | left_points = [] 91 | left_responses = [] 92 | right_points = [] 93 | right_responses = [] 94 | for p, r in zip(points, responses): 95 | if self.params['test_class'].run(p, self.test): 96 | right_points.append(p) 97 | right_responses.append(r) 98 | else: 99 | left_points.append(p) 100 | left_responses.append(r) 101 | self.left = ClassificationTree(self.params) 102 | self.right = ClassificationTree(self.params) 103 | 104 | self.left.fit(np.array(left_points), left_responses, self.labels, 105 | depth + 1) 106 | self.right.fit(np.array(right_points), right_responses, self.labels, 107 | depth + 1) 108 | -------------------------------------------------------------------------------- /randomforest/forest.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from glob import glob 4 | import shutil 5 | 6 | from .weakLearner import WeakLearner, AxisAligned 7 | 8 | 9 | class Forest(object): 10 | def __init__(self, 11 | ntrees=20, 12 | tree_params={'max_depth': 10, 13 | 'min_sample_count': 5, 14 | 'test_count': 100, 15 | 'test_class': AxisAligned()}): 16 | self.ntrees = ntrees 17 | self.tree_params = tree_params 18 | self.trees = [] 19 | 20 | def __len__(self): 21 | return self.ntrees 22 | 23 | def save(self, folder): 24 | if os.path.exists(folder): 25 | shutil.rmtree(folder) 26 | os.makedirs(folder) 27 | template = '%0' + str(int(np.log10(self.ntrees))) + 'd.data' 28 | for i in range(self.ntrees): 29 | filename = template % i 30 | self.trees[i].save(folder + '/' + filename) 31 | 32 | def load(self, folder, test=WeakLearner()): 33 | self.trees = [] 34 | for f in glob(folder + '/*'): 35 | self.trees.append(self.tree_class()) 36 | self.trees[-1].load(f, test) 37 | self.ntrees = len(self.trees) 38 | if 'labels' in dir(self): 39 | self.labels = self.trees[0].labels 40 | -------------------------------------------------------------------------------- /randomforest/regression_forest.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .regression_tree import RegressionTree 4 | from .forest import Forest 5 | 6 | 7 | class RegressionForest(Forest): 8 | tree_class = RegressionTree 9 | 10 | def fit(self, points, responses): 11 | for i in range(self.ntrees): 12 | self.trees.append(RegressionTree(self.tree_params)) 13 | self.trees[i].fit(points, responses) 14 | 15 | def predict(self, point): 16 | response = [] 17 | for i in range(self.ntrees): 18 | response.append(self.trees[i].predict(point)) 19 | return np.mean(response, axis=0) 20 | -------------------------------------------------------------------------------- /randomforest/regression_tree.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .tree import Tree 4 | 5 | 6 | class RegressionTree(Tree): 7 | def MSE(self, responses): 8 | mean = np.mean(responses, axis=0) 9 | return np.mean((responses - mean) ** 2) 10 | 11 | def split_points(self, points, responses, test): 12 | left = [] 13 | right = [] 14 | for p, r in zip(points, responses): 15 | if self.params['test_class'].run(p, test): 16 | right.append(r) 17 | else: 18 | 
left.append(r) 19 | return left, right 20 | 21 | def make_leaf(self, responses): 22 | self.leaf = np.mean(responses, axis=0) 23 | 24 | def fit(self, points, responses, depth=0): 25 | 26 | print("Number of points:", len(points)) 27 | 28 | error = self.MSE(responses) 29 | print("Current MSE:", error) 30 | 31 | if (depth == self.params['max_depth'] 32 | or len(points) <= self.params['min_sample_count'] 33 | or error == 0): 34 | self.make_leaf(responses) 35 | return 36 | 37 | all_tests = self.params['test_class'].generate_all(points, self.params[ 38 | 'test_count']) 39 | 40 | best_error = np.inf 41 | best_i = None 42 | for i, test in enumerate(all_tests): 43 | left, right = self.split_points(points, responses, test) 44 | error = (len(left) / len(points) * self.MSE(left) 45 | + len(right) / len(points) * self.MSE(right)) 46 | if error < best_error: 47 | best_error = error 48 | best_i = i 49 | 50 | print("Best error:", best_error) 51 | 52 | if best_i is None: 53 | print("no best split found: creating a leaf") 54 | self.make_leaf(responses) 55 | return 56 | 57 | self.test = all_tests[best_i] 58 | print("TEST:", self.test) 59 | left_points = [] 60 | left_responses = [] 61 | right_points = [] 62 | right_responses = [] 63 | for p, r in zip(points, responses): 64 | if self.params['test_class'].run(p, self.test): 65 | right_points.append(p) 66 | right_responses.append(r) 67 | else: 68 | left_points.append(p) 69 | left_responses.append(r) 70 | self.left = RegressionTree(self.params) 71 | self.right = RegressionTree(self.params) 72 | 73 | self.left.fit(np.array(left_points), left_responses, depth + 1) 74 | self.right.fit(np.array(right_points), right_responses, depth + 1) 75 | -------------------------------------------------------------------------------- /randomforest/tree.py: -------------------------------------------------------------------------------- 1 | # Format for saving tree: 2 | # for an internal node: 3 | # left right test 4 | # for a leaf 5 | # -1 -1 class 6 | 7 | import numpy as np 8 | from .weakLearner import WeakLearner, AxisAligned 9 | 10 | 11 | class Tree(object): 12 | def __init__(self, 13 | params={'max_depth': 10, 14 | 'min_sample_count': 5, 15 | 'test_count': 100, 16 | 'test_class': AxisAligned()}): 17 | self.params = params 18 | self.leaf = None 19 | self.left = None 20 | self.right = None 21 | self.test = None 22 | 23 | def __len__(self): 24 | if self.leaf is not None: 25 | return 1 26 | else: 27 | return 1 + len(self.left) + len(self.right) 28 | 29 | def write_node(self, line_index, file_buffer, ref_next_available_id): 30 | if self.leaf is not None: 31 | file_buffer[line_index] = '-1\t-1\t' 32 | if isinstance(self.leaf, np.ndarray): 33 | file_buffer[line_index] += '\t'.join(map(str, self.leaf)) 34 | else: 35 | file_buffer[line_index] += str(self.leaf) 36 | return 37 | 38 | else: 39 | left_index = ref_next_available_id[0] 40 | right_index = ref_next_available_id[0] + 1 41 | ref_next_available_id[0] += 2 42 | 43 | file_buffer[line_index] += (str(left_index) 44 | + '\t' 45 | + str(right_index) 46 | + '\t' 47 | + '\t'.join(map(str, self.test)) 48 | ) 49 | 50 | self.left.write_node(left_index, file_buffer, ref_next_available_id) 51 | self.right.write_node(right_index, file_buffer, 52 | ref_next_available_id) 53 | 54 | def save(self, filename): 55 | file_buffer = ['' for i in range(2 + len(self))] 56 | 57 | # save params 58 | keys = list(self.params.keys()) 59 | for i in range(len(keys)): 60 | file_buffer[0] += keys[i] + '\t' + str(self.params[keys[i]]) 61 | if i < len(keys) - 1: 62 
| file_buffer[0] += '\t' 63 | 64 | # save labels 65 | if 'labels' in dir(self): 66 | for i in range(len(self.labels)): 67 | file_buffer[1] += str(int(self.labels[i])) 68 | if i < len(self.labels) - 1: 69 | file_buffer[1] += '\t' 70 | 71 | # save nodes 72 | line_index = 2 73 | ref_next_available_id = [line_index + 1] 74 | self.write_node(line_index, file_buffer, ref_next_available_id) 75 | f = open(filename, 'wt') 76 | for line in file_buffer: 77 | f.write(line + '\n') 78 | f.close() 79 | 80 | def load_node(self, id, lines): 81 | line = lines[id].split('\t') 82 | line = list(map(float, line)) 83 | if line[0] == -1: 84 | if 'labels' in dir(self): 85 | self.leaf = line[2] 86 | else: 87 | self.leaf = np.array(line[2:]) 88 | return 89 | else: 90 | self.left = type(self)(self.params) 91 | self.right = type(self)(self.params) 92 | self.test = line[2:] 93 | self.left.load_node(int(line[0]), lines) 94 | self.right.load_node(int(line[1]), lines) 95 | return 96 | 97 | def load(self, filename, test=WeakLearner()): 98 | f = open(filename, 'rt') 99 | 100 | lines = f.readlines() 101 | lines = list(map(lambda x: x.rstrip(), lines)) 102 | 103 | # read params 104 | params = lines[0].split('\t') 105 | self.params = dict(zip(params[::2], params[1::2])) 106 | self.params['max_depth'] = int(self.params['max_depth']) 107 | self.params['min_sample_count'] = int(self.params['min_sample_count']) 108 | self.params['test_count'] = int(self.params['test_count']) 109 | 110 | assert self.params['test_class'] == str(test), "expected %s, got %s" % ( 111 | self.params['test_class'], 112 | str(test)) 113 | 114 | self.params['test_class'] = test 115 | 116 | # read labels 117 | if 'labels' in dir(self): 118 | self.labels = list(map(int, lines[1].split('\t'))) 119 | 120 | self.load_node(2, lines) 121 | 122 | def predict(self, point): 123 | if self.leaf is not None: 124 | return self.leaf 125 | else: 126 | if self.params['test_class'].run(point, self.test): 127 | return self.right.predict(point) 128 | else: 129 | return self.left.predict(point) 130 | -------------------------------------------------------------------------------- /randomforest/weakLearner.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.random import uniform, randint 3 | 4 | __all__ = ["AxisAligned", 5 | "Linear", 6 | "Conic", 7 | "Parabola"] 8 | 9 | 10 | class WeakLearner(object): 11 | def generate_all(self, points, count): 12 | return None 13 | 14 | def __str__(self): 15 | return None 16 | 17 | def run(self, point, test): 18 | return None 19 | 20 | 21 | class AxisAligned(WeakLearner): 22 | """Axis aligned""" 23 | 24 | def __str__(self): 25 | return "AxisAligned" 26 | 27 | def generate_all(self, points, count): 28 | x_min = points.min(0)[0] 29 | y_min = points.min(0)[1] 30 | x_max = points.max(0)[0] 31 | y_max = points.max(0)[1] 32 | tests = [] 33 | tests.extend(zip(np.zeros(count, dtype=int), 34 | uniform(x_min, x_max, count))) 35 | tests.extend(zip(np.ones(count, dtype=int), 36 | uniform(y_min, y_max, count))) 37 | return np.array(tests) 38 | 39 | def run(self, point, test): 40 | return point[int(test[0])] > test[1] 41 | 42 | def run_all(self, points, tests): 43 | return np.array(list(map(lambda test: points[:, int(test[0])] > test[1], 44 | tests))).T 45 | 46 | 47 | class Linear(WeakLearner): 48 | """Linear""" 49 | 50 | def __str__(self): 51 | return "Linear" 52 | 53 | def generate_all(self, points, count): 54 | x_min = points.min(0)[0] 55 | y_min = points.min(0)[1] 56 | x_max = points.max(0)[0] 
57 | y_max = points.max(0)[1] 58 | tests = [] 59 | tests.extend(zip(uniform(x_min, x_max, count), 60 | uniform(y_min, y_max, count), 61 | uniform(0, 360, count))) 62 | return tests 63 | 64 | def run(self, point, test): 65 | theta = test[2] * np.pi / 180 66 | return (np.cos(theta) * (point[0] - test[0]) + 67 | np.sin(theta) * (point[1] - test[1])) > 0 68 | 69 | def run_all(self, points, tests): 70 | def _run(test): 71 | theta = test[2] * np.pi / 180 72 | return (np.cos(theta) * (points[:, 0] - test[0]) + 73 | np.sin(theta) * (points[:, 1] - test[1])) > 0 74 | 75 | return np.array(list(map(_run, tests))).T 76 | 77 | 78 | class Conic(WeakLearner): 79 | """Non-linear: conic""" 80 | 81 | def __str__(self): 82 | return "Conic" 83 | 84 | def generate_all(self, points, count): 85 | x_min = points.min(0)[0] 86 | y_min = points.min(0)[1] 87 | x_max = points.max(0)[0] 88 | y_max = points.max(0)[1] 89 | scale = max(points.max(), abs(points.min())) 90 | tests = [] 91 | tests.extend(zip(uniform(x_min, x_max, count), 92 | uniform(y_min, y_max, count), 93 | uniform(-scale, scale, 94 | count) * randint(0, 2, count), 95 | uniform(-scale, scale, 96 | count) * randint(0, 2, count), 97 | uniform(-scale, scale, 98 | count) * randint(0, 2, count), 99 | uniform(-scale, scale, 100 | count) * randint(0, 2, count), 101 | uniform(-scale, scale, 102 | count) * randint(0, 2, count), 103 | uniform(-scale, scale, 104 | count) * randint(0, 2, count))) 105 | 106 | return tests 107 | 108 | def run(self, point, test): 109 | x = (point[0] - test[0]) 110 | y = (point[1] - test[1]) 111 | A, B, C, D, E, F = test[2:] 112 | return (A * x * x + B * y * y + C * x * y + D * x + E * y + F) > 0 113 | 114 | def run_all(self, points, tests): 115 | def _run(test): 116 | x = (points[:, 0] - test[0]) 117 | y = (points[:, 1] - test[1]) 118 | A, B, C, D, E, F = test[2:] 119 | return (A * x * x + B * y * y + C * x * y + D * x + E * y + F) > 0 120 | 121 | return np.array(list(map(_run, tests))).T 122 | 123 | 124 | class Parabola(WeakLearner): 125 | """Non-linear: parabola""" 126 | 127 | def __str__(self): 128 | return "Parabola" 129 | 130 | def generate_all(self, points, count): 131 | x_min = points.min(0)[0] 132 | y_min = points.min(0)[1] 133 | x_max = points.max(0)[0] 134 | y_max = points.max(0)[1] 135 | scale = abs(points.max() - points.min()) 136 | tests = [] 137 | tests.extend(zip(uniform(2 * x_min, 2 * x_max, count), 138 | uniform(2 * y_min, 2 * y_max, count), 139 | uniform(-scale, scale, count), 140 | randint(0, 2, count))) 141 | 142 | return tests 143 | 144 | def run(self, point, test): 145 | x = (point[0] - test[0]) 146 | y = (point[1] - test[1]) 147 | p, axis = test[2:] 148 | if axis == 0: 149 | return x * x < p * y 150 | else: 151 | return y * y < p * x 152 | 153 | def run_all(self, points, tests): 154 | def _run(test): 155 | x = (points[:, 0] - test[0]) 156 | y = (points[:, 1] - test[1]) 157 | p, axis = test[2:] 158 | if axis == 0: 159 | return x * x < p * y 160 | else: 161 | return y * y < p * x 162 | 163 | return np.array(list(map(_run, tests))).T 164 | 165 | 166 | class FeatureExtractor(object): 167 | def __init__(self, learner, n_features): 168 | self.learner = learner 169 | self.n_features = n_features 170 | self.tests = [] 171 | 172 | def fit_transform(self, points): 173 | self.tests = self.learner.generate_all(points, self.n_features) 174 | return self.apply_all(points) 175 | 176 | def apply(self, point): 177 | return np.array(list(map(lambda t: self.learner.run(point, 
t), 178 | self.tests))) 179 | 180 | def apply_all(self, points): 181 | return self.learner.run_all(points, self.tests) 182 | --------------------------------------------------------------------------------
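
The extension point advertised in the README is the `WeakLearner` interface: `generate_all(points, count)` draws random candidate tests, `run(point, test)` evaluates one test on one point, and `run_all(points, tests)` is the vectorised variant used by `FeatureExtractor`. The sketch below shows how a user-defined learner plugs into `ClassificationForest`; the `Circle` learner and the toy disc-shaped dataset are illustrative assumptions, not part of the repository.

```python
import numpy as np
from numpy.random import uniform

from randomforest import ClassificationForest
from randomforest.weakLearner import WeakLearner


class Circle(WeakLearner):
    """Hypothetical weak learner: is the point outside a random circle?"""

    def __str__(self):
        return "Circle"

    def generate_all(self, points, count):
        # one test = (center_x, center_y, radius), drawn from the data range
        x_min, y_min = points.min(0)
        x_max, y_max = points.max(0)
        r_max = max(x_max - x_min, y_max - y_min)
        return list(zip(uniform(x_min, x_max, count),
                        uniform(y_min, y_max, count),
                        uniform(0, r_max, count)))

    def run(self, point, test):
        cx, cy, r = test
        return (point[0] - cx) ** 2 + (point[1] - cy) ** 2 > r ** 2

    def run_all(self, points, tests):
        # vectorised over points, one column per test (for FeatureExtractor)
        def _run(test):
            cx, cy, r = test
            return (points[:, 0] - cx) ** 2 + (points[:, 1] - cy) ** 2 > r ** 2
        return np.array(list(map(_run, tests))).T


# toy data: the label says whether a point falls inside the unit disc
points = uniform(-2, 2, size=(200, 2))
responses = (points[:, 0] ** 2 + points[:, 1] ** 2 < 1).astype(float)

params = {'max_depth': 10,
          'min_sample_count': 5,
          'test_count': 100,
          'test_class': Circle()}

forest = ClassificationForest(10, params)
forest.fit(points, responses)
print(forest.predict(points[0]))        # hard label
print(forest.predict_proba(points[0]))  # dict mapping label -> probability
```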
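
Regression works the same way except that responses are vectors: in the regression examples each response is the offset from a training point to a common target, a leaf stores the mean offset of its samples, and the forest averages the per-tree predictions. A minimal sketch along the lines of example_regression.py; the circle data and the target point are made up for illustration.

```python
import numpy as np

from randomforest import RegressionForest
from randomforest.weakLearner import Linear

# training points on a circle; each response is the offset to a fixed target
t = np.linspace(0, 2 * np.pi, num=50)
points = 60 * np.stack([np.cos(t), np.sin(t)], axis=1)
target = np.array([20.0, 20.0])  # illustrative target, not from the repo
responses = target[np.newaxis, :] - points

params = {'max_depth': 10,
          'min_sample_count': 5,
          'test_count': 100,
          'test_class': Linear()}

forest = RegressionForest(10, params)
forest.fit(points, responses)

# adding the predicted offset to a query point should land near the target
print(points[0] + forest.predict(points[0]))
```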
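
`FeatureExtractor` is the bridge used by the example_sklearn_* scripts: every random weak-learner test becomes one boolean feature (computed by `run_all`), and an ordinary scikit-learn forest is trained on those features. A condensed sketch with made-up XOR-style data; `n_features` and `n_estimators` follow the values in the example scripts.

```python
import numpy as np
from sklearn import ensemble

from randomforest.weakLearner import Conic, FeatureExtractor

# toy data: XOR-style labels from the sign of x * y
points = np.random.uniform(-1, 1, size=(200, 2))
responses = (points[:, 0] * points[:, 1] > 0).astype(int)

# each of the 1000 random conic tests becomes one boolean feature column
extractor = FeatureExtractor(Conic(), n_features=1000)
features = extractor.fit_transform(points)  # boolean array, shape (200, 1000)

forest = ensemble.RandomForestClassifier(n_estimators=100, n_jobs=1)
forest.fit(features, responses)

# query points must be mapped through the same stored tests
queries = np.array([[0.5, 0.5], [-0.5, 0.5]])
print(forest.predict(extractor.apply_all(queries)))
```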