├── knn.py
├── README.md
├── logistic_regression.py
├── pca.py
├── random_forest.py
├── linear_regression.py
├── perceptron.py
├── k_means.py
└── decision_tree.py

/knn.py:
--------------------------------------------------------------------------------
import numpy as np
from collections import Counter

def euclidean_distance(x1, x2):
    return np.sqrt(np.sum((x1 - x2) ** 2))

class KNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        return np.array([self._predict(x) for x in X])

    def _predict(self, x):
        # Compute distances to all training points
        distances = [euclidean_distance(x, x_train) for x_train in self.X_train]
        # Get indices of the k nearest points
        k_indices = np.argsort(distances)[:self.k]
        # Fetch the labels of those points
        k_labels = [self.y_train[i] for i in k_indices]
        # Return the most common label
        return Counter(k_labels).most_common(1)[0][0]

# Testing
if __name__ == "__main__":
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split

    def accuracy(y_true, y_pred):
        return np.mean(y_true == y_pred)

    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = KNN(k=3)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    print("KNN Classification Accuracy:", accuracy(y_test, preds))

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# ScratchML: ML Algorithms from Scratch

This repo is a growing collection of from-scratch implementations of the most common conventional machine learning algorithms — built without relying on high-level libraries like scikit-learn.

Each algorithm is written with clarity and learning in mind, making it easier to understand what's happening under the hood.

## 🚀 What's Inside (so far)

✅ Linear Regression

✅ Logistic Regression

✅ K-Nearest Neighbors (KNN)

✅ Perceptron

✅ Decision Tree

✅ Random Forest

✅ Principal Component Analysis (PCA)

✅ K-Means Clustering

More coming soon...

## 📌 Why this repo?

If you're learning ML or mentoring others, you'll know how important it is to truly understand the core logic behind each algorithm. This repo is my way of reinforcing that understanding — and helping others do the same.

## How to Use

Clone the repo and run any file directly — no extra dependencies are required beyond the basics (numpy, matplotlib, and scikit-learn for the example datasets).

    git clone https://github.com/your-username/scratchml.git
    cd scratchml
    python linear_regression.py

## 🔄 Still a Work in Progress

I'll keep adding more algorithms and improving the existing ones — feel free to follow along, give feedback, or even contribute if you're into this kind of hands-on ML learning!

## 🙌 Stay connected

If you're learning ML from a non-tech background or transitioning into the field, feel free to connect with me on LinkedIn — always happy to help!
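## 🧪 Importing the classes directly

Each file also exposes a small class (`KNN`, `LinearRegression`, `KMeans`, ...) that you can import into your own script. A minimal sketch, assuming you run it from the repo root so the modules are importable:

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split

    from knn import KNN  # the same pattern works for the other modules

    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    clf = KNN(k=5)
    clf.fit(X_train, y_train)
    print("accuracy:", np.mean(clf.predict(X_test) == y_test))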
--------------------------------------------------------------------------------
/logistic_regression.py:
--------------------------------------------------------------------------------
import numpy as np

class LogisticRegression:
    def __init__(self, learning_rate=0.001, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            linear = np.dot(X, self.weights) + self.bias
            y_pred = self._sigmoid(linear)

            # Gradients of the log-loss w.r.t. weights and bias
            dw = (1 / n_samples) * np.dot(X.T, (y_pred - y))
            db = (1 / n_samples) * np.sum(y_pred - y)

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        linear = np.dot(X, self.weights) + self.bias
        probs = self._sigmoid(linear)
        return np.array([1 if p > 0.5 else 0 for p in probs])

    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

# Testing it
if __name__ == "__main__":
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split

    def accuracy(y_true, y_pred):
        return np.mean(y_true == y_pred)

    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = LogisticRegression(learning_rate=0.0001, n_iters=1000)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    print("Accuracy:", accuracy(y_test, preds))

--------------------------------------------------------------------------------
/pca.py:
--------------------------------------------------------------------------------
import numpy as np

class PCA:
    def __init__(self, n_components):
        self.n_components = n_components
        self.components = None
        self.mean = None

    def fit(self, X):
        # Center the data
        self.mean = np.mean(X, axis=0)
        X_centered = X - self.mean

        # Compute covariance matrix
        cov_matrix = np.cov(X_centered.T)

        # Get eigenvalues and eigenvectors (eigh: the covariance matrix is
        # symmetric, so eigenvalues and eigenvectors are guaranteed real)
        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

        # Sort eigenvectors by descending eigenvalues
        sorted_idx = np.argsort(eigenvalues)[::-1]
        eigenvectors = eigenvectors[:, sorted_idx]

        # Keep only the top n components
        self.components = eigenvectors[:, :self.n_components].T

    def transform(self, X):
        X_centered = X - self.mean
        return np.dot(X_centered, self.components.T)

# Testing it
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    from sklearn.datasets import load_iris

    X, y = load_iris(return_X_y=True)

    pca = PCA(n_components=2)
    pca.fit(X)
    X_reduced = pca.transform(X)

    print("Original shape:", X.shape)
    print("Reduced shape:", X_reduced.shape)

    plt.scatter(
        X_reduced[:, 0],
        X_reduced[:, 1],
        c=y,
        cmap=plt.get_cmap("viridis", 3),
        alpha=0.8,
        edgecolors="none"
    )
    plt.xlabel("Principal Component 1")
    plt.ylabel("Principal Component 2")
    plt.colorbar()
    plt.show()

--------------------------------------------------------------------------------
/random_forest.py:
--------------------------------------------------------------------------------
import numpy as np
from collections import Counter
from decision_tree import DecisionTree

def bootstrap_sample(X, y):
    n_samples = X.shape[0]
    indices = np.random.choice(n_samples, n_samples, replace=True)
    return X[indices], y[indices]

def most_common_label(y):
    return Counter(y).most_common(1)[0][0]

class RandomForest:
    def __init__(self, n_trees=10, min_samples_split=2, max_depth=100, n_feats=None):
        self.n_trees = n_trees
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.n_feats = n_feats
        self.trees = []

    def fit(self, X, y):
        self.trees = []
        for _ in range(self.n_trees):
            tree = DecisionTree(
                min_samples_split=self.min_samples_split,
                max_depth=self.max_depth,
                n_feats=self.n_feats
            )
            X_sample, y_sample = bootstrap_sample(X, y)
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)

    def predict(self, X):
        tree_preds = np.array([tree.predict(X) for tree in self.trees])
        # Transpose to shape (n_samples, n_trees)
        tree_preds = tree_preds.T
        return np.array([most_common_label(preds) for preds in tree_preds])

# Testing it
if __name__ == "__main__":
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split

    def accuracy(y_true, y_pred):
        return np.mean(y_true == y_pred)

    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForest(n_trees=3, max_depth=10)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    print("Accuracy:", accuracy(y_test, preds))
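Note: a quick way to see the effect of bagging is to compare a single tree with forests of increasing size on the same split. This is only a sketch, not a file in the repo; it assumes you run it from the repo root so decision_tree.py and random_forest.py are importable, and the exact numbers will vary.

    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split

    from decision_tree import DecisionTree
    from random_forest import RandomForest

    X, y = load_breast_cancer(return_X_y=True)
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)

    # Baseline: one tree trained on the raw training set
    single = DecisionTree(max_depth=10)
    single.fit(X_tr, y_tr)
    print("single tree:", np.mean(single.predict(X_te) == y_te))

    # Forests built from bootstrap samples, majority-voted at predict time
    for n in (1, 5, 15):
        forest = RandomForest(n_trees=n, max_depth=10)
        forest.fit(X_tr, y_tr)
        print(f"{n:>2} trees:", np.mean(forest.predict(X_te) == y_te))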
--------------------------------------------------------------------------------
/linear_regression.py:
--------------------------------------------------------------------------------
import numpy as np


def r2_score(y_true, y_pred):
    # Coefficient of determination: 1 - SS_res / SS_tot
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1 - ss_res / ss_tot


class LinearRegression:
    def __init__(self, learning_rate=0.001, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape

        # init parameters
        self.weights = np.zeros(n_features)
        self.bias = 0

        # gradient descent
        for _ in range(self.n_iters):
            y_predicted = np.dot(X, self.weights) + self.bias

            # compute gradients
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)

            # update parameters
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        y_approximated = np.dot(X, self.weights) + self.bias
        return y_approximated


# Testing
if __name__ == "__main__":
    # Imports
    import matplotlib.pyplot as plt
    from sklearn.model_selection import train_test_split
    from sklearn import datasets

    def mean_squared_error(y_true, y_pred):
        return np.mean((y_true - y_pred) ** 2)

    X, y = datasets.make_regression(
        n_samples=100, n_features=1, noise=20, random_state=4
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=1234
    )

    regressor = LinearRegression(learning_rate=0.01, n_iters=1000)
    regressor.fit(X_train, y_train)
    predictions = regressor.predict(X_test)

    mse = mean_squared_error(y_test, predictions)
    print("MSE:", mse)

    r2 = r2_score(y_test, predictions)
    print("R2 score:", r2)

    y_pred_line = regressor.predict(X)
    cmap = plt.get_cmap("viridis")
    fig = plt.figure(figsize=(8, 6))
    plt.scatter(X_train, y_train, color=cmap(0.9), s=10, label="Train")
    plt.scatter(X_test, y_test, color=cmap(0.5), s=10, label="Test")
    plt.plot(X, y_pred_line, color="black", linewidth=2, label="Prediction")
    plt.legend()
    plt.show()
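Note: because this model is plain least squares, the gradient-descent solution can be sanity-checked against the closed-form fit. A sketch, not part of the repo (run from the repo root); the two parameter sets should roughly agree.

    import numpy as np
    from sklearn import datasets
    from sklearn.model_selection import train_test_split

    from linear_regression import LinearRegression

    X, y = datasets.make_regression(n_samples=100, n_features=1, noise=20, random_state=4)
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=1234)

    gd = LinearRegression(learning_rate=0.01, n_iters=1000)
    gd.fit(X_tr, y_tr)

    # Closed-form least squares: append a bias column and solve X_b @ w = y
    X_b = np.c_[np.ones(len(X_tr)), X_tr]
    w = np.linalg.lstsq(X_b, y_tr, rcond=None)[0]

    print("gradient descent bias, weights:", gd.bias, gd.weights)
    print("closed-form      bias, weights:", w[0], w[1:])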
--------------------------------------------------------------------------------
/perceptron.py:
--------------------------------------------------------------------------------
import numpy as np


class Perceptron:
    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.activation_func = self._unit_step_func
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape

        # init parameters
        self.weights = np.zeros(n_features)
        self.bias = 0

        # make sure the targets are 0/1
        y_ = np.array([1 if i > 0 else 0 for i in y])

        for _ in range(self.n_iters):

            for idx, x_i in enumerate(X):

                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = self.activation_func(linear_output)

                # Perceptron update rule
                update = self.lr * (y_[idx] - y_predicted)

                self.weights += update * x_i
                self.bias += update

    def predict(self, X):
        linear_output = np.dot(X, self.weights) + self.bias
        y_predicted = self.activation_func(linear_output)
        return y_predicted

    def _unit_step_func(self, x):
        return np.where(x >= 0, 1, 0)


# Testing
if __name__ == "__main__":
    # Imports
    import matplotlib.pyplot as plt
    from sklearn.model_selection import train_test_split
    from sklearn import datasets

    def accuracy(y_true, y_pred):
        accuracy = np.sum(y_true == y_pred) / len(y_true)
        return accuracy

    X, y = datasets.make_blobs(
        n_samples=150, n_features=2, centers=2, cluster_std=1.05, random_state=2
    )
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=123
    )

    p = Perceptron(learning_rate=0.01, n_iters=1000)
    p.fit(X_train, y_train)
    predictions = p.predict(X_test)

    print("Perceptron classification accuracy", accuracy(y_test, predictions))

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    plt.scatter(X_train[:, 0], X_train[:, 1], marker="o", c=y_train)

    # Decision boundary: w0 * x0 + w1 * x1 + b = 0  =>  x1 = (-w0 * x0 - b) / w1
    x0_1 = np.amin(X_train[:, 0])
    x0_2 = np.amax(X_train[:, 0])

    x1_1 = (-p.weights[0] * x0_1 - p.bias) / p.weights[1]
    x1_2 = (-p.weights[0] * x0_2 - p.bias) / p.weights[1]

    ax.plot([x0_1, x0_2], [x1_1, x1_2], "k")

    ymin = np.amin(X_train[:, 1])
    ymax = np.amax(X_train[:, 1])
    ax.set_ylim([ymin - 3, ymax + 3])

    plt.show()
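Note: a tiny linearly separable problem such as the AND gate makes the update rule easy to trace by hand. A sketch, not part of the repo (run from the repo root):

    import numpy as np
    from perceptron import Perceptron

    # AND gate: only (1, 1) is positive, and the two classes are linearly separable
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([0, 0, 0, 1])

    p = Perceptron(learning_rate=0.1, n_iters=20)
    p.fit(X, y)

    print(p.predict(X))       # expected: [0 0 0 1] once the updates converge
    print(p.weights, p.bias)  # one separating hyperplane found by the update rule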
--------------------------------------------------------------------------------
/k_means.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(42)

def euclidean_distance(x1, x2):
    return np.sqrt(np.sum((x1 - x2) ** 2))

class KMeans:
    def __init__(self, K=3, max_iters=100, plot_steps=False):
        self.K = K
        self.max_iters = max_iters
        self.plot_steps = plot_steps
        self.clusters = [[] for _ in range(self.K)]
        self.centroids = []

    def predict(self, X):
        self.X = X
        self.n_samples, self.n_features = X.shape

        # Initialize centroids
        random_idxs = np.random.choice(self.n_samples, self.K, replace=False)
        self.centroids = [X[idx] for idx in random_idxs]

        for _ in range(self.max_iters):
            # Assign clusters
            self.clusters = self._create_clusters(self.centroids)

            if self.plot_steps:
                self.plot()

            # Update centroids
            centroids_old = self.centroids
            self.centroids = self._calculate_centroids(self.clusters)

            # Check convergence
            if self._is_converged(centroids_old, self.centroids):
                break

        return self._get_cluster_labels(self.clusters)

    def _create_clusters(self, centroids):
        clusters = [[] for _ in range(self.K)]
        for idx, sample in enumerate(self.X):
            closest_idx = self._closest_centroid(sample, centroids)
            clusters[closest_idx].append(idx)
        return clusters

    def _closest_centroid(self, sample, centroids):
        distances = [euclidean_distance(sample, point) for point in centroids]
        return np.argmin(distances)

    def _calculate_centroids(self, clusters):
        centroids = np.zeros((self.K, self.n_features))
        for idx, cluster in enumerate(clusters):
            cluster_mean = np.mean(self.X[cluster], axis=0)
            centroids[idx] = cluster_mean
        return centroids

    def _is_converged(self, old_centroids, new_centroids):
        distances = [
            euclidean_distance(old_centroids[i], new_centroids[i])
            for i in range(self.K)
        ]
        return np.sum(distances) == 0

    def _get_cluster_labels(self, clusters):
        labels = np.empty(self.n_samples)
        for cluster_idx, sample_idxs in enumerate(clusters):
            labels[sample_idxs] = cluster_idx
        return labels

    def plot(self):
        fig, ax = plt.subplots(figsize=(10, 6))
        for i, cluster in enumerate(self.clusters):
            points = self.X[cluster].T
            ax.scatter(*points)
        for point in self.centroids:
            ax.scatter(*point, marker='x', color='black', linewidth=2)
        plt.show()

# Testing
if __name__ == "__main__":
    from sklearn.datasets import make_blobs

    X, y = make_blobs(n_samples=500, centers=3, n_features=2, random_state=40)
    kmeans = KMeans(K=3, max_iters=150, plot_steps=True)
    y_pred = kmeans.predict(X)
    kmeans.plot()
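Note: the class does not report an inertia ("elbow") score, but one can be computed from the returned labels and the fitted centroids. A rough sketch, not part of the repo (run from the repo root); for well-separated blobs the total should drop sharply up to the true number of clusters and flatten afterwards.

    import numpy as np
    from sklearn.datasets import make_blobs

    from k_means import KMeans

    X, _ = make_blobs(n_samples=500, centers=3, n_features=2, random_state=40)

    def within_cluster_ss(model, X, labels):
        # Sum of squared distances of each sample to its assigned centroid
        return sum(
            np.sum((X[labels == k] - model.centroids[k]) ** 2)
            for k in range(model.K)
        )

    for K in (2, 3, 4, 5):
        km = KMeans(K=K, max_iters=150, plot_steps=False)
        labels = km.predict(X)
        print(K, round(within_cluster_ss(km, X, labels), 1))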
--------------------------------------------------------------------------------
/decision_tree.py:
--------------------------------------------------------------------------------
import numpy as np
from collections import Counter

def entropy(y):
    hist = np.bincount(y)
    probs = hist / len(y)
    return -np.sum([p * np.log2(p) for p in probs if p > 0])

class Node:
    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
        self.feature = feature
        self.threshold = threshold
        self.left = left
        self.right = right
        self.value = value

    def is_leaf(self):
        return self.value is not None

class DecisionTree:
    def __init__(self, min_samples_split=2, max_depth=100, n_feats=None):
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.n_feats = n_feats
        self.root = None

    def fit(self, X, y):
        self.n_feats = X.shape[1] if self.n_feats is None else min(self.n_feats, X.shape[1])
        self.root = self._build_tree(X, y)

    def predict(self, X):
        return np.array([self._traverse(x, self.root) for x in X])

    def _build_tree(self, X, y, depth=0):
        n_samples, n_features = X.shape
        n_labels = len(np.unique(y))

        if (depth >= self.max_depth or n_labels == 1 or n_samples < self.min_samples_split):
            return Node(value=self._most_common_label(y))

        feat_idxs = np.random.choice(n_features, self.n_feats, replace=False)

        best_feat, best_thresh = self._best_split(X, y, feat_idxs)

        left_idxs, right_idxs = self._split(X[:, best_feat], best_thresh)

        # Guard against a degenerate split (all samples end up on one side)
        if len(left_idxs) == 0 or len(right_idxs) == 0:
            return Node(value=self._most_common_label(y))

        left = self._build_tree(X[left_idxs], y[left_idxs], depth + 1)
        right = self._build_tree(X[right_idxs], y[right_idxs], depth + 1)
        return Node(feature=best_feat, threshold=best_thresh, left=left, right=right)

    def _best_split(self, X, y, feat_idxs):
        best_gain = -1
        split_idx, split_thresh = None, None

        for feat_idx in feat_idxs:
            X_column = X[:, feat_idx]
            thresholds = np.unique(X_column)

            for thresh in thresholds:
                gain = self._information_gain(y, X_column, thresh)

                if gain > best_gain:
                    best_gain = gain
                    split_idx = feat_idx
                    split_thresh = thresh

        return split_idx, split_thresh

    def _information_gain(self, y, X_column, split_thresh):
        parent_entropy = entropy(y)

        left_idxs, right_idxs = self._split(X_column, split_thresh)
        if len(left_idxs) == 0 or len(right_idxs) == 0:
            return 0

        n = len(y)
        n_l, n_r = len(left_idxs), len(right_idxs)
        e_l, e_r = entropy(y[left_idxs]), entropy(y[right_idxs])
        weighted_avg_entropy = (n_l / n) * e_l + (n_r / n) * e_r

        return parent_entropy - weighted_avg_entropy

    def _split(self, X_column, threshold):
        left_idxs = np.argwhere(X_column <= threshold).flatten()
        right_idxs = np.argwhere(X_column > threshold).flatten()
        return left_idxs, right_idxs

    def _traverse(self, x, node):
        if node.is_leaf():
            return node.value

        if x[node.feature] <= node.threshold:
            return self._traverse(x, node.left)
        return self._traverse(x, node.right)

    def _most_common_label(self, y):
        return Counter(y).most_common(1)[0][0]

# Testing it
if __name__ == "__main__":
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split

    def accuracy(y_true, y_pred):
        return np.mean(y_true == y_pred)

    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    tree = DecisionTree(max_depth=10)
    tree.fit(X_train, y_train)

    preds = tree.predict(X_test)
    print("Accuracy:", accuracy(y_test, preds))
--------------------------------------------------------------------------------
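Note: a small worked example of the entropy and information-gain math the tree uses when scoring splits. A sketch, not part of the repo (run from the repo root):

    import numpy as np
    from decision_tree import entropy

    # Four 0s and four 1s: a 50/50 split, so the entropy is exactly 1 bit
    y = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    print(entropy(y))  # 1.0

    # Candidate split that sends [0 0 0 0 1] left and [1 1 1] right
    left, right = y[:5], y[5:]
    weighted = len(left) / len(y) * entropy(left) + len(right) / len(y) * entropy(right)
    print("information gain:", entropy(y) - weighted)  # about 0.55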