├── .gitignore ├── README.md ├── cs224n └── README.md ├── fastai ├── README.md └── bird_or_plane.ipynb ├── kaggle ├── README.md ├── ensemble │ ├── README.md │ └── utils.py ├── metrics │ ├── README.md │ └── metrics.py ├── tuning │ └── optuna_classification.py ├── validation │ └── README.md └── viz │ └── eda.py ├── llms ├── README.md ├── fine_tuning │ └── REAMDE.md └── gpt_numpy │ └── gpt.py ├── math_for_ml ├── README.md └── linalg.ipynb ├── minitorch └── README.md ├── ml_from_scratch ├── README.md └── supervised │ ├── lineargression.py │ └── xgboost.py ├── nlp ├── BERT │ └── README.md ├── classic │ ├── LDA.py │ ├── LSA.py │ ├── PCA.py │ ├── SVD.py │ ├── TFIDF.py │ ├── TSNE.py │ └── UMAP.py └── embeddings │ ├── README.md │ ├── berttokenize.ipynb │ └── tfidf.ipynb ├── nn_zero_to_hero ├── README.md └── micrograd │ ├── derivatives.ipynb │ ├── exercises.ipynb │ ├── micrograd.ipynb │ ├── micrograd.py │ └── viz.py ├── roadmap.png └── tensor_puzzles └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AI from scratch 2 | 3 | documenting what I learn 4 | 5 | from this [roadmap](https://medium.com/bitgrit-data-science-publication/a-roadmap-to-learn-ai-in-2024-cc30c6aa6e16) I made 6 | 7 | ![roadmap](roadmap.png) 8 | -------------------------------------------------------------------------------- /cs224n/README.md: -------------------------------------------------------------------------------- 1 | # CS224N: Natural Language Processing with Deep Learning 2 | 3 | [Course page](https://web.stanford.edu/class/archive/cs/cs224n/cs224n.1234/) 4 | 5 | Papers 6 | 7 | - [Efficient Estimation of Word Representations in Vector Space (original word2vec paper)](https://arxiv.org/pdf/1301.3781.pdf) 8 | - [Distributed Representations of Words and Phrases and their Compositionality (negative sampling paper)](https://proceedings.neurips.cc/paper_files/paper/2013/file/9aa42b31882ec039965f3c4923ce901b-Paper.pdf) 9 | -------------------------------------------------------------------------------- /fastai/README.md: -------------------------------------------------------------------------------- 1 | # Fast AI 2 | 3 | - [Practical Deep Learning for Coders - Practical Deep Learning](https://course.fast.ai/) 4 | - [Lectures](https://www.youtube.com/playlist?list=PLfYUBJiXbdtSvpQjSnJJ_PmDQB_VyT5iU) 5 | - [Practical Deep Learning for Coders - Part 2 overview](https://course.fast.ai/Lessons/part2.html) 6 | - [Walk with fastai - Introduction](https://walkwithfastai.com/revisited/) 7 | -------------------------------------------------------------------------------- /kaggle/README.md: -------------------------------------------------------------------------------- 1 | # Kaggle Notes 2 | 3 | Sources 4 | 5 | - [Kaggle Solutions](https://farid.one/kaggle-solutions/) 6 | - [The Kaggle 
Book](https://learning.oreilly.com/library/view/the-kaggle-book/9781801817479/) 7 | - [Winning Toolkit for Competitive ML](https://mlcontests.com/winning-toolkit/) 8 | - [[1811.12808] Model Evaluation, Model Selection, and Algorithm Selection in Machine Learning](https://arxiv.org/abs/1811.12808) 9 | 10 | ## Metrics 11 | 12 | - in real world, your model will be evaluated against multiple metrics, and some of the metrics won't even be related to how your predictions perform against the ground truth you are using for testing 13 | - ex: domain of knowledge you're working in, scope of project, number of features considered for model, overall memory usage, requirements for special hardware, latency of prediction process, complexity of prediction model, and many other aspects may count more than just predictive performance. 14 | - it is dominated by business and tech infrastructure concerns 15 | 16 | ### evaluation metrics and objective functions 17 | 18 | - objective functions: serves model during training, involved in process of error minimization (or score maximization) 19 | - evaluation metric: serves your model after it has been trained by providing a score 20 | - it cannot influence how model fits data, but it helps you select the best configurations within a model, and the best model among competing ones 21 | - analysis of evaluation metric should be your first act in competitions 22 | 23 | ### objective function, cost function and loss function 24 | 25 | - loss function: single data point (penalty = |pred - ground truth|) 26 | - cost function: on whole dataset (or a batch), sum or average over loss penalties. Can comprise further constraints, i.e. L1 or L2 penalties 27 | - objective function: related to scope of optimization during ML training, comprise cost function, but not limited to them. Can also take into account goals not related to target, ex: requiring sparse coefficients of estimated model or minimization of coefficients' values, i.e. 
L1 and L2 regularization. 28 | 29 | Loss & cost imply optimization based on minimization, objective function is neutral, it can be a maximization or minimization activity. 30 | 31 | Scoring function (higher score = better prediction, maximization process), error functions (smaller error = better prediction, minimization process) 32 | 33 | ### basic tasks 34 | 35 | - regression 36 | - a model that can predict a real number (often positive, sometimes negative) 37 | - evaluations: diff = dist(pred, true), square(diff) it to punish large errors / log(diff) to penalize predictions of the wrong scale 38 | - classification 39 | - binary: 0 or 1 / probabilities of class (in medical fields) 40 | - churn/not churn, cancer/not cancer (probability is important here) 41 | - !watch out for imbalance!, use eval metrics that take imbalance into account 42 | - multi-class: >2 classes 43 | - ex: leaf classification 44 | - ensure performance across class is comparable (model can underperform with respect to certain classes) 45 | - multi-label: predictions are not exclusive and you can predict multiple class ownership 46 | - ex: classify news articles with relevant topics 47 | - require further evaluations to control whether model is predicting correct classes, as well as the correct number and mix of classes 48 | - Ordinal 49 | - halfway between regression and classification 50 | - ex: magnitude of earthquake, customer preferences 51 | - as multiclass 52 | - get prediction as integer value, but does not take into account the order of the classes 53 | - problem: probabilities distributed across entire range of possible values, depicting multi-modal and often asymmetric distribution (you should expect Gaussian around max probability class) 54 | - as regression 55 | - output as a float number, and results include full range of values between integers of ordinal distribution, and possibly outside of it 56 | - one solution is to crop the output values, cast into int by unit rounding, but may lead to 
inaccuracies, requiring more sophisticated post-processing 57 | 58 | ### common metrics 59 | 60 | Top Kaggle metrics 61 | 62 | - **AUC**: measures if your model's predicted probabilities tend to predict positive cases with high probabilities 63 | - **log loss**: how far your predicted probabilities are from the ground truth (as you optimize for log loss, you optimize for AUC metric) 64 | - **MAP@{K}**: common in recsys and search engines, used for information retrieval evaluations 65 | - ex: whale identification and having 5 possible guesses 66 | - ex2: quickdraw doodle recognition (guess the content of a drawn sketch in 3 attempts, score not just if you can guess correctly, but if your correct guess is among a certain number, the "K" in the name of the function, of other incorrect predictions) 67 | - RMSLE (root mean squared logarithmic error): 68 | - quadratic weighted kappa: for ordinal scale problems (problems that involve guessing a progressive integer number) 69 | 70 | Metrics for regression 71 | 72 | - MSE 73 | - mean of sum of squared errors (SSE) 74 | - cautions 75 | - sensitive to outliers 76 | - imbalanced errors 77 | - not robust to non-gaussian errors 78 | - lack of sensitivity to small errors 79 | - R squared (coefficient of determination) 80 | -------------------------------------------------------------------------------- /kaggle/ensemble/README.md: -------------------------------------------------------------------------------- 1 | # Ensemble Learning 2 | 3 | ## What 4 | 5 | A technique that blends predictions from a diverse set of models. 6 | 7 | See [Wisdom of Crowds](https://arxiv.org/abs/1605.04074) 8 | 9 | ## Why they work 10 | 11 | - performance: ensemble reduces variance component of prediction error by adding bias 12 | - robustness: ensemble reduces reliance on any single model's prediction, making it better at handling noisy data. 
13 | 14 | ## Diversity 15 | 16 | ensemble learning is based on concept of combining multiple weak learners, weak because individual models don't need to be very accurate, as long as they're better than a random model, combining them is beneficial. 17 | 18 | Diversity is a concept referring to the idea that individual models have to be **as different from each other as possible**. This is because different models are likely to make different types of errors. By combining predictions of a diverse set, we can reduce overall error of the ensemble. 19 | 20 | In order for accuracy of ensemble to be better than individual models, there needs to be diversity. 21 | 22 | ### how 23 | 24 | - train each model on different subset of data 25 | - bagging (w replacement) 26 | - pasting (w/out replacement) 27 | - ex: 28 | - random forest: achieves diversity using a random subset of features at each split 29 | - extremely randomized trees: a random split to lower correlation between trees 30 | - train each model with a different set of features 31 | - train each model using a different type of algorithm 32 | - voting and stacking meta-models 33 | 34 | ### good and bad 35 | 36 | good diversity: ensemble is already correct, low disagreement between classifiers, several votes wasted 37 | 38 | bad diversity: ensemble is incorrect, any disagreement represents a wasted vote, as individual classifier did not contribute to correct decision. 39 | 40 | increase good diversity (where disagreements among classifiers contribute to correct decisions) and reduce bad diversity (where disagreements do not contribute to correct decisions). 
41 | 42 | ### metrics 43 | 44 | - let f_1, f_2, f_3 be predictions of diff models in ensemble 45 | 46 | two types of measures 47 | 48 | - pairwise: computed for every f_i, f_j pair, represented by NxN matrix 49 | - global: computed on whole matrix of predictions, represented by a single value 50 | 51 | Measures 52 | 53 | - pearson correlation coefficient 54 | - disagreement 55 | - Yule's Q 56 | - entropy 57 | 58 | References 59 | 60 | - [Measures of Diversity in Classifier Ensembles and Their Relationship with the Ensemble Accuracy | Machine Learning](https://link.springer.com/article/10.1023/A:1022859003006) 61 | - [Understanding the Importance of Diversity in Ensemble Learning](https://towardsdatascience.com/understanding-the-importance-of-diversity-in-ensemble-learning-34fb58fd2ed0#:~:text=Ensemble%20learning%20is%20a%20powerful,of%20the%20ensemble%20also%20increased.) 62 | 63 | ## Methods 64 | 65 | 1. blending : averaging, weighted averaging, and rank averaging 66 | - average the outputs 67 | - weights given to model can be assigned explicitly or implicitly by [rank averaging](https://towardsdatascience.com/ensemble-averaging-improve-machine-learning-performance-by-voting-246106c753ee), which ranks models by performance and gives more accurate models greater weights 68 | - involves using optuna or hyperopt to find optimal blend by taking into account cross validation metrics 69 | 2. Voting : for classification 70 | - ex: majority voting: class that most models predict is chosen 71 | 3. classical trio: bagging, boosting and stacking 72 | - bagging: train multiple models on different subsets of training data and average prediction 73 | - boosting: sequentially training models, each new model focuses on errors made by predecessors. 74 | - Stacking: feed predictions of various models as input to higher-level model. 75 | 76 | ## Reality 77 | 78 | Building robust and highly accurate models are only half the solution. 
import numpy as np


def coefficients(preds):
    """Count the four joint outcomes for a pair of classifiers.

    Parameters
    ----------
    preds : array-like of shape (n_samples, 2)
        Column 0 belongs to classifier A and column 1 to classifier B.
        Values are cast to bool; truthy is treated as "right".
        NOTE(review): presumably a 0/1 "was the prediction correct" matrix
        rather than raw class labels -- confirm against the callers.

    Returns
    -------
    tuple
        ``(a, b, c, d)`` = (both right, A wrong / B right,
        A right / B wrong, both wrong).
    """
    # np.asarray lets plain nested lists through; 2-D slicing needs an ndarray.
    pair = np.asarray(preds)
    first = pair[:, 0].astype(bool)
    second = pair[:, 1].astype(bool)

    a = np.sum(first & second)  # A right, B right
    b = np.sum(~first & second)  # A wrong, B right
    c = np.sum(first & ~second)  # A right, B wrong
    d = np.sum(~first & ~second)  # A wrong, B wrong

    return a, b, c, d


def disagreement(preds, i, j):
    """Return the fraction of samples on which models ``i`` and ``j`` differ.

    ``preds`` is an (n_samples, n_models) array-like; ``i`` and ``j`` are
    column indices of the two models being compared.
    """
    a, b, c, d = coefficients(np.asarray(preds)[:, [i, j]])
    return float(b + c) / (a + b + c + d)


def paired_q(preds, i, j):
    """Return the pairwise Q statistic ``(ad - bc) / (ad + bc)`` for two models.

    ``preds`` is an (n_samples, n_models) array-like; ``i`` and ``j`` are
    column indices of the two models being compared.
    """
    a, b, c, d = coefficients(np.asarray(preds)[:, [i, j]])
    # The tiny constant (10e-24 == 1e-23) keeps the denominator non-zero
    # when a*d + b*c is 0.
    return float(a * d - b * c) / ((a * d + b * c) + 10e-24)


def entropy(preds):
    """Return the ensemble-wide entropy measure of diversity.

    For every sample, the smaller of "number of truthy votes" and "number of
    falsy votes" is scaled by ``1 / (L - ceil(L / 2))`` (``L`` = number of
    models); the mean over all samples is returned.

    ``preds`` is an (n_samples, n_models) array-like.  With a single model
    the scale factor divides by zero, so at least two columns are needed
    for a finite result.
    """
    votes_matrix = np.asarray(preds)
    n_models = votes_matrix.shape[1]
    votes = np.sum(votes_matrix, axis=1)
    minority = np.minimum(votes, n_models - votes)
    scale = 1.0 / (n_models - np.ceil(0.5 * n_models))
    return np.mean(scale * minority)
import numpy as np

try:
    from numba import jit
except ImportError:  # numba only accelerates fast_auc; fall back to plain Python

    def jit(func):
        """No-op stand-in for ``numba.jit`` when numba is not installed."""
        return func


def mse(y_pred: np.ndarray, y_true: np.ndarray) -> float:
    """Return the mean squared error between predictions and targets.

    Both arguments go through ``np.asarray`` so plain sequences work too.
    The metric is symmetric, so swapping the arguments gives the same value.
    """
    squared_diff = (np.asarray(y_pred) - np.asarray(y_true)) ** 2
    return np.mean(squared_diff)


def rmse(y_true, y_pred):
    """Return the root mean squared error, i.e. ``sqrt(mse(...))``."""
    # np.sqrt must receive the computed error; applying it to the ``mse``
    # function object itself raises TypeError on every call.
    return np.sqrt(mse(y_pred, y_true))


def r2(y_true, y_pred):
    """Return the coefficient of determination ``1 - SS_res / SS_tot``.

    When ``y_true`` is constant (``SS_tot == 0``) the ratio is undefined;
    1.0 is returned for a perfect fit and 0.0 otherwise, the same convention
    scikit-learn's ``r2_score`` uses by default.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    if ss_tot == 0:
        return 1.0 if ss_res == 0 else 0.0
    return 1.0 - ss_res / ss_tot


@jit
def fast_auc(y_true, y_prob):
    """
    fast roc_auc computation: https://www.kaggle.com/c/microsoft-malware-prediction/discussion/76013

    Labels are visited in order of increasing predicted probability; every
    positive adds the number of negatives seen before it, and the total is
    divided by (#negatives * #positives).  ``y_true`` is used arithmetically
    as 0/1 values.
    """
    y_true = np.asarray(y_true)
    y_true = y_true[np.argsort(y_prob)]
    nfalse = 0
    auc = 0
    n = len(y_true)
    for i in range(n):
        y_i = y_true[i]
        nfalse += 1 - y_i
        auc += y_i * nfalse
    auc /= nfalse * (n - nfalse)
    return auc
def objective(trial):
    """Train one XGBoost model with trial-suggested settings and score it.

    The breast-cancer dataset is reloaded and re-split (25% validation) on
    every call, a booster is trained on the training part, and the accuracy
    of the rounded validation predictions is returned for Optuna to
    maximize.
    """
    features, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    train_x, valid_x, train_y, valid_y = train_test_split(
        features, labels, test_size=0.25
    )
    dtrain = xgb.DMatrix(train_x, label=train_y)
    dvalid = xgb.DMatrix(valid_x, label=valid_y)

    # Suggest calls stay in their original order so sampling is unchanged.
    booster = trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"])
    param = {
        "verbosity": 0,
        "objective": "binary:logistic",
        # use exact for small dataset.
        "tree_method": "exact",
        # defines booster, gblinear for linear functions.
        "booster": booster,
        # L2 regularization weight.
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        # L1 regularization weight.
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
        # sampling ratio for training data.
        "subsample": trial.suggest_float("subsample", 0.2, 1.0),
        # sampling according to each tree.
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.2, 1.0),
    }

    # Tree-specific knobs only apply to the tree-based boosters.
    if booster in ("gbtree", "dart"):
        param.update(
            {
                # maximum depth of the tree, signifies complexity of the tree.
                "max_depth": trial.suggest_int("max_depth", 3, 9, step=2),
                # minimum child weight, larger the term more conservative the tree.
                "min_child_weight": trial.suggest_int("min_child_weight", 2, 10),
                "eta": trial.suggest_float("eta", 1e-8, 1.0, log=True),
                # defines how selective algorithm is.
                "gamma": trial.suggest_float("gamma", 1e-8, 1.0, log=True),
                "grow_policy": trial.suggest_categorical(
                    "grow_policy", ["depthwise", "lossguide"]
                ),
            }
        )

    # Dropout-related knobs exist only for the dart booster.
    if booster == "dart":
        param.update(
            {
                "sample_type": trial.suggest_categorical(
                    "sample_type", ["uniform", "weighted"]
                ),
                "normalize_type": trial.suggest_categorical(
                    "normalize_type", ["tree", "forest"]
                ),
                "rate_drop": trial.suggest_float("rate_drop", 1e-8, 1.0, log=True),
                "skip_drop": trial.suggest_float("skip_drop", 1e-8, 1.0, log=True),
            }
        )

    model = xgb.train(param, dtrain)
    # predict() yields probabilities for binary:logistic; round to 0/1 labels.
    hard_labels = np.rint(model.predict(dvalid))
    return sklearn.metrics.accuracy_score(valid_y, hard_labels)
def objective(trial):
    """Train one LightGBM model with trial-suggested settings and score it.

    The breast-cancer dataset is reloaded and re-split (25% validation) on
    every call; the accuracy of the rounded validation predictions is
    returned for Optuna to maximize.
    """
    features, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    train_x, valid_x, train_y, valid_y = train_test_split(
        features, labels, test_size=0.25
    )
    train_set = lgb.Dataset(train_x, label=train_y)

    # Settings that are the same for every trial.
    fixed = {
        "objective": "binary",
        "metric": "binary_logloss",
        "verbosity": -1,
        "boosting_type": "gbdt",
    }
    # Settings searched by Optuna (suggest order kept as in the original).
    tuned = {
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }
    param = {**fixed, **tuned}

    booster = lgb.train(param, train_set)
    # Round the predicted scores to hard 0/1 labels before scoring.
    hard_labels = np.rint(booster.predict(valid_x))
    return sklearn.metrics.accuracy_score(valid_y, hard_labels)
def objective(trial):
    """Train one CatBoost classifier with trial-suggested settings and score it.

    The breast-cancer dataset is reloaded and re-split (30% validation) on
    every call.  The validation part doubles as the early-stopping eval set,
    and the accuracy on it is returned for Optuna to maximize.
    """
    features, labels = load_breast_cancer(return_X_y=True)
    train_x, valid_x, train_y, valid_y = train_test_split(
        features, labels, test_size=0.3
    )

    # Suggest calls stay in their original order so sampling is unchanged.
    loss_name = trial.suggest_categorical("objective", ["Logloss", "CrossEntropy"])
    col_fraction = trial.suggest_float("colsample_bylevel", 0.01, 0.1)
    tree_depth = trial.suggest_int("depth", 1, 12)
    boosting = trial.suggest_categorical("boosting_type", ["Ordered", "Plain"])
    bootstrap = trial.suggest_categorical(
        "bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]
    )

    param = {
        "objective": loss_name,
        "colsample_bylevel": col_fraction,
        "depth": tree_depth,
        "boosting_type": boosting,
        "bootstrap_type": bootstrap,
        "used_ram_limit": "3gb",
    }

    # Each bootstrap flavour has its own extra knob; MVS gets none here.
    if bootstrap == "Bayesian":
        param["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
    elif bootstrap == "Bernoulli":
        param["subsample"] = trial.suggest_float("subsample", 0.1, 1)

    classifier = cb.CatBoostClassifier(**param)
    classifier.fit(
        train_x,
        train_y,
        eval_set=[(valid_x, valid_y)],
        verbose=0,
        early_stopping_rounds=100,
    )

    hard_labels = np.rint(classifier.predict(valid_x))
    return accuracy_score(valid_y, hard_labels)
def reduce_mem_usage(df, verbose=True):
    """Downcast numeric columns of ``df`` to the smallest dtype that fits.

    Parameters
    ----------
    df : pandas.DataFrame
        Modified in place; columns whose dtype is not one of the listed
        numeric dtypes are left untouched.
    verbose : bool
        When true, print the final memory footprint and percentage saved.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call chaining.

    Notes
    -----
    Only the column's min/max are checked against the candidate dtype's
    range, and the bounds are inclusive (a column whose max is exactly 127
    fits in int8).  Float columns may therefore lose precision when stored
    as float16/float32.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if str(col_type)[:3] == 'int':
            # Smallest integer type whose inclusive range covers the data;
            # if none matches (e.g. empty column -> NaN min/max) keep as is.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # Out of float32 range, or all-NaN column: fall back to float64.
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        # Guard the percentage against a zero-byte starting footprint.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df
def plot_countplot(data, column_name):
    """Draw and show a count plot of the values in ``data[column_name]``."""
    # countplot returns the Axes it drew on, so the title goes on that plot.
    axes = sns.countplot(data=data, x=column_name)
    axes.set_title(f'Count Plot of {column_name}')
    plt.show()
Matrix Calculus You Need For Deep Learning](https://arxiv.org/pdf/1802.01528.pdf) 7 | - [Essence of calculus](https://www.youtube.com/playlist?list=PLZHQObOWTQDMsr9K-rj53DwVRMYO3t5Yr) 8 | 9 | - [Computational Linear Algebra](https://www.fast.ai/posts/2017-07-17-num-lin-alg.html) ([video](https://www.youtube.com/playlist?list=PLtmWHNX-gukIc92m1K0P6bIOnZb-mg0hY), [code](https://github.com/fastai/numerical-linear-algebra)) 10 | - [Introduction to Linear Algebra for Applied Machine Learning with Python](https://pabloinsente.github.io/intro-linear-algebra) 11 | 12 | Videos 13 | 14 | - [Essence of linear algebra](https://www.youtube.com/playlist?list=PLZHQObOWTQDPD3MizzM2xVFitgF8hE_ab) 15 | -------------------------------------------------------------------------------- /math_for_ml/linalg.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "base", 14 | "language": "python", 15 | "name": "python3" 16 | }, 17 | "language_info": { 18 | "name": "python", 19 | "version": "3.9.10" 20 | } 21 | }, 22 | "nbformat": 4, 23 | "nbformat_minor": 2 24 | } 25 | -------------------------------------------------------------------------------- /minitorch/README.md: -------------------------------------------------------------------------------- 1 | # MiniTorch 2 | 3 | Resources 4 | 5 | - [MiniTorch](https://minitorch.github.io/) 6 | - [minitorch](https://github.com/minitorch/) 7 | - [MiniTorch: A DIY Course on Machine Learning Engineering](https://www.youtube.com/playlist?list=PLO45-80-XKkQyROXXpn4PfjF1J2tH46w8) 8 | -------------------------------------------------------------------------------- /ml_from_scratch/README.md: -------------------------------------------------------------------------------- 1 | # ML and DL from scratch 2 | 3 | - 
@dataclass
class LinearRegression:
    """Linear model ``f(x) = xW + b`` trained by batch gradient descent on MSE.

    With ``MSE = sum((y - f(x))^2) / n_samples`` the gradients are::

        d/dW (y - f(x))^2 = 2 (y - f(x)) * d/dW (y - xW - b) = -2 x (y - f(x))
        d/db (y - f(x))^2 = 2 (y - f(x)) * d/db (y - xW - b) = -2 (y - f(x))

    ``fit`` creates the ``weights`` and ``bias`` attributes; calling
    ``predict`` before ``fit`` raises ``AttributeError``.
    """

    # Training data supplied at construction; used by ``fit`` when it is
    # called without arguments.
    features: np.ndarray
    labels: np.ndarray
    # Gradient-descent step size.
    learning_rate: float
    # Number of full passes over the training data.
    epochs: int
    # When true, print the MSE loss after every epoch.
    logging: bool

    def fit(
        self,
        features: "np.ndarray | None" = None,
        labels: "np.ndarray | None" = None,
    ) -> None:
        """Fit the model with batch gradient descent.

        Args:
            features: 2-D array of shape ``(n_samples, n_features)``.
                Defaults to the ``features`` given to the constructor.
            labels: 1-D array of ``n_samples`` targets. Defaults to the
                ``labels`` given to the constructor.
        """
        features = self.features if features is None else features
        labels = self.labels if labels is None else labels

        n_samples, n_features = features.shape
        self.weights, self.bias = np.zeros(n_features), 0.0

        for epoch in range(self.epochs):
            residuals = labels - self.predict(features)

            d_weights = -2 / n_samples * features.T.dot(residuals)
            d_bias = -2 / n_samples * residuals.sum()

            self.weights -= self.learning_rate * d_weights
            self.bias -= self.learning_rate * d_bias

            # The logged loss is the one measured before this epoch's update.
            mse_loss = np.mean(np.square(residuals))
            if self.logging:
                print(f"MSE loss [{epoch}] : {mse_loss:.15f}")

    def predict(self, features: np.ndarray) -> np.ndarray:
        """Return ``features @ weights + bias`` for the given features."""
        return features.dot(self.weights) + self.bias


if __name__ == "__main__":
    # training data
    X_train = np.arange(0, 250).reshape(-1, 1)
    y_train = np.arange(0, 500, 2)

    # testing data
    X_test = np.arange(300, 400, 8).reshape(-1, 1)
    y_test = np.arange(600, 800, 16)

    # Train model
    LR = LinearRegression(X_train, y_train, learning_rate=1e-5, epochs=75, logging=True)

    LR.fit(X_train, y_train)

    preds = LR.predict(X_test).round()

    # Plot the data
    fig, axs = plt.subplots(nrows=1, ncols=3)
    fig.suptitle("f(x) = 2x")
    fig.tight_layout()
    fig.set_size_inches(18, 8)

    axs[0].set_title("Visualization for f(x) = 2x")
    axs[0].set_xlabel("x")
    axs[0].set_ylabel("y")
    axs[0].plot(X_train, y_train)

    axs[1].set_title("Scatterplot for f(x) = 2x Data")
    axs[1].set_xlabel("x")
    axs[1].set_ylabel("y")
    axs[1].scatter(X_test, y_test, color="blue")

    axs[2].set_title("Visualization for Approximated f(x) = 2x")
    axs[2].set_xlabel("x")
    axs[2].set_ylabel("y")
    axs[2].scatter(X_test, y_test, color="blue")
    axs[2].plot(X_test, preds)

    plt.show()

    # The rounded predictions are scored as if each value were a class label;
    # scikit-learn metrics take (y_true, y_pred) in that order.
    accuracy = accuracy_score(y_test, preds)
    precision, recall, fscore, _ = precision_recall_fscore_support(
        y_test, preds, average="macro"
    )

    print(f"Accuracy: {accuracy:.3f}")
    print(f"Precision: {precision:.3f}")
    print(f"Recall: {recall:.3f}")
    print(f"F-score: {fscore:.3f}")
def tokenize(text: str) -> List[str]:
    """Lower-case ``text``, strip punctuation and split it on whitespace."""
    without_punctuation = re.sub(r"[^\w\s]", "", text)
    return without_punctuation.lower().split()


def calculate_word_frequencies(document: List[str]) -> Dict[str, int]:
    """Return how many times each token occurs in ``document``."""
    frequencies: Dict[str, int] = {}
    for word in document:
        frequencies[word] = frequencies.get(word, 0) + 1
    return frequencies


def calculate_tf(word_counts: Dict[str, int], document_length: int) -> Dict[str, float]:
    """Return the term frequency (count / document length) of each word."""
    length = float(document_length)
    return {word: count / length for word, count in word_counts.items()}


def calculate_idf(documents_word_counts: List[Dict[str, int]]) -> Dict[str, float]:
    """Return the smoothed inverse document frequency of every word.

    ``idf(word) = log10((N + 1) / (df(word) + 1))`` where ``N`` is the number
    of documents and ``df`` the number of documents in which the word has a
    positive count.

    Args:
        documents_word_counts: One word-count mapping per document. Mappings
            may be zero-filled over a shared vocabulary; a count of 0 is not
            treated as an occurrence.

    Returns:
        Mapping from word to IDF, in first-seen order.
    """
    n_documents = len(documents_word_counts)

    # Single pass over the corpus instead of rescanning every document for
    # every word. Each key is registered even when its count is 0, so the
    # word still gets an IDF entry.
    document_frequency: Dict[str, int] = {}
    for word_counts in documents_word_counts:
        for word, count in word_counts.items():
            document_frequency[word] = document_frequency.get(word, 0) + (count > 0)

    return {
        word: np.log10((n_documents + 1) / (df + 1))
        for word, df in document_frequency.items()
    }


def calculate_tfidf(
    tf_dict: Dict[str, float], idf_dict: Dict[str, float]
) -> Dict[str, float]:
    """Return ``tf * idf`` for each word of one document.

    Raises:
        KeyError: If a word in ``tf_dict`` has no entry in ``idf_dict``.
    """
    return {word: tf_val * idf_dict[word] for word, tf_val in tf_dict.items()}


def visualize_tfidf(tfidf_matrix: pd.DataFrame) -> None:
    """Show ``tfidf_matrix`` as an annotated heatmap."""
    plt.figure(figsize=(10, 10))
    sns.heatmap(tfidf_matrix, annot=True, cmap="YlGnBu")
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    plt.show()


def main():
    """Compare the hand-rolled TF-IDF with scikit-learn's on two quotes."""
    sentences = [
        # seneca
        "Life, if well lived, is long enough.",
        # steve jobs
        "Your time is limited, so don't waste it living someone else's life.",
    ]

    documents = [tokenize(sentence) for sentence in sentences]
    documents_word_counts = [calculate_word_frequencies(doc) for doc in documents]
    idf_dict = calculate_idf(documents_word_counts)

    tfidfs = [
        calculate_tfidf(calculate_tf(doc_word_counts, len(doc)), idf_dict)
        for doc, doc_word_counts in zip(documents, documents_word_counts)
    ]

    # A term that is absent from a document has tf = 0, hence tf-idf = 0;
    # without fillna those cells would be NaN.
    tfidf_matrix = (
        pd.DataFrame(tfidfs, index=["Document A", "Document B"]).fillna(0.0).T
    )
    visualize_tfidf(tfidf_matrix)

    # scikit-learn
    titles = ["seneca", "steve_jobs"]

    vectorizer = TfidfVectorizer()
    vector = vectorizer.fit_transform(sentences)

    tfidf_df = pd.DataFrame(
        vector.toarray(), index=titles, columns=vectorizer.get_feature_names_out()
    )

    visualize_tfidf(tfidf_df.T)


if __name__ == "__main__":
    main()
Introduction to Sentence Embeddings](https://osanseviero.github.io/hackerllama/blog/posts/sentence_embeddings/) 5 | - [Embeddings: What they are and why they matter](https://simonwillison.net/2023/Oct/23/embeddings/) 6 | -------------------------------------------------------------------------------- /nlp/embeddings/tfidf.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 12, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# seneca\n", 10 | "sentence_a = \"\"\"Life, if well lived, is long enough.\"\"\"\n", 11 | "# steve jobs\n", 12 | "sentence_b = \"\"\"Your time is limited, so don't waste it living someone else's life.\"\"\"" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 26, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "from typing import List\n", 22 | "import re\n", 23 | "\n", 24 | "\n", 25 | "def tokenize(text: str) -> List[str]:\n", 26 | " # Remove punctuation using regex, keeping words and numbers\n", 27 | " cleaned_text = re.sub(r\"[^\\w\\s]\", \"\", text)\n", 28 | " # Split the cleaned text into words\n", 29 | " tokens = cleaned_text.lower().split()\n", 30 | " return tokens" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 27, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "7\n", 43 | "12\n" 44 | ] 45 | }, 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "17" 50 | ] 51 | }, 52 | "execution_count": 27, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": [ 58 | "doc_a = tokenize(sentence_a)\n", 59 | "doc_b = tokenize(sentence_b)\n", 60 | "\n", 61 | "print(len(doc_a))\n", 62 | "print(len(doc_b))\n", 63 | "\n", 64 | "total_corpus = set(doc_a).union(set(doc_b))\n", 65 | "\n", 66 | "len(total_corpus)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": 
{}, 72 | "source": [ 73 | "### bag of words\n" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 28, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "data": { 83 | "text/html": [ 84 | "
\n", 85 | "\n", 98 | "\n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | "
01
so01
lived10
it01
life11
elses01
time01
well10
long10
if10
living01
your01
is11
limited01
dont01
waste01
someone01
enough10
\n", 194 | "
" 195 | ], 196 | "text/plain": [ 197 | " 0 1\n", 198 | "so 0 1\n", 199 | "lived 1 0\n", 200 | "it 0 1\n", 201 | "life 1 1\n", 202 | "elses 0 1\n", 203 | "time 0 1\n", 204 | "well 1 0\n", 205 | "long 1 0\n", 206 | "if 1 0\n", 207 | "living 0 1\n", 208 | "your 0 1\n", 209 | "is 1 1\n", 210 | "limited 0 1\n", 211 | "dont 0 1\n", 212 | "waste 0 1\n", 213 | "someone 0 1\n", 214 | "enough 1 0" 215 | ] 216 | }, 217 | "execution_count": 28, 218 | "metadata": {}, 219 | "output_type": "execute_result" 220 | } 221 | ], 222 | "source": [ 223 | "import pandas as pd\n", 224 | "\n", 225 | "\n", 226 | "word_count_a = dict.fromkeys(total_corpus, 0)\n", 227 | "word_count_b = dict.fromkeys(total_corpus, 0)\n", 228 | "\n", 229 | "for word in doc_a:\n", 230 | " word_count_a[word] += 1\n", 231 | "\n", 232 | "for word in doc_b:\n", 233 | " word_count_b[word] += 1\n", 234 | "\n", 235 | "pd.set_option(\"display.max_rows\", None)\n", 236 | "\n", 237 | "freq = pd.DataFrame([word_count_a, word_count_b])\n", 238 | "freq.T" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "### TF\n" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 29, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "text/plain": [ 256 | "{'so': 0.0,\n", 257 | " 'lived': 0.14285714285714285,\n", 258 | " 'it': 0.0,\n", 259 | " 'life': 0.14285714285714285,\n", 260 | " 'elses': 0.0,\n", 261 | " 'time': 0.0,\n", 262 | " 'well': 0.14285714285714285,\n", 263 | " 'long': 0.14285714285714285,\n", 264 | " 'if': 0.14285714285714285,\n", 265 | " 'living': 0.0,\n", 266 | " 'your': 0.0,\n", 267 | " 'is': 0.14285714285714285,\n", 268 | " 'limited': 0.0,\n", 269 | " 'dont': 0.0,\n", 270 | " 'waste': 0.0,\n", 271 | " 'someone': 0.0,\n", 272 | " 'enough': 0.14285714285714285}" 273 | ] 274 | }, 275 | "execution_count": 29, 276 | "metadata": {}, 277 | "output_type": "execute_result" 278 | } 279 | ], 280 | "source": [ 281 | "def tf(word_counts: 
dict, document: list[str]) -> dict:\n", 282 | " \"\"\"Calculate term frequency of each word in a document.\"\"\"\n", 283 | "\n", 284 | " tf_dict = {}\n", 285 | " corpus_count = len(document)\n", 286 | "\n", 287 | " for word, count in word_counts.items():\n", 288 | " tf_dict[word] = count / float(corpus_count)\n", 289 | "\n", 290 | " return tf_dict\n", 291 | "\n", 292 | "\n", 293 | "tf(word_count_a, doc_a)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "### IDF\n" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 30, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "{'so': 0.17609125905568124,\n", 312 | " 'lived': 0.17609125905568124,\n", 313 | " 'it': 0.17609125905568124,\n", 314 | " 'life': 0.0,\n", 315 | " 'elses': 0.17609125905568124,\n", 316 | " 'time': 0.17609125905568124,\n", 317 | " 'well': 0.17609125905568124,\n", 318 | " 'long': 0.17609125905568124,\n", 319 | " 'if': 0.17609125905568124,\n", 320 | " 'living': 0.17609125905568124,\n", 321 | " 'your': 0.17609125905568124,\n", 322 | " 'is': 0.0,\n", 323 | " 'limited': 0.17609125905568124,\n", 324 | " 'dont': 0.17609125905568124,\n", 325 | " 'waste': 0.17609125905568124,\n", 326 | " 'someone': 0.17609125905568124,\n", 327 | " 'enough': 0.17609125905568124}" 328 | ] 329 | }, 330 | "execution_count": 30, 331 | "metadata": {}, 332 | "output_type": "execute_result" 333 | } 334 | ], 335 | "source": [ 336 | "import numpy as np\n", 337 | "\n", 338 | "\n", 339 | "def idf(word_counts: list[dict[str, int]]) -> dict:\n", 340 | " \"\"\"Given N documents, no. 
of documents in which the the term appears for each term\"\"\"\n", 341 | " idf_dict = {}\n", 342 | " N = len(word_counts)\n", 343 | "\n", 344 | " idf_dict = dict.fromkeys(word_counts[0].keys(), 0)\n", 345 | "\n", 346 | " for word in idf_dict.keys():\n", 347 | " idf_dict[word] = sum(doc[word] > 0 for doc in word_counts)\n", 348 | "\n", 349 | " for word, df in idf_dict.items():\n", 350 | " idf_dict[word] = np.log10((N + 1.0) / (df + 1.0))\n", 351 | "\n", 352 | " return idf_dict\n", 353 | "\n", 354 | "\n", 355 | "idfs = idf([word_count_a, word_count_b])\n", 356 | "idfs" 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "metadata": {}, 362 | "source": [ 363 | "### TF-IDF\n" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 31, 369 | "metadata": {}, 370 | "outputs": [ 371 | { 372 | "data": { 373 | "text/html": [ 374 | "
\n", 375 | "\n", 388 | "\n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | "
01
so0.0000000.014674
lived0.0251560.000000
it0.0000000.014674
life0.0000000.000000
elses0.0000000.014674
time0.0000000.014674
well0.0251560.000000
long0.0251560.000000
if0.0251560.000000
living0.0000000.014674
your0.0000000.014674
is0.0000000.000000
limited0.0000000.014674
dont0.0000000.014674
waste0.0000000.014674
someone0.0000000.014674
enough0.0251560.000000
\n", 484 | "
" 485 | ], 486 | "text/plain": [ 487 | " 0 1\n", 488 | "so 0.000000 0.014674\n", 489 | "lived 0.025156 0.000000\n", 490 | "it 0.000000 0.014674\n", 491 | "life 0.000000 0.000000\n", 492 | "elses 0.000000 0.014674\n", 493 | "time 0.000000 0.014674\n", 494 | "well 0.025156 0.000000\n", 495 | "long 0.025156 0.000000\n", 496 | "if 0.025156 0.000000\n", 497 | "living 0.000000 0.014674\n", 498 | "your 0.000000 0.014674\n", 499 | "is 0.000000 0.000000\n", 500 | "limited 0.000000 0.014674\n", 501 | "dont 0.000000 0.014674\n", 502 | "waste 0.000000 0.014674\n", 503 | "someone 0.000000 0.014674\n", 504 | "enough 0.025156 0.000000" 505 | ] 506 | }, 507 | "execution_count": 31, 508 | "metadata": {}, 509 | "output_type": "execute_result" 510 | } 511 | ], 512 | "source": [ 513 | "def tfidf(doc_elements: dict[str, int], idfs: dict[str, int]) -> dict:\n", 514 | " \"\"\"TF * IDF per word given a single word in a single document\"\"\"\n", 515 | "\n", 516 | " tfidf_dict = {}\n", 517 | "\n", 518 | " for word, val in doc_elements.items():\n", 519 | " tfidf_dict[word] = val * idfs[word]\n", 520 | "\n", 521 | " return tfidf_dict\n", 522 | "\n", 523 | "\n", 524 | "# calculate term frequency for each document\n", 525 | "tf_a = tf(word_count_a, doc_a)\n", 526 | "tf_b = tf(word_count_b, doc_b)\n", 527 | "\n", 528 | "# calculate inverse document frequency for each document\n", 529 | "tfidf_a = tfidf(tf_a, idfs)\n", 530 | "tfidf_b = tfidf(tf_b, idfs)\n", 531 | "\n", 532 | "# return score\n", 533 | "document_tfidf = pd.DataFrame([tfidf_a, tfidf_b])\n", 534 | "document_tfidf.T" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": 32, 540 | "metadata": {}, 541 | "outputs": [ 542 | { 543 | "data": { 544 | "image/png": "", 545 | "text/plain": [ 546 | "
" 547 | ] 548 | }, 549 | "metadata": {}, 550 | "output_type": "display_data" 551 | } 552 | ], 553 | "source": [ 554 | "import matplotlib.pyplot as plt\n", 555 | "import seaborn as sns\n", 556 | "\n", 557 | "\n", 558 | "def visualize_tfidf(tfidf_matrix: pd.DataFrame):\n", 559 | " plt.figure(figsize=(10, 10))\n", 560 | " sns.heatmap(tfidf_matrix, annot=True, cmap=\"YlGnBu\")\n", 561 | " plt.xticks(rotation=45, ha=\"right\")\n", 562 | " plt.tight_layout()\n", 563 | " plt.show()\n", 564 | "\n", 565 | "\n", 566 | "# Prepare the TF-IDF matrix for visualization and EDA\n", 567 | "tfidf_matrix = pd.DataFrame([tfidf_a, tfidf_b], index=[\"Document A\", \"Document B\"]).T\n", 568 | "\n", 569 | "# Visualize the TF-IDF matrix\n", 570 | "visualize_tfidf(tfidf_matrix)" 571 | ] 572 | }, 573 | { 574 | "cell_type": "markdown", 575 | "metadata": {}, 576 | "source": [ 577 | "The heatmap displays the TF-IDF scores of different words across two documents (A and B). The colors indicate the magnitude of the TF-IDF scores, with darker colors representing higher scores. Words that have non-zero scores in both documents are those that are shared between the documents. Words that have a high score in one document and a low or zero score in the other suggest uniqueness. 
For example, \"limited\" has a non-zero score only in Document B, indicating it's unique to that document within the context of these two documents.\n" 578 | ] 579 | }, 580 | { 581 | "cell_type": "markdown", 582 | "metadata": {}, 583 | "source": [ 584 | "# scikit-learn\n" 585 | ] 586 | }, 587 | { 588 | "cell_type": "code", 589 | "execution_count": 38, 590 | "metadata": {}, 591 | "outputs": [], 592 | "source": [ 593 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 594 | "\n", 595 | "corpus = [sentence_a, sentence_b]\n", 596 | "titles = [\"seneca\", \"steve_jobs\"]\n", 597 | "\n", 598 | "vectorizer = TfidfVectorizer()\n", 599 | "vector = vectorizer.fit_transform(corpus)\n", 600 | "dict(zip(vectorizer.get_feature_names_out(), vector.toarray()[0]))\n", 601 | "\n", 602 | "tfidf_df = pd.DataFrame(\n", 603 | " vector.toarray(), index=titles, columns=vectorizer.get_feature_names_out()\n", 604 | ")" 605 | ] 606 | }, 607 | { 608 | "cell_type": "code", 609 | "execution_count": 39, 610 | "metadata": {}, 611 | "outputs": [ 612 | { 613 | "data": { 614 | "image/png": "", 615 | "text/plain": [ 616 | "
" 617 | ] 618 | }, 619 | "metadata": {}, 620 | "output_type": "display_data" 621 | } 622 | ], 623 | "source": [ 624 | "visualize_tfidf(tfidf_df.T)" 625 | ] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "execution_count": null, 630 | "metadata": {}, 631 | "outputs": [], 632 | "source": [] 633 | } 634 | ], 635 | "metadata": { 636 | "kernelspec": { 637 | "display_name": "ai", 638 | "language": "python", 639 | "name": "python3" 640 | }, 641 | "language_info": { 642 | "codemirror_mode": { 643 | "name": "ipython", 644 | "version": 3 645 | }, 646 | "file_extension": ".py", 647 | "mimetype": "text/x-python", 648 | "name": "python", 649 | "nbconvert_exporter": "python", 650 | "pygments_lexer": "ipython3", 651 | "version": "3.12.1" 652 | } 653 | }, 654 | "nbformat": 4, 655 | "nbformat_minor": 2 656 | } 657 | -------------------------------------------------------------------------------- /nn_zero_to_hero/README.md: -------------------------------------------------------------------------------- 1 | # Neural Networks from scratch 2 | 3 | My code for Andrej Karpathy's NN from scratch series (~13 hours) 4 | 5 | ## installations 6 | 7 | ```bash 8 | brew install graphviz 9 | ``` 10 | 11 | ## Building micrograd (currently doing) 12 | 13 | What? 
A tiny scalar-valued autograd engine and a neural net library on top of it with PyTorch-like API 14 | 15 | ## Source 16 | 17 | - [Neural Networks: Zero to Hero](https://www.youtube.com/playlist?list=PLAqhIrjkxbuWI23v9cThsA9GvCAUhRvKZ) 18 | - [karpathy/nn-zero-to-hero: Neural Networks: Zero to Hero](https://github.com/karpathy/nn-zero-to-hero) 19 | -------------------------------------------------------------------------------- /nn_zero_to_hero/micrograd/derivatives.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import math\n", 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "\n", 13 | "%matplotlib inline" 14 | ] 15 | }, 16 | { 17 | "attachments": {}, 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Derivative of a function with single input\n" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/plain": [ 32 | "20.0" 33 | ] 34 | }, 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "output_type": "execute_result" 38 | } 39 | ], 40 | "source": [ 41 | "def f(x):\n", 42 | " return 3 * x**2 - 4 * x + 5\n", 43 | "\n", 44 | "\n", 45 | "f(3.0)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "image/png": "", 56 | "text/plain": [ 57 | "
" 58 | ] 59 | }, 60 | "metadata": { 61 | "needs_background": "light" 62 | }, 63 | "output_type": "display_data" 64 | } 65 | ], 66 | "source": [ 67 | "xs = np.arange(-5, 5, 0.25)\n", 68 | "ys = f(xs) # apply f to each element of xs\n", 69 | "plt.plot(xs, ys);" 70 | ] 71 | }, 72 | { 73 | "attachments": {}, 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "what is derivative at every point of this function? in class you would do it by hand\n" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 5, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "20.014003000000002" 89 | ] 90 | }, 91 | "execution_count": 5, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "h = 0.001\n", 98 | "x = 3.0\n", 99 | "f(x + h) # do you expect function to be greater or less after bumping h\n", 100 | "(f(x + h) - f(x)) / h # function responded in positive direction normalized by run" 101 | ] 102 | }, 103 | { 104 | "attachments": {}, 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "make h very small to converge to right amount\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 9, 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "data": { 118 | "text/plain": [ 119 | "14.000001158365194" 120 | ] 121 | }, 122 | "execution_count": 9, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "h = 0.0000000001\n", 129 | "x = 3.0\n", 130 | "(f(x + h) - f(x)) / h" 131 | ] 132 | }, 133 | { 134 | "attachments": {}, 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "doing this from function\n", 139 | "\n", 140 | "derivative of $f(x) = 3x^2 - 4x + 5 = 6x - 4$\n", 141 | "\n", 142 | "when $x = 3$, $6 \\cdot 3 - 4 = 14$\n" 143 | ] 144 | }, 145 | { 146 | "attachments": {}, 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "What 
if it's a negative number?\n", 151 | "\n", 152 | "looking at function, if we bump it, it'll go down, so it's going to be negative\n" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 14, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "-21.999966293151374" 164 | ] 165 | }, 166 | "execution_count": 14, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "h = 0.0000000001\n", 173 | "x = -3.0\n", 174 | "(f(x + h) - f(x)) / h" 175 | ] 176 | }, 177 | { 178 | "attachments": {}, 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "What if it's a number where slope is zero?\n", 183 | "\n", 184 | "nudging it doesn't change the value of the function\n" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 11, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "data": { 194 | "text/plain": [ 195 | "0.0" 196 | ] 197 | }, 198 | "execution_count": 11, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | } 202 | ], 203 | "source": [ 204 | "h = 0.0000000001\n", 205 | "x = 2 / 3 # slope is zero at this point\n", 206 | "(f(x + h) - f(x)) / h" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 8, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/plain": [ 217 | "0.0" 218 | ] 219 | }, 220 | "execution_count": 8, 221 | "metadata": {}, 222 | "output_type": "execute_result" 223 | } 224 | ], 225 | "source": [ 226 | "h = 0.0000000000000001 # floating point arithmetic, represetnation is finite\n", 227 | "x = 3.0\n", 228 | "(f(x + h) - f(x)) / h" 229 | ] 230 | }, 231 | { 232 | "attachments": {}, 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "## Derivative of a function with multiple input\n" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 12, 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | 
"name": "stdout", 246 | "output_type": "stream", 247 | "text": [ 248 | "4.0\n" 249 | ] 250 | } 251 | ], 252 | "source": [ 253 | "a = 2.0\n", 254 | "b = -3.0\n", 255 | "c = 10.0\n", 256 | "d = a * b + c\n", 257 | "\n", 258 | "print(d)" 259 | ] 260 | }, 261 | { 262 | "attachments": {}, 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "what happens if we bump a?\n", 267 | "\n", 268 | "because b is negative, d is going to go down\n" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 15, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "name": "stdout", 278 | "output_type": "stream", 279 | "text": [ 280 | "d1 4.0\n", 281 | "d2 3.99999997\n", 282 | "slope -2.999999981767587\n" 283 | ] 284 | } 285 | ], 286 | "source": [ 287 | "h = 0.00000001\n", 288 | "\n", 289 | "# fix inputs at values of interest\n", 290 | "a = 2.0\n", 291 | "b = -3.0\n", 292 | "c = 10.0\n", 293 | "\n", 294 | "d1 = a * b + c\n", 295 | "a += h\n", 296 | "d2 = a * b + c\n", 297 | "\n", 298 | "print(\"d1\", d1)\n", 299 | "print(\"d2\", d2)\n", 300 | "print(\"slope\", (d2 - d1) / h)" 301 | ] 302 | }, 303 | { 304 | "attachments": {}, 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "mathematically:\n", 309 | "\n", 310 | "derivative of d with respect to a gives you b, and b is -3\n" 311 | ] 312 | }, 313 | { 314 | "attachments": {}, 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "what happens if we bump b?\n", 319 | "\n", 320 | "because a is positive, we'll be adding more to d.\n", 321 | "\n", 322 | "What is the sensitivity? the slope of the function? 
it's 2\n" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 16, 328 | "metadata": {}, 329 | "outputs": [ 330 | { 331 | "name": "stdout", 332 | "output_type": "stream", 333 | "text": [ 334 | "d1 4.0\n", 335 | "d2 4.00000002\n", 336 | "slope 1.999999987845058\n" 337 | ] 338 | } 339 | ], 340 | "source": [ 341 | "h = 0.00000001\n", 342 | "\n", 343 | "# fix inputs at values of interest\n", 344 | "a = 2.0\n", 345 | "b = -3.0\n", 346 | "c = 10.0\n", 347 | "\n", 348 | "d1 = a * b + c\n", 349 | "b += h\n", 350 | "d2 = a * b + c\n", 351 | "\n", 352 | "print(\"d1\", d1)\n", 353 | "print(\"d2\", d2)\n", 354 | "print(\"slope\", (d2 - d1) / h)" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 17, 360 | "metadata": {}, 361 | "outputs": [ 362 | { 363 | "name": "stdout", 364 | "output_type": "stream", 365 | "text": [ 366 | "d1 4.0\n", 367 | "d2 4.000000010000001\n", 368 | "slope 1.000000082740371\n" 369 | ] 370 | } 371 | ], 372 | "source": [ 373 | "h = 0.00000001\n", 374 | "\n", 375 | "# fix inputs at values of interest\n", 376 | "a = 2.0\n", 377 | "b = -3.0\n", 378 | "c = 10.0\n", 379 | "\n", 380 | "d1 = a * b + c\n", 381 | "c += h\n", 382 | "d2 = a * b + c\n", 383 | "\n", 384 | "print(\"d1\", d1)\n", 385 | "print(\"d2\", d2)\n", 386 | "print(\"slope\", (d2 - d1) / h)" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [] 395 | } 396 | ], 397 | "metadata": { 398 | "kernelspec": { 399 | "display_name": "base", 400 | "language": "python", 401 | "name": "python3" 402 | }, 403 | "language_info": { 404 | "codemirror_mode": { 405 | "name": "ipython", 406 | "version": 3 407 | }, 408 | "file_extension": ".py", 409 | "mimetype": "text/x-python", 410 | "name": "python", 411 | "nbconvert_exporter": "python", 412 | "pygments_lexer": "ipython3", 413 | "version": "3.9.10" 414 | }, 415 | "orig_nbformat": 4, 416 | "vscode": { 417 | "interpreter": { 418 
| "hash": "0f1e841692445df6c0f476977380d4c26cc40d52508098a18c340919add514d9" 419 | } 420 | } 421 | }, 422 | "nbformat": 4, 423 | "nbformat_minor": 2 424 | } 425 | -------------------------------------------------------------------------------- /nn_zero_to_hero/micrograd/exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "JnGHatCI51JP" 7 | }, 8 | "source": [ 9 | "# micrograd exercises\n", 10 | "\n", 11 | "1. watch the [micrograd video](https://www.youtube.com/watch?v=VMj-3S1tku0) on YouTube\n", 12 | "2. come back and complete these exercises to level up :)\n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": { 18 | "id": "OFt6NKOz6iBZ" 19 | }, 20 | "source": [ 21 | "## section 1: derivatives\n" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "id": "3Jx9fCXl5xHd" 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "# here is a mathematical expression that takes 3 inputs and produces one output\n", 33 | "from math import sin, cos\n", 34 | "\n", 35 | "\n", 36 | "def f(a, b, c):\n", 37 | " return -(a**3) + sin(3 * b) - 1.0 / c + b**2.5 - a**0.5\n", 38 | "\n", 39 | "\n", 40 | "print(f(2, 3, 4))" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "id": "qXaH59eL9zxf" 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "# write the function df that returns the analytical gradient of f\n", 52 | "# i.e. 
use your skills from calculus to take the derivative, then implement the formula\n", 53 | "# if you do not know calculus then feel free to ask wolframalpha, e.g.:\n", 54 | "# https://www.wolframalpha.com/input?i=d%2Fda%28sin%283*a%29%29%29\n", 55 | "\n", 56 | "\n", 57 | "def gradf(a, b, c):\n", 58 | " return [0, 0, 0] # todo, return [df/da, df/db, df/dc]\n", 59 | "\n", 60 | "\n", 61 | "# expected answer is the list of\n", 62 | "ans = [-12.353553390593273, 10.25699027111255, 0.0625]\n", 63 | "yours = gradf(2, 3, 4)\n", 64 | "for dim in range(3):\n", 65 | " ok = \"OK\" if abs(yours[dim] - ans[dim]) < 1e-5 else \"WRONG!\"\n", 66 | " print(f\"{ok} for dim {dim}: expected {ans[dim]}, yours returns {yours[dim]}\")" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "id": "_27n-KTA9Qla" 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "# now estimate the gradient numerically without any calculus, using\n", 78 | "# the approximation we used in the video.\n", 79 | "# you should not call the function gradf from the last cell\n", 80 | "\n", 81 | "# -----------\n", 82 | "numerical_grad = [0, 0, 0] # TODO\n", 83 | "# -----------\n", 84 | "\n", 85 | "for dim in range(3):\n", 86 | " ok = \"OK\" if abs(numerical_grad[dim] - ans[dim]) < 1e-5 else \"WRONG!\"\n", 87 | " print(\n", 88 | " f\"{ok} for dim {dim}: expected {ans[dim]}, yours returns {numerical_grad[dim]}\"\n", 89 | " )" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "id": "BUqsGb5o_h2P" 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "# there is an alternative formula that provides a much better numerical\n", 101 | "# approximation to the derivative of a function.\n", 102 | "# learn about it here: https://en.wikipedia.org/wiki/Symmetric_derivative\n", 103 | "# implement it. 
confirm that for the same step size h this version gives a\n", 104 | "# better approximation.\n", 105 | "\n", 106 | "# -----------\n", 107 | "numerical_grad2 = [0, 0, 0] # TODO\n", 108 | "# -----------\n", 109 | "\n", 110 | "for dim in range(3):\n", 111 | " ok = \"OK\" if abs(numerical_grad2[dim] - ans[dim]) < 1e-5 else \"WRONG!\"\n", 112 | " print(\n", 113 | " f\"{ok} for dim {dim}: expected {ans[dim]}, yours returns {numerical_grad2[dim]}\"\n", 114 | " )" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": { 120 | "id": "tklF9s_4AtlI" 121 | }, 122 | "source": [ 123 | "## section 2: support for softmax\n" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "id": "nAPe_RVrCTeO" 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "# Value class starter code, with many functions taken out\n", 135 | "from math import exp, log\n", 136 | "\n", 137 | "\n", 138 | "class Value:\n", 139 | " def __init__(self, data, _children=(), _op=\"\", label=\"\"):\n", 140 | " self.data = data\n", 141 | " self.grad = 0.0\n", 142 | " self._backward = lambda: None\n", 143 | " self._prev = set(_children)\n", 144 | " self._op = _op\n", 145 | " self.label = label\n", 146 | "\n", 147 | " def __repr__(self):\n", 148 | " return f\"Value(data={self.data})\"\n", 149 | "\n", 150 | " def __add__(self, other): # exactly as in the video\n", 151 | " other = other if isinstance(other, Value) else Value(other)\n", 152 | " out = Value(self.data + other.data, (self, other), \"+\")\n", 153 | "\n", 154 | " def _backward():\n", 155 | " self.grad += 1.0 * out.grad\n", 156 | " other.grad += 1.0 * out.grad\n", 157 | "\n", 158 | " out._backward = _backward\n", 159 | "\n", 160 | " return out\n", 161 | "\n", 162 | " # ------\n", 163 | " # re-implement all the other functions needed for the exercises below\n", 164 | " # your code here\n", 165 | " # TODO\n", 166 | " # ------\n", 167 | "\n", 168 | " def backward(self): # exactly as in 
video\n", 169 | " topo = []\n", 170 | " visited = set()\n", 171 | "\n", 172 | " def build_topo(v):\n", 173 | " if v not in visited:\n", 174 | " visited.add(v)\n", 175 | " for child in v._prev:\n", 176 | " build_topo(child)\n", 177 | " topo.append(v)\n", 178 | "\n", 179 | " build_topo(self)\n", 180 | "\n", 181 | " self.grad = 1.0\n", 182 | " for node in reversed(topo):\n", 183 | " node._backward()" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": { 190 | "id": "VgWvwVQNAvnI" 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "# without referencing our code/video __too__ much, make this cell work\n", 195 | "# you'll have to implement (in some cases re-implemented) a number of functions\n", 196 | "# of the Value object, similar to what we've seen in the video.\n", 197 | "# instead of the squared error loss this implements the negative log likelihood\n", 198 | "# loss, which is very often used in classification.\n", 199 | "\n", 200 | "# this is the softmax function\n", 201 | "# https://en.wikipedia.org/wiki/Softmax_function\n", 202 | "def softmax(logits):\n", 203 | " counts = [logit.exp() for logit in logits]\n", 204 | " denominator = sum(counts)\n", 205 | " out = [c / denominator for c in counts]\n", 206 | " return out\n", 207 | "\n", 208 | "\n", 209 | "# this is the negative log likelihood loss function, pervasive in classification\n", 210 | "logits = [Value(0.0), Value(3.0), Value(-2.0), Value(1.0)]\n", 211 | "probs = softmax(logits)\n", 212 | "loss = -probs[3].log() # dim 3 acts as the label for this input example\n", 213 | "loss.backward()\n", 214 | "print(loss.data)\n", 215 | "\n", 216 | "ans = [\n", 217 | " 0.041772570515350445,\n", 218 | " 0.8390245074625319,\n", 219 | " 0.005653302662216329,\n", 220 | " -0.8864503806400986,\n", 221 | "]\n", 222 | "for dim in range(4):\n", 223 | " ok = \"OK\" if abs(logits[dim].grad - ans[dim]) < 1e-5 else \"WRONG!\"\n", 224 | " print(f\"{ok} for dim {dim}: expected 
{ans[dim]}, yours returns {logits[dim].grad}\")" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": { 231 | "id": "q7ca1SVAGG1S" 232 | }, 233 | "outputs": [], 234 | "source": [ 235 | "# verify the gradient using the torch library\n", 236 | "# torch should give you the exact same gradient\n", 237 | "import torch" 238 | ] 239 | } 240 | ], 241 | "metadata": { 242 | "colab": { 243 | "provenance": [] 244 | }, 245 | "kernelspec": { 246 | "display_name": "Python 3", 247 | "language": "python", 248 | "name": "python3" 249 | }, 250 | "language_info": { 251 | "name": "python", 252 | "version": "3.10.0" 253 | }, 254 | "vscode": { 255 | "interpreter": { 256 | "hash": "50587d438b9934cf2712ee500622f7def3550698a6c70c07f7d3c00dd27cb653" 257 | } 258 | } 259 | }, 260 | "nbformat": 4, 261 | "nbformat_minor": 0 262 | } 263 | -------------------------------------------------------------------------------- /nn_zero_to_hero/micrograd/micrograd.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from viz import draw_dot" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 13, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Value(data=-8.0)\n", 22 | "{Value(data=-2.0), Value(data=4.0)}\n", 23 | "*\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "class Value:\n", 29 | " def __init__(self, data, _children=(), _op=\"\", label=\"\"):\n", 30 | " self.data = data\n", 31 | " self.grad = 0.0\n", 32 | " self._prev = set(_children)\n", 33 | " self._op = _op\n", 34 | " self.label = label\n", 35 | "\n", 36 | " def __repr__(self):\n", 37 | " return f\"Value(data={self.data})\"\n", 38 | "\n", 39 | " def __add__(self, other):\n", 40 | " return Value(self.data + other.data, (self, other), 
\"+\")\n", 41 | "\n", 42 | " def __mul__(self, other):\n", 43 | " return Value(self.data * other.data, (self, other), \"*\")\n", 44 | "\n", 45 | "\n", 46 | "a = Value(2.0, label=\"a\")\n", 47 | "b = Value(-3.0, label=\"b\")\n", 48 | "c = Value(10.0, label=\"c\")\n", 49 | "e = a * b\n", 50 | "e.label = \"e\"\n", 51 | "d = e + c\n", 52 | "d.label = \"d\"\n", 53 | "f = Value(-2.0, label=\"f\")\n", 54 | "L = d * f\n", 55 | "L.label = \"L\"\n", 56 | "\n", 57 | "print(L)\n", 58 | "print(L._prev) # the children of the value\n", 59 | "print(L._op) # the operation that created the value" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 15, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "image/svg+xml": [ 70 | "\n", 71 | "\n", 73 | "\n", 75 | "\n", 76 | "\n", 78 | "\n", 79 | "\n", 80 | "\n", 81 | "\n", 82 | "4465548304\n", 83 | "\n", 84 | "f\n", 85 | "\n", 86 | "data -2.0000\n", 87 | "\n", 88 | "grad 0.0000\n", 89 | "\n", 90 | "\n", 91 | "\n", 92 | "4464374016*\n", 93 | "\n", 94 | "*\n", 95 | "\n", 96 | "\n", 97 | "\n", 98 | "4465548304->4464374016*\n", 99 | "\n", 100 | "\n", 101 | "\n", 102 | "\n", 103 | "\n", 104 | "4465548352\n", 105 | "\n", 106 | "d\n", 107 | "\n", 108 | "data 4.0000\n", 109 | "\n", 110 | "grad 0.0000\n", 111 | "\n", 112 | "\n", 113 | "\n", 114 | "4465548352->4464374016*\n", 115 | "\n", 116 | "\n", 117 | "\n", 118 | "\n", 119 | "\n", 120 | "4465548352+\n", 121 | "\n", 122 | "+\n", 123 | "\n", 124 | "\n", 125 | "\n", 126 | "4465548352+->4465548352\n", 127 | "\n", 128 | "\n", 129 | "\n", 130 | "\n", 131 | "\n", 132 | "4464373920\n", 133 | "\n", 134 | "a\n", 135 | "\n", 136 | "data 2.0000\n", 137 | "\n", 138 | "grad 0.0000\n", 139 | "\n", 140 | "\n", 141 | "\n", 142 | "4464377760*\n", 143 | "\n", 144 | "*\n", 145 | "\n", 146 | "\n", 147 | "\n", 148 | "4464373920->4464377760*\n", 149 | "\n", 150 | "\n", 151 | "\n", 152 | "\n", 153 | "\n", 154 | "4464374016\n", 155 | "\n", 156 | "L\n", 157 | "\n", 158 | "data 
-8.0000\n", 159 | "\n", 160 | "grad 0.0000\n", 161 | "\n", 162 | "\n", 163 | "\n", 164 | "4464374016*->4464374016\n", 165 | "\n", 166 | "\n", 167 | "\n", 168 | "\n", 169 | "\n", 170 | "4464376128\n", 171 | "\n", 172 | "c\n", 173 | "\n", 174 | "data 10.0000\n", 175 | "\n", 176 | "grad 0.0000\n", 177 | "\n", 178 | "\n", 179 | "\n", 180 | "4464376128->4465548352+\n", 181 | "\n", 182 | "\n", 183 | "\n", 184 | "\n", 185 | "\n", 186 | "4464377760\n", 187 | "\n", 188 | "e\n", 189 | "\n", 190 | "data -6.0000\n", 191 | "\n", 192 | "grad 0.0000\n", 193 | "\n", 194 | "\n", 195 | "\n", 196 | "4464377760->4465548352+\n", 197 | "\n", 198 | "\n", 199 | "\n", 200 | "\n", 201 | "\n", 202 | "4464377760*->4464377760\n", 203 | "\n", 204 | "\n", 205 | "\n", 206 | "\n", 207 | "\n", 208 | "4464374256\n", 209 | "\n", 210 | "b\n", 211 | "\n", 212 | "data -3.0000\n", 213 | "\n", 214 | "grad 0.0000\n", 215 | "\n", 216 | "\n", 217 | "\n", 218 | "4464374256->4464377760*\n", 219 | "\n", 220 | "\n", 221 | "\n", 222 | "\n", 223 | "\n" 224 | ], 225 | "text/plain": [ 226 | "" 227 | ] 228 | }, 229 | "execution_count": 15, 230 | "metadata": {}, 231 | "output_type": "execute_result" 232 | } 233 | ], 234 | "source": [ 235 | "draw_dot(L)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [] 244 | } 245 | ], 246 | "metadata": { 247 | "kernelspec": { 248 | "display_name": "base", 249 | "language": "python", 250 | "name": "python3" 251 | }, 252 | "language_info": { 253 | "codemirror_mode": { 254 | "name": "ipython", 255 | "version": 3 256 | }, 257 | "file_extension": ".py", 258 | "mimetype": "text/x-python", 259 | "name": "python", 260 | "nbconvert_exporter": "python", 261 | "pygments_lexer": "ipython3", 262 | "version": "3.9.10" 263 | }, 264 | "orig_nbformat": 4, 265 | "vscode": { 266 | "interpreter": { 267 | "hash": "0f1e841692445df6c0f476977380d4c26cc40d52508098a18c340919add514d9" 268 | } 269 | } 270 | }, 271 | 
def trace(root):
    """Collect every node and edge of the expression graph reachable from ``root``.

    The graph is walked through each node's ``_prev`` collection (the nodes it
    was computed from). The walk uses an explicit stack instead of recursion,
    so graphs deeper than the interpreter's recursion limit are handled
    without raising ``RecursionError``.

    Args:
        root: The node to start from. It, and every node reachable from it,
            must be hashable and expose an iterable ``_prev`` attribute.

    Returns:
        A ``(nodes, edges)`` tuple of sets. ``nodes`` holds every reachable
        node (including ``root``); ``edges`` holds ``(child, parent)`` pairs,
        one per distinct child/parent relationship.
    """
    nodes, edges = set(), set()
    pending = [root]
    while pending:
        v = pending.pop()
        if v in nodes:
            # Already expanded via another path; its edges are recorded.
            continue
        nodes.add(v)
        for child in v._prev:
            edges.add((child, v))
            pending.append(child)
    return nodes, edges


def draw_dot(root):
    """Build a left-to-right Graphviz ``Digraph`` of the graph rooted at ``root``.

    Every node becomes a record-shaped box showing its ``label``, ``data`` and
    ``grad``. A node with a non-empty ``_op`` additionally gets a small
    operation node (labelled with ``_op``) that feeds into it, and each
    ``(child, parent)`` edge is drawn from the child to the parent's
    operation node.

    Args:
        root: The output node of the expression graph. Nodes are expected to
            expose ``label``, ``data``, ``grad``, ``_op`` and ``_prev``.

    Returns:
        The populated ``graphviz.Digraph`` (SVG output format).
    """
    # Imported here so that trace() stays usable without graphviz installed.
    from graphviz import Digraph

    dot = Digraph(format="svg", graph_attr={"rankdir": "LR"})  # LR = left to right

    nodes, edges = trace(root)
    for n in nodes:
        # id() gives a name that is unique per live object.
        uid = str(id(n))
        # for any value in the graph, create a rectangular ('record') node for it
        dot.node(
            name=uid,
            label="{ %s | data %.4f | grad %.4f }" % (n.label, n.data, n.grad),
            shape="record",
        )
        if n._op:
            # if this value is a result of some operation, create an op node for it
            dot.node(name=uid + n._op, label=n._op)
            # and connect this node to it
            dot.edge(uid + n._op, uid)

    for n1, n2 in edges:
        # connect n1 to the op node of n2
        dot.edge(str(id(n1)), str(id(n2)) + n2._op)

    return dot
-------------------------------------------------------------------------------- /tensor_puzzles/README.md: -------------------------------------------------------------------------------- 1 | # Tensor Puzzles 2 | 3 | [srush/Tensor-Puzzles: Solve puzzles. Improve your pytorch.](https://github.com/srush/Tensor-Puzzles) 4 | --------------------------------------------------------------------------------