├── 902 Deep Reinforcement Learning ├── .cache │ └── v │ │ └── cache │ │ └── lastfailed ├── images │ ├── google-breakout.jpg │ ├── mdp-illustrated.png │ ├── deep-q-learning-001.png │ ├── bellman_update_example.png │ ├── drl-agent-environment.jpg │ ├── real-life-of-developer.png │ ├── atari-breakout-deepmind.png │ ├── policy_evaluation_example.png │ ├── Q-Learning-Example_clip_image004.gif │ ├── Q-Learning-Example_clip_image006.gif │ ├── Q-Learning-Example_clip_image014.gif │ ├── Q-Learning-Example_clip_image024.gif │ ├── Q-Learning-Example_clip_image030.gif │ ├── deep-q-learning-used-by-deepmind.png │ └── deep-q-learning-algorithm-pseudo-code.png ├── evaluate.py ├── train.py ├── environment.py ├── test.py ├── replay.py └── 02 Deep Reinforcement Learning Part 2.ipynb ├── data ├── exem │ ├── .RData │ └── .Rhistory ├── images │ ├── rainbowsix.jpg │ └── rainbosix_cropped.jpg ├── linear-regression │ ├── challenger.csv │ ├── ex1data1.txt │ └── linear-regression.csv ├── time-series │ └── international-airline-passengers.csv ├── basic_csv_data │ ├── gradient.csv │ └── iris.csv └── titanic │ └── gendermodel.csv ├── 004 Anova └── images │ ├── anova-01.png │ └── anova-02.png ├── 902 Q-Learning ├── mountain-car.gif └── mountain-car.mp4 ├── 002 Distributions └── images │ └── integral.png ├── 501 Autoencoder ├── images │ ├── autoencoder01.png │ └── autoencoder_layer.png ├── 21 Variational Autoencoder (VAE).ipynb └── 01 Introduction Auto-Encoder.ipynb ├── 003 Correlation ├── images │ ├── stationary_data.png │ ├── non-stationary01.png │ ├── non-stationary02.png │ ├── non-stationary03.png │ └── non-stationary04.png └── 02. Correlation Between Categorical Data.ipynb ├── 203 Decision Tree ├── images │ ├── entropy_ball.png │ ├── entropy_ball2.png │ ├── entropy-box-tree.png │ ├── entropy-vs-gini.png │ ├── entropy-box-balls.png │ ├── entropy-box-balls.html │ └── entropy-box-tree.html ├── tennis.csv ├── tree.dot └── 06 Titanic Decision Tree with Python (in Development).ipynb ├── 006 AB Test └── images │ ├── ab_confidence_interval.png │ └── ab_unit_normal_table.png ├── 030 Backpropagation └── images │ ├── neural_network.png │ └── single_layer.png ├── 120 Ensemble ├── img │ └── underfit_right_overfit.png └── 01. 
Ensemble.ipynb ├── 020 Cohort Analysis └── data │ └── online-retail.parquet ├── 102 Autoregression └── images │ ├── Cov_nonstationary.png │ ├── Var_nonstationary.png │ └── Mean_nonstationary.png ├── 502 Convolutional Neural Network ├── images │ ├── car.jpg │ ├── camera.jpg │ ├── maxpool.jpeg │ ├── convolution_example.png │ ├── simple_convolution_operation.png │ └── numerical_no_padding_no_strides.gif └── 02 Transposed Convolution .ipynb ├── 206 Root Cause Analysis └── titanic_decision_tree.png ├── 600 Transformer Machine Translation ├── rnn_torchviz.png ├── sp-bpt-anderson.model ├── rnn_torchviz ├── spe.py ├── preprocessing.py ├── data.py ├── train.py └── beam_search.py ├── 017 Hyperparameter Optimization ├── images │ └── grid-search.jpg └── 02 Grid Search (Hacker Rank).ipynb ├── 010 Intro to Linear Algebra ├── images │ ├── linear-equation-01.png │ ├── linear-equation-02.png │ ├── linear-equation-03.png │ ├── linear-equation-04.png │ ├── linear-equation-05.png │ ├── linear-equation-06.png │ └── linear-equation-07.png ├── 01 Linear Equation, Solution, System of Linear Equations.ipynb └── 02 Vector & Linear Combination.ipynb ├── 103 Logistic Regression ├── images │ └── logistic_regression_01.png └── 03 Predicting Breast Cancer with Scipy.ipynb ├── 500 Perceptron (mini-batch gradient descent) ├── iris.weights ├── images │ └── perceptron_model.png └── Iris data classfication with Scikit-Learn.ipynb ├── 005 Sampling & Hypothesis Test └── images │ └── hypothesis-test-error.png ├── 201 Mathematical Optimization └── images │ ├── Nelder-Mead_Himmelblau.gif │ └── Nelder-Mead_Rosenbrock.gif ├── 601 Dense Passage Retrieval ├── README.md ├── preprocess.py ├── train.py ├── data.py ├── model.py └── test.ipynb ├── 001 Performance Test (ROC, AUC, Confusion Matrix) ├── images │ ├── auc-example.png │ ├── roc-example1.png │ ├── confusion_matrix_simple2.png │ ├── ½Å»ï±¹Áö(2010³â)+95ºÎÀÛ+ÀüÆí+ÀÚ¸·Æ÷ÇÔ.torrent │ ├── ½Å»ï±¹Áö(2010³â)+95ºÎÀÛ+ÀüÆí+ÀÚ¸·Æ÷ÇÔ (1).torrent │ └── %5B한국어더빙x%5D%5B신삼국지%5D_%5B三国%5D.Three.Kingdoms.%5B01-95%5D.2010.BluRay.720p..torrent └── 01. Performance Test.ipynb ├── 903 One-Shot Learning with Memory-Augmented Neural Network ├── images │ ├── memory-block.png │ ├── addressing-mechanism.png │ └── neural-turing-machines.png ├── 03 One-shot learning with Memory-Augmented Neural Network.ipynb └── 01 Turing Machine.ipynb ├── .gitignore ├── 037 Global Average Pooling └── Untitled.ipynb ├── 204 Support Vector Machines └── Support Vector Machine Part I.ipynb ├── README.md ├── 033 Optimizers └── data │ └── ex1data1.txt ├── 100 Linear Regression └── ex1data1.csv ├── Interview Questions.ipynb ├── 014 Latent Dirichlet Allocation (LDA) └── 00. 
Dirichlet Distribution.ipynb ├── 036 Batch Normalization └── 01 Batch Normalization.ipynb ├── 505 Predicting Sin Cos └── 06 [Keras] different variance sin.ipynb ├── 101 Multiple Linear Regression └── Multiple Linear Regression.ipynb ├── 032 Activations ├── 01 Sigmoid.ipynb └── 01 Activations.ipynb ├── 111 Linear Discriminant Analysis (LDA) └── LDA Feature Extraction.ipynb ├── 008 Mathmatics for ML └── 01 Derivatives Rules.ipynb ├── 503 International Airline Passengers prediction └── rnn_with_numpy.py ├── 034 Classifier └── 01 Softmax.ipynb └── Keras Tutorial ├── 001 Getting Started └── 02 Get Weights and Biases as Numpy Array.ipynb ├── 105 [Layer] Embedding Layers └── 01 Embedding Layers.ipynb ├── 200 [Preprocessing] Padding └── 01 Padding for RNN.ipynb └── 106 [Layer] Merge Layers └── 01 Concatenate.ipynb /902 Deep Reinforcement Learning/.cache/v/cache/lastfailed: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /data/exem/.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/data/exem/.RData -------------------------------------------------------------------------------- /data/images/rainbowsix.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/data/images/rainbowsix.jpg -------------------------------------------------------------------------------- /004 Anova/images/anova-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/004 Anova/images/anova-01.png -------------------------------------------------------------------------------- /004 Anova/images/anova-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/004 Anova/images/anova-02.png -------------------------------------------------------------------------------- /902 Q-Learning/mountain-car.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Q-Learning/mountain-car.gif -------------------------------------------------------------------------------- /902 Q-Learning/mountain-car.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Q-Learning/mountain-car.mp4 -------------------------------------------------------------------------------- /data/images/rainbosix_cropped.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/data/images/rainbosix_cropped.jpg -------------------------------------------------------------------------------- /002 Distributions/images/integral.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/002 Distributions/images/integral.png -------------------------------------------------------------------------------- /501 Autoencoder/images/autoencoder01.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/501 Autoencoder/images/autoencoder01.png -------------------------------------------------------------------------------- /003 Correlation/images/stationary_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/003 Correlation/images/stationary_data.png -------------------------------------------------------------------------------- /203 Decision Tree/images/entropy_ball.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/203 Decision Tree/images/entropy_ball.png -------------------------------------------------------------------------------- /203 Decision Tree/images/entropy_ball2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/203 Decision Tree/images/entropy_ball2.png -------------------------------------------------------------------------------- /003 Correlation/images/non-stationary01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/003 Correlation/images/non-stationary01.png -------------------------------------------------------------------------------- /003 Correlation/images/non-stationary02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/003 Correlation/images/non-stationary02.png -------------------------------------------------------------------------------- /003 Correlation/images/non-stationary03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/003 Correlation/images/non-stationary03.png -------------------------------------------------------------------------------- /003 Correlation/images/non-stationary04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/003 Correlation/images/non-stationary04.png -------------------------------------------------------------------------------- /006 AB Test/images/ab_confidence_interval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/006 AB Test/images/ab_confidence_interval.png -------------------------------------------------------------------------------- /006 AB Test/images/ab_unit_normal_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/006 AB Test/images/ab_unit_normal_table.png -------------------------------------------------------------------------------- /030 Backpropagation/images/neural_network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/030 Backpropagation/images/neural_network.png -------------------------------------------------------------------------------- /030 Backpropagation/images/single_layer.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/030 Backpropagation/images/single_layer.png -------------------------------------------------------------------------------- /120 Ensemble/img/underfit_right_overfit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/120 Ensemble/img/underfit_right_overfit.png -------------------------------------------------------------------------------- /203 Decision Tree/images/entropy-box-tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/203 Decision Tree/images/entropy-box-tree.png -------------------------------------------------------------------------------- /203 Decision Tree/images/entropy-vs-gini.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/203 Decision Tree/images/entropy-vs-gini.png -------------------------------------------------------------------------------- /501 Autoencoder/images/autoencoder_layer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/501 Autoencoder/images/autoencoder_layer.png -------------------------------------------------------------------------------- /020 Cohort Analysis/data/online-retail.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/020 Cohort Analysis/data/online-retail.parquet -------------------------------------------------------------------------------- /102 Autoregression/images/Cov_nonstationary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/102 Autoregression/images/Cov_nonstationary.png -------------------------------------------------------------------------------- /102 Autoregression/images/Var_nonstationary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/102 Autoregression/images/Var_nonstationary.png -------------------------------------------------------------------------------- /203 Decision Tree/images/entropy-box-balls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/203 Decision Tree/images/entropy-box-balls.png -------------------------------------------------------------------------------- /502 Convolutional Neural Network/images/car.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/502 Convolutional Neural Network/images/car.jpg -------------------------------------------------------------------------------- /102 Autoregression/images/Mean_nonstationary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/102 Autoregression/images/Mean_nonstationary.png -------------------------------------------------------------------------------- /206 Root Cause Analysis/titanic_decision_tree.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/206 Root Cause Analysis/titanic_decision_tree.png -------------------------------------------------------------------------------- /502 Convolutional Neural Network/images/camera.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/502 Convolutional Neural Network/images/camera.jpg -------------------------------------------------------------------------------- /502 Convolutional Neural Network/images/maxpool.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/502 Convolutional Neural Network/images/maxpool.jpeg -------------------------------------------------------------------------------- /600 Transformer Machine Translation/rnn_torchviz.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/600 Transformer Machine Translation/rnn_torchviz.png -------------------------------------------------------------------------------- /017 Hyperparameter Optimization/images/grid-search.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/017 Hyperparameter Optimization/images/grid-search.jpg -------------------------------------------------------------------------------- /010 Intro to Linear Algebra/images/linear-equation-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/010 Intro to Linear Algebra/images/linear-equation-01.png -------------------------------------------------------------------------------- /010 Intro to Linear Algebra/images/linear-equation-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/010 Intro to Linear Algebra/images/linear-equation-02.png -------------------------------------------------------------------------------- /010 Intro to Linear Algebra/images/linear-equation-03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/010 Intro to Linear Algebra/images/linear-equation-03.png -------------------------------------------------------------------------------- /010 Intro to Linear Algebra/images/linear-equation-04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/010 Intro to Linear Algebra/images/linear-equation-04.png -------------------------------------------------------------------------------- /010 Intro to Linear Algebra/images/linear-equation-05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/010 Intro to Linear Algebra/images/linear-equation-05.png -------------------------------------------------------------------------------- /010 Intro to Linear Algebra/images/linear-equation-06.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/010 Intro to Linear Algebra/images/linear-equation-06.png -------------------------------------------------------------------------------- /010 Intro to Linear Algebra/images/linear-equation-07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/010 Intro to Linear Algebra/images/linear-equation-07.png -------------------------------------------------------------------------------- /103 Logistic Regression/images/logistic_regression_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/103 Logistic Regression/images/logistic_regression_01.png -------------------------------------------------------------------------------- /500 Perceptron (mini-batch gradient descent)/iris.weights: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/500 Perceptron (mini-batch gradient descent)/iris.weights -------------------------------------------------------------------------------- /600 Transformer Machine Translation/sp-bpt-anderson.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/600 Transformer Machine Translation/sp-bpt-anderson.model -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/images/google-breakout.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Deep Reinforcement Learning/images/google-breakout.jpg -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/images/mdp-illustrated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Deep Reinforcement Learning/images/mdp-illustrated.png -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/images/deep-q-learning-001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Deep Reinforcement Learning/images/deep-q-learning-001.png -------------------------------------------------------------------------------- /005 Sampling & Hypothesis Test/images/hypothesis-test-error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/005 Sampling & Hypothesis Test/images/hypothesis-test-error.png -------------------------------------------------------------------------------- /201 Mathematical Optimization/images/Nelder-Mead_Himmelblau.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/201 Mathematical Optimization/images/Nelder-Mead_Himmelblau.gif -------------------------------------------------------------------------------- /201 Mathematical Optimization/images/Nelder-Mead_Rosenbrock.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/201 Mathematical Optimization/images/Nelder-Mead_Rosenbrock.gif -------------------------------------------------------------------------------- /502 Convolutional Neural Network/images/convolution_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/502 Convolutional Neural Network/images/convolution_example.png -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/images/bellman_update_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Deep Reinforcement Learning/images/bellman_update_example.png -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/images/drl-agent-environment.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Deep Reinforcement Learning/images/drl-agent-environment.jpg -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/images/real-life-of-developer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Deep Reinforcement Learning/images/real-life-of-developer.png -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/images/atari-breakout-deepmind.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Deep Reinforcement Learning/images/atari-breakout-deepmind.png -------------------------------------------------------------------------------- /601 Dense Passage Retrieval/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Dense Passage Retrieval 4 | 5 | ## Introduction 6 | 7 | This code was written for practice, based on the code at https://github.com/TmaxEdu/KorDPR.
8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/images/policy_evaluation_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Deep Reinforcement Learning/images/policy_evaluation_example.png -------------------------------------------------------------------------------- /001 Performance Test (ROC, AUC, Confusion Matrix)/images/auc-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/001 Performance Test (ROC, AUC, Confusion Matrix)/images/auc-example.png -------------------------------------------------------------------------------- /500 Perceptron (mini-batch gradient descent)/images/perceptron_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/500 Perceptron (mini-batch gradient descent)/images/perceptron_model.png -------------------------------------------------------------------------------- /502 Convolutional Neural Network/images/simple_convolution_operation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/502 Convolutional Neural Network/images/simple_convolution_operation.png -------------------------------------------------------------------------------- /001 Performance Test (ROC, AUC, Confusion Matrix)/images/roc-example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/001 Performance Test (ROC, AUC, Confusion Matrix)/images/roc-example1.png -------------------------------------------------------------------------------- /502 Convolutional Neural Network/images/numerical_no_padding_no_strides.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/502 Convolutional Neural Network/images/numerical_no_padding_no_strides.gif -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/images/Q-Learning-Example_clip_image004.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Deep Reinforcement Learning/images/Q-Learning-Example_clip_image004.gif -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/images/Q-Learning-Example_clip_image006.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Deep Reinforcement Learning/images/Q-Learning-Example_clip_image006.gif -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/images/Q-Learning-Example_clip_image014.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Deep Reinforcement Learning/images/Q-Learning-Example_clip_image014.gif -------------------------------------------------------------------------------- /902 Deep Reinforcement 
Learning/images/Q-Learning-Example_clip_image024.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Deep Reinforcement Learning/images/Q-Learning-Example_clip_image024.gif -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/images/Q-Learning-Example_clip_image030.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Deep Reinforcement Learning/images/Q-Learning-Example_clip_image030.gif -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/images/deep-q-learning-used-by-deepmind.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Deep Reinforcement Learning/images/deep-q-learning-used-by-deepmind.png -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/images/deep-q-learning-algorithm-pseudo-code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/902 Deep Reinforcement Learning/images/deep-q-learning-algorithm-pseudo-code.png -------------------------------------------------------------------------------- /903 One-Shot Learning with Memory-Augmented Neural Network/images/memory-block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/903 One-Shot Learning with Memory-Augmented Neural Network/images/memory-block.png -------------------------------------------------------------------------------- /001 Performance Test (ROC, AUC, Confusion Matrix)/images/confusion_matrix_simple2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/001 Performance Test (ROC, AUC, Confusion Matrix)/images/confusion_matrix_simple2.png -------------------------------------------------------------------------------- /903 One-Shot Learning with Memory-Augmented Neural Network/images/addressing-mechanism.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/903 One-Shot Learning with Memory-Augmented Neural Network/images/addressing-mechanism.png -------------------------------------------------------------------------------- /903 One-Shot Learning with Memory-Augmented Neural Network/images/neural-turing-machines.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/903 One-Shot Learning with Memory-Augmented Neural Network/images/neural-turing-machines.png -------------------------------------------------------------------------------- /001 Performance Test (ROC, AUC, Confusion Matrix)/images/½Å»ï±¹Áö(2010³â)+95ºÎÀÛ+ÀüÆí+ÀÚ¸·Æ÷ÇÔ.torrent: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/001 Performance Test (ROC, AUC, Confusion Matrix)/images/½Å»ï±¹Áö(2010³â)+95ºÎÀÛ+ÀüÆí+ÀÚ¸·Æ÷ÇÔ.torrent 
-------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/evaluate.py: -------------------------------------------------------------------------------- 1 | import agent 2 | import environment 3 | import replay 4 | 5 | env = environment.Environment('Breakout-v0') 6 | replay = replay.ExperienceReplay(env) 7 | agent = agent.Agent(env, replay) 8 | agent.restore() 9 | agent.evaluate() -------------------------------------------------------------------------------- /001 Performance Test (ROC, AUC, Confusion Matrix)/images/½Å»ï±¹Áö(2010³â)+95ºÎÀÛ+ÀüÆí+ÀÚ¸·Æ÷ÇÔ (1).torrent: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/001 Performance Test (ROC, AUC, Confusion Matrix)/images/½Å»ï±¹Áö(2010³â)+95ºÎÀÛ+ÀüÆí+ÀÚ¸·Æ÷ÇÔ (1).torrent -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/train.py: -------------------------------------------------------------------------------- 1 | import agent 2 | import environment 3 | import replay 4 | 5 | env = environment.Environment('Breakout-v0') 6 | replay = replay.ExperienceReplay(env) 7 | agent = agent.Agent(env, replay) 8 | # agent.restore() 9 | agent.train() 10 | -------------------------------------------------------------------------------- /001 Performance Test (ROC, AUC, Confusion Matrix)/images/%5B한국어더빙x%5D%5B신삼국지%5D_%5B三国%5D.Three.Kingdoms.%5B01-95%5D.2010.BluRay.720p..torrent: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndersonJo/machine-learning/HEAD/001 Performance Test (ROC, AUC, Confusion Matrix)/images/%5B한국어더빙x%5D%5B신삼국지%5D_%5B三国%5D.Three.Kingdoms.%5B01-95%5D.2010.BluRay.720p..torrent -------------------------------------------------------------------------------- /data/linear-regression/challenger.csv: -------------------------------------------------------------------------------- 1 | distress_ct,temperature,field_check_pressure,flight_num 0,66,50,1 1,70,50,2 0,69,50,3 0,68,50,4 0,67,50,5 0,72,50,6 0,73,100,7 0,70,100,8 1,57,200,9 1,63,200,10 1,70,200,11 0,78,200,12 0,67,200,13 2,53,200,14 0,67,200,15 0,75,200,16 0,70,200,17 0,81,200,18 0,76,200,19 0,79,200,20 2,75,200,21 0,76,200,22 1,58,200,23 -------------------------------------------------------------------------------- /600 Transformer Machine Translation/rnn_torchviz: -------------------------------------------------------------------------------- 1 | digraph { 2 | graph [size="12,12"] 3 | node [align=left fontname=monospace fontsize=10 height=0.2 ranksep=0.1 shape=box style=filled] 4 | 140713931411184 [label=" 5 | (1, 41, 8000)" fillcolor=darkolivegreen1] 6 | 140713797549008 [label=" 7 | (41, 8000)" fillcolor=darkolivegreen3] 8 | 140713797549008 -> 140713931411184 [style=dotted] 9 | } 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE 2 | .idea 3 | _site 4 | 5 | # Python 6 | *.pyc 7 | .ipynb_checkpoints 8 | 9 | 10 | # Data 11 | _data 12 | *.gz 13 | *.zip 14 | *.alz 15 | *.data 16 | *.json 17 | *.npz 18 | cifar-10-batches-py 19 | *.h5 20 | joblib 21 | *.ckpt 22 | 23 | # Specific Data 24 | imdb_full.pkl 25 | 26 | # TensorFlow Model 27 | *.tfmodel 28 | _network 29 | _network_backup 30 | _tfmodel 31 | lightning_logs 32 | tb_logs 33 | checkpoints 34 | 35 | # R 36 | .Rhistory 
37 | -------------------------------------------------------------------------------- /203 Decision Tree/tennis.csv: -------------------------------------------------------------------------------- 1 | outlook,temp,humidity,windy,play 2 | sunny,hot,high,false,no 3 | sunny,hot,high,true,no 4 | overcast,hot,high,false,yes 5 | rainy,mild,high,false,yes 6 | rainy,cool,normal,false,yes 7 | rainy,cool,normal,true,no 8 | overcast,cool,normal,true,yes 9 | sunny,mild,high,false,no 10 | sunny,cool,normal,false,yes 11 | rainy,mild,normal,false,yes 12 | sunny,mild,normal,true,yes 13 | overcast,mild,high,true,yes 14 | overcast,hot,normal,false,yes 15 | rainy,mild,high,true,no 16 | -------------------------------------------------------------------------------- /600 Transformer Machine Translation/spe.py: -------------------------------------------------------------------------------- 1 | import sentencepiece as spm 2 | 3 | corpus = './_data/korean.txt,./_data/english.txt' 4 | vocab_size = 8000 5 | model_prefix = "sp-bpt-anderson" 6 | 7 | spm.SentencePieceTrainer.train( 8 | input=corpus, 9 | model_prefix=model_prefix, 10 | vocab_size=vocab_size, 11 | model_type="bpe", 12 | max_sentence_length=500, 13 | character_coverage=1.0, 14 | pad_id=0, 15 | unk_id=1, 16 | bos_id=2, 17 | eos_id=3, 18 | pad_piece="", 19 | unk_piece="", 20 | bos_piece="", 21 | eos_piece="", 22 | user_defined_symbols=",,", 23 | byte_fallback=False, 24 | num_threads=16 25 | ) 26 | -------------------------------------------------------------------------------- /037 Global Average Pooling/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Python 3", 14 | "language": "python", 15 | "name": "python3" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.6.4" 28 | } 29 | }, 30 | "nbformat": 4, 31 | "nbformat_minor": 2 32 | } 33 | -------------------------------------------------------------------------------- /204 Support Vector Machines/Support Vector Machine Part I.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Support Vector Machine Part I\n", 8 | "\n", 9 | "* [SVM Tutorial](http://www.svm-tutorial.com/)" 10 | ] 11 | } 12 | ], 13 | "metadata": { 14 | "kernelspec": { 15 | "display_name": "Python 2", 16 | "language": "python", 17 | "name": "python2" 18 | }, 19 | "language_info": { 20 | "codemirror_mode": { 21 | "name": "ipython", 22 | "version": 2 23 | }, 24 | "file_extension": ".py", 25 | "mimetype": "text/x-python", 26 | "name": "python", 27 | "nbconvert_exporter": "python", 28 | "pygments_lexer": "ipython2", 29 | "version": "2.7.12" 30 | } 31 | }, 32 | "nbformat": 4, 33 | "nbformat_minor": 1 34 | } 35 | -------------------------------------------------------------------------------- /203 Decision Tree/tree.dot: -------------------------------------------------------------------------------- 1 | digraph Tree { 2 | node [shape=box] ; 3 | 0 [label="X[1] <= 0.5\nentropy = 0.94\nsamples = 14\nvalue = [5, 9]"] ; 4 | 1 
[label="X[8] <= 0.5\nentropy = 1.0\nsamples = 10\nvalue = [5, 5]"] ; 5 | 0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ; 6 | 2 [label="X[3] <= 0.5\nentropy = 0.722\nsamples = 5\nvalue = [4, 1]"] ; 7 | 1 -> 2 ; 8 | 3 [label="X[0] <= 0.5\nentropy = 1.0\nsamples = 2\nvalue = [1, 1]"] ; 9 | 2 -> 3 ; 10 | 4 [label="entropy = 0.0\nsamples = 1\nvalue = [0, 1]"] ; 11 | 3 -> 4 ; 12 | 5 [label="entropy = 0.0\nsamples = 1\nvalue = [1, 0]"] ; 13 | 3 -> 5 ; 14 | 6 [label="entropy = 0.0\nsamples = 3\nvalue = [3, 0]"] ; 15 | 2 -> 6 ; 16 | 7 [label="X[0] <= 0.5\nentropy = 0.722\nsamples = 5\nvalue = [1, 4]"] ; 17 | 1 -> 7 ; 18 | 8 [label="entropy = 0.0\nsamples = 3\nvalue = [0, 3]"] ; 19 | 7 -> 8 ; 20 | 9 [label="X[4] <= 0.5\nentropy = 1.0\nsamples = 2\nvalue = [1, 1]"] ; 21 | 7 -> 9 ; 22 | 10 [label="entropy = 0.0\nsamples = 1\nvalue = [0, 1]"] ; 23 | 9 -> 10 ; 24 | 11 [label="entropy = 0.0\nsamples = 1\nvalue = [1, 0]"] ; 25 | 9 -> 11 ; 26 | 12 [label="entropy = 0.0\nsamples = 4\nvalue = [0, 4]"] ; 27 | 0 -> 12 [labeldistance=2.5, labelangle=-45, headlabel="False"] ; 28 | } -------------------------------------------------------------------------------- /501 Autoencoder/21 Variational Autoencoder (VAE).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "# Variational Autoencoder (VAE)\n", 11 | "\n", 12 | "* [Tutorial on Variational Autoencoders](https://arxiv.org/pdf/1606.05908v2.pdf)\n", 13 | "* [Variational Autoencoder based Anomaly Detection using Reconstruction Probability](http://dm.snu.ac.kr/static/docs/TR/SNUDM-TR-2015-03.pdf)\n", 14 | "* [Variational Autoencoders Explained](http://kvfrans.com/variational-autoencoders-explained/)\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "collapsed": true, 21 | "deletable": true, 22 | "editable": true 23 | }, 24 | "source": [] 25 | } 26 | ], 27 | "metadata": { 28 | "kernelspec": { 29 | "display_name": "Python 3", 30 | "language": "python", 31 | "name": "python3" 32 | }, 33 | "language_info": { 34 | "codemirror_mode": { 35 | "name": "ipython", 36 | "version": 3 37 | }, 38 | "file_extension": ".py", 39 | "mimetype": "text/x-python", 40 | "name": "python", 41 | "nbconvert_exporter": "python", 42 | "pygments_lexer": "ipython3", 43 | "version": "3.6.0" 44 | } 45 | }, 46 | "nbformat": 4, 47 | "nbformat_minor": 2 48 | } 49 | -------------------------------------------------------------------------------- /600 Transformer Machine Translation/preprocessing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def data_preprocessing(): 6 | data_paths = ['/home/anderson/Downloads/12.한영말뭉치/1_구어체(1)_200226.xlsx', 7 | '/home/anderson/Downloads/12.한영말뭉치/1_구어체(2)_200226.xlsx'] 8 | 9 | data = [] 10 | for data_path in data_paths: 11 | df = pd.read_excel(data_path) 12 | df.set_index('SID', inplace=True) 13 | df.columns = ['korean', 'english'] 14 | data.append(df) 15 | data = pd.concat(data) 16 | 17 | train_data, valid_data, test_data = np.split( 18 | data.sample(frac=1, random_state=42), 19 | [int(0.7 * len(data)), int(0.8 * len(data))]) 20 | 21 | train_data.to_parquet('./_data/train.parquet') 22 | valid_data.to_parquet('./_data/valid.parquet') 23 | test_data.to_parquet('./_data/test.parquet') 24 | 25 | 26 | def create_sentencepiece_data(): 27 | df = 
pd.read_parquet('./_data/data.parquet') 28 | with open('./_data/korean.txt', 'wt') as f: 29 | for i, row in df.iterrows(): 30 | f.write(row.korean) 31 | f.write('\n') 32 | 33 | with open('./_data/english.txt', 'wt') as f: 34 | for i, row in df.iterrows(): 35 | f.write(row.english) 36 | f.write('\n') 37 | 38 | 39 | if __name__ == '__main__': 40 | data_preprocessing() 41 | # create_sentencepiece_data() 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine Learning Notebook 2 | This repository covers machine learning from theory all the way to working code implementations. 3 | It organizes the theory and examples for the techniques most commonly used in real-world analysis. 4 | It also collects material that is helpful when interviewing for machine learning or deep learning researcher and engineer positions. 5 | 6 | > A large amount of content is still being added continuously. 7 | 8 | 9 | 10 | ## Contents 11 | 12 | 1. 000 ~ 099 : Statistics, Linear Algebra (theory) 13 | 2. 100 ~ 199 : Linear Regression & Basic Machine Learning 14 | 3. 200 ~ 299 : Useful Analysis & Machine Learning 15 | 4. 300 ~ 399 : Machine Learning Algorithms 16 | 5. 400 ~ 499 : .. 17 | 6. 500 ~ 599 : Deep Learning Basics 18 | 7. 600 ~ 699 : Convolutional Neural Networks 19 | 8. 700 ~ 799 : Recurrent Neural Network Algorithms 20 | 21 | 22 | 23 | # Interview Questions and Answers 24 | 25 | Although this repository was not built specifically for interviews, it also summarizes topics that are useful for interview preparation. 26 | 27 | ## Statistics Questions and Answers 28 | 29 | 1. Explain [Performance Test and ROC, AUC](blob/master/001%20Performance%20Test%20(ROC%2C%20AUC%2C%20Confusion%20Matrix)/performance%20test.ipynb) and write down the mathematical formulas 30 | 31 | 32 | 33 | ## Machine Learning Questions and Answers 34 | 35 | 1. What are eigenvalues and eigenvectors? Why are they important? 36 | - [See Eigenvalue & Eigenvector for details](blob/master/170%20Eigenvalue%20and%20Eigenvector/Eigenvalue%20and%20Eigenvector.ipynb) 37 | - Under a linear transformation A, an eigenvector is a vector whose magnitude changes but whose direction does not, and the eigenvalue indicates how much the magnitude changes. An eigenvector must not be the null (zero) vector. 38 | 2. How is the PCA algorithm implemented? 39 | - Implementation steps 40 | 1. Standardize the data 41 | 2. Compute the covariance matrix 42 | 3. Compute the eigenvalues and eigenvectors of the covariance matrix 43 | 4. Sort the eigenvalues and eigenvectors by eigenvalue 44 | 5. 
Dimensional reduction 45 | - [구현방법 참고](blob/master/210%20Principle%20Component%20Analysis%20(PCA)/Extracting%20PCA.ipynb) 46 | 47 | 48 | 49 | ## 딥러닝 50 | 51 | -------------------------------------------------------------------------------- /033 Optimizers/data/ex1data1.txt: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /100 Linear Regression/ex1data1.csv: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 
5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /data/linear-regression/ex1data1.txt: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /Interview Questions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Basic Interview Questions \n", 8 | "\n", 9 | "1. 
" 10 | ] 11 | } 12 | ], 13 | "metadata": { 14 | "kernelspec": { 15 | "display_name": "Python 3", 16 | "language": "python", 17 | "name": "python3" 18 | }, 19 | "language_info": { 20 | "codemirror_mode": { 21 | "name": "ipython", 22 | "version": 3 23 | }, 24 | "file_extension": ".py", 25 | "mimetype": "text/x-python", 26 | "name": "python", 27 | "nbconvert_exporter": "python", 28 | "pygments_lexer": "ipython3", 29 | "version": "3.8.5" 30 | }, 31 | "toc": { 32 | "base_numbering": 1, 33 | "nav_menu": {}, 34 | "number_sections": true, 35 | "sideBar": true, 36 | "skip_h1_title": false, 37 | "title_cell": "Table of Contents", 38 | "title_sidebar": "Contents", 39 | "toc_cell": false, 40 | "toc_position": {}, 41 | "toc_section_display": true, 42 | "toc_window_display": false 43 | }, 44 | "varInspector": { 45 | "cols": { 46 | "lenName": 16, 47 | "lenType": 16, 48 | "lenVar": 40 49 | }, 50 | "kernels_config": { 51 | "python": { 52 | "delete_cmd_postfix": "", 53 | "delete_cmd_prefix": "del ", 54 | "library": "var_list.py", 55 | "varRefreshCmd": "print(var_dic_list())" 56 | }, 57 | "r": { 58 | "delete_cmd_postfix": ") ", 59 | "delete_cmd_prefix": "rm(", 60 | "library": "var_list.r", 61 | "varRefreshCmd": "cat(var_dic_list()) " 62 | } 63 | }, 64 | "types_to_exclude": [ 65 | "module", 66 | "function", 67 | "builtin_function_or_method", 68 | "instance", 69 | "_Feature" 70 | ], 71 | "window_display": false 72 | } 73 | }, 74 | "nbformat": 4, 75 | "nbformat_minor": 4 76 | } 77 | -------------------------------------------------------------------------------- /014 Latent Dirichlet Allocation (LDA)/00. Dirichlet Distribution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Python 3", 14 | "language": "python", 15 | "name": "python3" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.8.5" 28 | }, 29 | "toc": { 30 | "base_numbering": 1, 31 | "nav_menu": {}, 32 | "number_sections": true, 33 | "sideBar": true, 34 | "skip_h1_title": false, 35 | "title_cell": "Table of Contents", 36 | "title_sidebar": "Contents", 37 | "toc_cell": false, 38 | "toc_position": {}, 39 | "toc_section_display": true, 40 | "toc_window_display": false 41 | }, 42 | "varInspector": { 43 | "cols": { 44 | "lenName": 16, 45 | "lenType": 16, 46 | "lenVar": 40 47 | }, 48 | "kernels_config": { 49 | "python": { 50 | "delete_cmd_postfix": "", 51 | "delete_cmd_prefix": "del ", 52 | "library": "var_list.py", 53 | "varRefreshCmd": "print(var_dic_list())" 54 | }, 55 | "r": { 56 | "delete_cmd_postfix": ") ", 57 | "delete_cmd_prefix": "rm(", 58 | "library": "var_list.r", 59 | "varRefreshCmd": "cat(var_dic_list()) " 60 | } 61 | }, 62 | "types_to_exclude": [ 63 | "module", 64 | "function", 65 | "builtin_function_or_method", 66 | "instance", 67 | "_Feature" 68 | ], 69 | "window_display": false 70 | } 71 | }, 72 | "nbformat": 4, 73 | "nbformat_minor": 4 74 | } 75 | -------------------------------------------------------------------------------- /036 Batch Normalization/01 Batch Normalization.ipynb: -------------------------------------------------------------------------------- 1 | 
{ 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Batch Normalization\n", 8 | "\n", 9 | "* [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](https://arxiv.org/pdf/1502.03167.pdf)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### Covariate Shift\n", 17 | "\n", 18 | "* http://blog.smola.org/post/4110255196/real-simple-covariate-shift-correction\n", 19 | "\n", 20 | "A startup wanted to build a model that diagnoses cancer from blood samples.
\n", 21 | "They could obtain blood samples from many cancer patients, but getting the healthy-person samples needed for training was very difficult.\n", 22 | "Collecting blood from healthy people was hard, first of all for ethical reasons.\n", 23 | "As an alternative, they were able to collect blood samples from many students and build the model. \n", 24 | "\n", 25 | "During training the model detected cancer with high accuracy, but when the startup actually launched its service the accuracy was very low and the service failed.\n", 26 | "Why did this happen?\n", 27 | "\n", 28 | "First, the blood samples obtained from cancer patients mostly came from older people, some of whom smoked or drank.\n", 29 | "The students, on the other hand, were young, their rates of smoking and drinking were different, and many other factors differed as well.\n", 30 | "In the end the model is accurate during training, but once it is deployed it makes diagnoses that go in the wrong direction.\n", 31 | "\n", 32 | "This phenomenon is called covariate shift.
\n", 33 | "Simply put, it is the phenomenon that arises when the data distributions of training and test differ." 34 | ] 35 | } 36 | ], 37 | "metadata": { 38 | "kernelspec": { 39 | "display_name": "Python 3", 40 | "language": "python", 41 | "name": "python3" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 3 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython3", 53 | "version": "3.6.0" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 2 58 | } 59 | -------------------------------------------------------------------------------- /903 One-Shot Learning with Memory-Augmented Neural Network/03 One-shot learning with Memory-Augmented Neural Network.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "# One-shot learning with Memory-Augmented Neural Network\n", 11 | "\n", 12 | "* [Google DeepMind - One-shot learning with Memory-Augmented Neural Network](https://arxiv.org/pdf/1605.06065v1.pdf)\n", 13 | "* [Neural Turing Machines - Alex Graves, Greg Wayne, Ivo Danihelka](https://arxiv.org/pdf/1410.5401.pdf)" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": { 19 | "deletable": true, 20 | "editable": true 21 | }, 22 | "source": [ 23 | "Recent deep learning builds on traditional gradient-based neural networks.
\n", 24 | "문제는 대량의 데이터를 필요로하며, 수많은 training을 거쳐야 한다는 것이 문제입니다.\n", 25 | "**[Catastrophic Interference](https://en.wikipedia.org/wiki/Catastrophic_interference)** (새로운 데이터가 들어오면, 이전에 학습된 내용을 잊어버리는 현상) 으로 인해서 기존의 모델은 비효율적으로 기존 데이터와 더불어 학습을 해야합니다. \n", 26 | "\n", 27 | "**Neural Turing Machines (NTMs)** 같은 augmented memory capacities를 같은 아키텍쳐의 경우 새로운 정보를 빠르게 encode해서 새로운 정보를 빠르게 학습하도록 만듭니다." 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": { 34 | "collapsed": true, 35 | "deletable": true, 36 | "editable": true 37 | }, 38 | "outputs": [], 39 | "source": [] 40 | } 41 | ], 42 | "metadata": { 43 | "kernelspec": { 44 | "display_name": "Python 3", 45 | "language": "python", 46 | "name": "python3" 47 | }, 48 | "language_info": { 49 | "codemirror_mode": { 50 | "name": "ipython", 51 | "version": 3 52 | }, 53 | "file_extension": ".py", 54 | "mimetype": "text/x-python", 55 | "name": "python", 56 | "nbconvert_exporter": "python", 57 | "pygments_lexer": "ipython3", 58 | "version": "3.6.0" 59 | } 60 | }, 61 | "nbformat": 4, 62 | "nbformat_minor": 2 63 | } 64 | -------------------------------------------------------------------------------- /600 Transformer Machine Translation/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import sentencepiece as spm 3 | import torch 4 | from torch.utils.data import Dataset 5 | 6 | 7 | class TranslationDataset(Dataset): 8 | def __init__(self, data_path, sp_path, max_seq_len: int = 128): 9 | self.data = pd.read_parquet(data_path) 10 | self.sp = spm.SentencePieceProcessor() 11 | self.sp.load(sp_path) 12 | 13 | self.max_seq_len = max_seq_len 14 | 15 | def __len__(self): 16 | return len(self.data) 17 | 18 | def __getitem__(self, idx): 19 | """ 20 | 반드시 torch.Tensor 로 내보내는게 중요합니다. 21 | 모든 vector의 길이가 동일해야 합니다. 그래야지 추후 batch 로 만들었을때도 문제가 없습니다. 22 | 따라서 해당 함수에서 torch.tensor를 만들어서 내보내는게 맞습니다. 
23 | """ 24 | row = self.data.iloc[idx] 25 | korean = row.korean 26 | english = row.english 27 | 28 | src_tokenized = self.sp.encode(korean, add_bos=True, add_eos=True) 29 | tgt_tokenized = self.sp.encode(english, add_bos=True, add_eos=True) 30 | 31 | src_tokenized = src_tokenized[:self.max_seq_len] 32 | if src_tokenized[-1] != self.sp.eos_id(): 33 | src_tokenized[-1] = self.sp.eos_id() 34 | if tgt_tokenized[-1] != self.sp.eos_id(): 35 | tgt_tokenized[-1] = self.sp.eos_id() 36 | 37 | src_tensor = torch.zeros(self.max_seq_len, dtype=torch.int32) 38 | src_tensor[:len(src_tokenized)] = torch.tensor(src_tokenized) 39 | 40 | tgt_input = torch.zeros(self.max_seq_len, dtype=torch.int32) 41 | tgt_input[:max(len(tgt_tokenized) - 1, 0)] = torch.tensor(tgt_tokenized[:-1]) 42 | 43 | tgt_output = torch.zeros(self.max_seq_len, dtype=torch.int32) 44 | tgt_output[:max(len(tgt_tokenized) - 1, 0)] = torch.tensor(tgt_tokenized[1:]) 45 | 46 | return {'src': src_tensor, 'tgt_input': tgt_input, 'tgt_output': tgt_output} 47 | 48 | 49 | if __name__ == '__main__': 50 | dataset = TranslationDataset('./_data/data.parquet', 51 | 'sp-bpt-anderson.model') 52 | print(dataset[0]) 53 | -------------------------------------------------------------------------------- /505 Predicting Sin Cos/06 [Keras] different variance sin.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Populating the interactive namespace from numpy and matplotlib\n" 13 | ] 14 | }, 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "/usr/local/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. 
In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 20 | " from ._conv import register_converters as _register_converters\n", 21 | "Using TensorFlow backend.\n" 22 | ] 23 | } 24 | ], 25 | "source": [ 26 | "%pylab inline\n", 27 | "import numpy as np\n", 28 | "\n", 29 | "from keras.models import Sequential\n", 30 | "from keras.layers import SimpleRNN, Dense\n", 31 | "from keras.metrics import mean_squared_error\n", 32 | "from keras.wrappers.scikit_learn import KerasClassifier\n", 33 | "\n", 34 | "from sklearn.metrics import make_scorer, r2_score\n", 35 | "from sklearn.model_selection import GridSearchCV\n", 36 | "\n", 37 | "from IPython.display import SVG, Image\n", 38 | "from keras.utils.vis_utils import model_to_dot" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [] 47 | } 48 | ], 49 | "metadata": { 50 | "kernelspec": { 51 | "display_name": "Python 3", 52 | "language": "python", 53 | "name": "python3" 54 | }, 55 | "language_info": { 56 | "codemirror_mode": { 57 | "name": "ipython", 58 | "version": 3 59 | }, 60 | "file_extension": ".py", 61 | "mimetype": "text/x-python", 62 | "name": "python", 63 | "nbconvert_exporter": "python", 64 | "pygments_lexer": "ipython3", 65 | "version": "3.6.4" 66 | } 67 | }, 68 | "nbformat": 4, 69 | "nbformat_minor": 2 70 | } 71 | -------------------------------------------------------------------------------- /101 Multiple Linear Regression/Multiple Linear Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Populating the interactive namespace from numpy and matplotlib\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "%pylab inline\n", 20 | "import numpy as np\n", 21 | "from numpy.linalg import inv" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 123, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "challenger = np.genfromtxt('../data/linear-regression/challenger.csv', delimiter=',', skip_header=True)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 132, 38 | "metadata": { 39 | "collapsed": false 40 | }, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "3.52709338331\n", 47 | "[-0.05138594 0.00175701 0.01429284]\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "def reg(x, y):\n", 53 | " x = np.c_[np.ones(len(x)), x]\n", 54 | " b = inv(np.dot(x.T, x)) \n", 55 | " b = np.dot(np.dot(b, x.T), y)\n", 56 | " return b[0], b[1:]\n", 57 | " \n", 58 | "y_intercept, coefficients = reg(y=challenger[:, 0], x=challenger[:, 1:4])\n", 59 | "print y_intercept\n", 60 | "print coefficients" 61 | ] 62 | } 63 | ], 64 | "metadata": { 65 | "kernelspec": { 66 | "display_name": "Python 2", 67 | "language": "python", 68 | "name": "python2" 69 | }, 70 | "language_info": { 71 | "codemirror_mode": { 72 | "name": "ipython", 73 | "version": 2 74 | }, 75 | "file_extension": ".py", 76 | "mimetype": "text/x-python", 77 | "name": "python", 78 | "nbconvert_exporter": "python", 79 | "pygments_lexer": "ipython2", 80 | "version": "2.7.10" 81 | } 82 | }, 83 | "nbformat": 4, 84 | "nbformat_minor": 0 85 | } 86 | -------------------------------------------------------------------------------- 
/203 Decision Tree/images/entropy-box-balls.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | entropy-box-balls 6 | 7 | 8 |
9 | 10 | 11 | -------------------------------------------------------------------------------- /032 Activations/01 Sigmoid.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "# Sigmoid\n", 11 | "\n", 12 | "### Formula\n", 13 | " \n", 14 | "$$ \\sigma(x) = \\frac{1}{1 + e^-x} $$\n", 15 | "\n", 16 | "### Derivative of the Sigmoid\n", 17 | "\n", 18 | "\\begin{align}\n", 19 | "\\dfrac{d}{dx} \\sigma(x) &= \\dfrac{d}{dx} \\left[ \\dfrac{1}{1 + e^{-x}} \\right] & [1] \\\\\n", 20 | "&= \\dfrac{d}{dx} \\left( 1 + \\mathrm{e}^{-x} \\right)^{-1} & [2]\\\\\n", 21 | "&= -(1 + e^{-x})^{-2}(-e^{-x}) & [3]\\\\\n", 22 | "&= \\dfrac{e^{-x}}{\\left(1 + e^{-x}\\right)^2} & [4]\\\\\n", 23 | "&= \\dfrac{1}{1 + e^{-x}\\ } \\cdot \\dfrac{e^{-x}}{1 + e^{-x}} & [5]\\\\\n", 24 | "&= \\dfrac{1}{1 + e^{-x}\\ } \\cdot \\dfrac{(1 + e^{-x}) - 1}{1 + e^{-x}} & [6]\\\\\n", 25 | "&= \\dfrac{1}{1 + e^{-x}\\ } \\cdot \\left( 1 - \\dfrac{1}{1 + e^{-x}} \\right) & [7]\\\\\n", 26 | "&= \\sigma(x) \\cdot (1 - \\sigma(x)) & [8]\n", 27 | "\\end{align}" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "* [3] Chain Rule을 적용\n", 35 | "* [4] $ \\frac{d}{dx} e^{-x} = -e^{-x} $ 이며 $ \\frac{d}{dx} e^{x} = e^{x} $" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 1, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "def sigmoid(x):\n", 47 | " return 1. / (1 + numpy.exp(-x))\n", 48 | "\n", 49 | "def dsigmoid(x):\n", 50 | " return x * (1. - x)" 51 | ] 52 | } 53 | ], 54 | "metadata": { 55 | "kernelspec": { 56 | "display_name": "Python 3", 57 | "language": "python", 58 | "name": "python3" 59 | }, 60 | "language_info": { 61 | "codemirror_mode": { 62 | "name": "ipython", 63 | "version": 3 64 | }, 65 | "file_extension": ".py", 66 | "mimetype": "text/x-python", 67 | "name": "python", 68 | "nbconvert_exporter": "python", 69 | "pygments_lexer": "ipython3", 70 | "version": "3.6.0" 71 | } 72 | }, 73 | "nbformat": 4, 74 | "nbformat_minor": 2 75 | } 76 | -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/environment.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | 3 | import gym 4 | import cv2 5 | import numpy as np 6 | 7 | 8 | class Environment(object): 9 | def __init__(self, game, action_repeat=4): 10 | self.game = gym.make(game) 11 | self.action_size = self.game.action_space.n 12 | self.action_repeat = action_repeat 13 | self.height, self.width = self.dims = (84, 84) 14 | 15 | self._buffer = deque(maxlen=self.action_repeat) 16 | 17 | def play_sample(self): 18 | while True: 19 | self.game.render() 20 | action = self.game.action_space.sample() 21 | observation, reward, done = self.step(action) 22 | if done: 23 | break 24 | self.game.close() 25 | 26 | def step(self, action): 27 | screen, reward, done, info = self.game.step(action) 28 | screen = self.preprocess(screen) 29 | 30 | self.add_screeen(screen) 31 | return screen, reward, done, info 32 | 33 | def preprocess(self, screen): 34 | preprocessed = cv2.resize(cv2.cvtColor(screen, cv2.COLOR_RGB2GRAY) / 255., (self.height, self.width)) 35 | return preprocessed 36 | 37 | def get_initial_states(self): 38 | screen = self.game.reset() 39 | screen = self.preprocess(screen) 40 | screens 
= np.stack([screen for _ in range(self.action_repeat)], axis=0) 41 | 42 | self._buffer = deque(maxlen=self.action_repeat) 43 | for _ in range(self.action_repeat): 44 | self._buffer.append(screen) 45 | return screens 46 | 47 | def recent_screens(self): 48 | return np.array(self._buffer) 49 | 50 | def add_screeen(self, screen): 51 | self._buffer.append(screen) 52 | 53 | def random_step(self, gray=True): 54 | action = self.game.action_space.sample() 55 | return self.step(action, gray=True) 56 | 57 | def random_action(self): 58 | return self.game.action_space.sample() 59 | 60 | def render(self): 61 | return self.game.render() 62 | 63 | def reset(self): 64 | return self.game.reset() 65 | 66 | def close(self): 67 | return self.game.close() 68 | -------------------------------------------------------------------------------- /003 Correlation/02. Correlation Between Categorical Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 12, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from sklearn.datasets import load_boston" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 13, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,\n", 21 | " 4.9800e+00],\n", 22 | " [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,\n", 23 | " 9.1400e+00],\n", 24 | " [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,\n", 25 | " 4.0300e+00],\n", 26 | " ...,\n", 27 | " [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,\n", 28 | " 5.6400e+00],\n", 29 | " [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,\n", 30 | " 6.4800e+00],\n", 31 | " [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,\n", 32 | " 7.8800e+00]])" 33 | ] 34 | }, 35 | "execution_count": 13, 36 | "metadata": {}, 37 | "output_type": "execute_result" 38 | } 39 | ], 40 | "source": [ 41 | "x, y = load_boston(True)\n", 42 | "x" 43 | ] 44 | } 45 | ], 46 | "metadata": { 47 | "kernelspec": { 48 | "display_name": "Python 3", 49 | "language": "python", 50 | "name": "python3" 51 | }, 52 | "language_info": { 53 | "codemirror_mode": { 54 | "name": "ipython", 55 | "version": 3 56 | }, 57 | "file_extension": ".py", 58 | "mimetype": "text/x-python", 59 | "name": "python", 60 | "nbconvert_exporter": "python", 61 | "pygments_lexer": "ipython3", 62 | "version": "3.6.7" 63 | }, 64 | "toc": { 65 | "base_numbering": 1, 66 | "nav_menu": {}, 67 | "number_sections": true, 68 | "sideBar": true, 69 | "skip_h1_title": false, 70 | "title_cell": "Table of Contents", 71 | "title_sidebar": "Contents", 72 | "toc_cell": false, 73 | "toc_position": {}, 74 | "toc_section_display": true, 75 | "toc_window_display": false 76 | } 77 | }, 78 | "nbformat": 4, 79 | "nbformat_minor": 2 80 | } 81 | -------------------------------------------------------------------------------- /600 Transformer Machine Translation/train.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser, Namespace 2 | 3 | from lightning import Trainer 4 | from lightning.pytorch.callbacks import ModelCheckpoint 5 | from lightning.pytorch.loggers import TensorBoardLogger 6 | 7 | from torch.utils.data import DataLoader 8 | 9 | from data import TranslationDataset 10 | from model import TransformerModule 11 | 12 | 13 | def init() -> 
Namespace: 14 | parser = ArgumentParser() 15 | parser.add_argument('--batch', default=64, type=int) 16 | opt = parser.parse_args() 17 | return opt 18 | 19 | 20 | def train(): 21 | opt = init() 22 | train_dataset = TranslationDataset('./_data/train.parquet', 'sp-bpt-anderson.model') 23 | valid_dataset = TranslationDataset('./_data/valid.parquet', 'sp-bpt-anderson.model') 24 | test_dataset = TranslationDataset('./_data/test.parquet', 'sp-bpt-anderson.model') 25 | 26 | train_loader = DataLoader(train_dataset, batch_size=opt.batch) 27 | valid_loader = DataLoader(valid_dataset, batch_size=opt.batch) 28 | test_loader = DataLoader(test_dataset, batch_size=opt.batch) 29 | 30 | model = TransformerModule(src_vocab_size=8000, tgt_vocab_size=8000, d_model=256) 31 | 32 | # 보통 val_loss 를 잡는데, 실무에서 데이터 사이즈가 워낙 커서, 학습 도중에도 checkpoint 저장이 필요합니다. 그래서 loss 로 했습니다. 33 | # 이를 위해서 mode 그리고 loss 값을 training_step 그리고 validation_step 에서 만들어 줘야 합니다. 34 | checkpointer = ModelCheckpoint( 35 | dirpath='./checkpoints', 36 | filename='machine-translation-{mode}-{epoch:02d}-{step:06d}-{loss:.4f}', 37 | monitor='loss', 38 | every_n_epochs=1, 39 | save_top_k=-1, 40 | mode='min' 41 | ) 42 | 43 | tensorboard = TensorBoardLogger('./tb_logs', name='machine-translation') 44 | 45 | trainer = Trainer(max_epochs=100, 46 | accelerator='cuda', # it's not GPU 47 | devices=1, 48 | log_every_n_steps=100, 49 | enable_checkpointing=True, 50 | enable_progress_bar=True, 51 | enable_model_summary=True, 52 | logger=tensorboard, # TensorBoard 는 callback 에 넣는 것 아닙니다. 53 | callbacks=[checkpointer]) 54 | trainer.fit(model, train_loader, valid_loader) 55 | # trainer.validate(model, valid_loader) 56 | 57 | 58 | if __name__ == '__main__': 59 | train() 60 | -------------------------------------------------------------------------------- /120 Ensemble/01. Ensemble.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "5416adf5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Bias-Variance Tradeoff \n", 9 | "\n", 10 | "![](img/underfit_right_overfit.png)\n", 11 | "\n", 12 | "## Error \n", 13 | "\n", 14 | "$$ Error = noise(x) + bias(x) + variance(x) $$ \n", 15 | "\n", 16 | " - error: irreducible error 로서 제거 할수 없는 존재.. \n", 17 | " - bias 그리고 variance: 서로 tradeoff 관계에 있으며 적절하게 조정해서 minimize 하는게 목적\n", 18 | "\n", 19 | "\n", 20 | "## Bias \n", 21 | "\n", 22 | "Bias는 예측된 값과 기대값(GT) 차이라고 볼 수 있습니다. \n", 23 | "\n", 24 | "\n", 25 | "$$ \\text{Bias} = E[y - \\hat{y}] $$\n", 26 | "\n", 27 | " - Bias는 예측관 값과 Ground Truth 값과의 차이의 평균입니다.\n", 28 | " - Low Bias: 약한 가정 / 오차가 적다 (Decision Tree, KNN, SVM)\n", 29 | " - High Bias: 강한 가정 / 오차가 크다 (Linear Regression, Linear Discriminant Analysis, Logistic Regression)\n", 30 | "\n", 31 | "\n", 32 | "## Variance \n", 33 | "\n", 34 | "$$ \\text{Variance} = E \\left[ \\hat{y} - E[\\hat{y}] \\right]^2 $$\n", 35 | "\n", 36 | " - 예측값과 예측값들의 평균의 차를 제곱해준 것입니다. \n", 37 | " - 즉 예측값들끼리 얼마나 퍼져 있는지, 좁게 몰려 있는지를 나타낸 것입니다.\n", 38 | " - Low Variance: \n", 39 | " - High Variance: 모든 데이터들을 지나치게 학습\n", 40 | "\n", 41 | "\n", 42 | "\n", 43 | "## Underfitting and Overfitting\n", 44 | "\n", 45 | " - Underfitting : High Bias and Low Variance\n", 46 | " - 모델은 예측시 강한 가정(assumption)을 갖고 있음. 
\n", 47 | " - 데이터 부족으로 정확한 모델을 만들 수 없을 때 발생\n", 48 | " - Linear Model 을 Non-Linear Data 에 적용할때 발생\n", 49 | " - Overfitting : Low Bias and High Variance \n", 50 | " - 노이즈 데이터까지 피팅 시켜서 발생함\n", 51 | " - 복잡한 모델을 단순한 데이터에 적용시 발생\n", 52 | " \n" 53 | ] 54 | } 55 | ], 56 | "metadata": { 57 | "kernelspec": { 58 | "display_name": "Python 3", 59 | "language": "python", 60 | "name": "python3" 61 | }, 62 | "language_info": { 63 | "codemirror_mode": { 64 | "name": "ipython", 65 | "version": 3 66 | }, 67 | "file_extension": ".py", 68 | "mimetype": "text/x-python", 69 | "name": "python", 70 | "nbconvert_exporter": "python", 71 | "pygments_lexer": "ipython3", 72 | "version": "3.8.8" 73 | }, 74 | "toc": { 75 | "base_numbering": 1, 76 | "nav_menu": {}, 77 | "number_sections": true, 78 | "sideBar": true, 79 | "skip_h1_title": false, 80 | "title_cell": "Table of Contents", 81 | "title_sidebar": "Contents", 82 | "toc_cell": false, 83 | "toc_position": {}, 84 | "toc_section_display": true, 85 | "toc_window_display": false 86 | } 87 | }, 88 | "nbformat": 4, 89 | "nbformat_minor": 5 90 | } 91 | -------------------------------------------------------------------------------- /203 Decision Tree/images/entropy-box-tree.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | entropy-box-tree 6 | 7 | 8 |
9 | 10 | 11 | -------------------------------------------------------------------------------- /data/time-series/international-airline-passengers.csv: -------------------------------------------------------------------------------- 1 | "Month","International airline passengers: monthly totals in thousands" 2 | "1949-01",112 3 | "1949-02",118 4 | "1949-03",132 5 | "1949-04",129 6 | "1949-05",121 7 | "1949-06",135 8 | "1949-07",148 9 | "1949-08",148 10 | "1949-09",136 11 | "1949-10",119 12 | "1949-11",104 13 | "1949-12",118 14 | "1950-01",115 15 | "1950-02",126 16 | "1950-03",141 17 | "1950-04",135 18 | "1950-05",125 19 | "1950-06",149 20 | "1950-07",170 21 | "1950-08",170 22 | "1950-09",158 23 | "1950-10",133 24 | "1950-11",114 25 | "1950-12",140 26 | "1951-01",145 27 | "1951-02",150 28 | "1951-03",178 29 | "1951-04",163 30 | "1951-05",172 31 | "1951-06",178 32 | "1951-07",199 33 | "1951-08",199 34 | "1951-09",184 35 | "1951-10",162 36 | "1951-11",146 37 | "1951-12",166 38 | "1952-01",171 39 | "1952-02",180 40 | "1952-03",193 41 | "1952-04",181 42 | "1952-05",183 43 | "1952-06",218 44 | "1952-07",230 45 | "1952-08",242 46 | "1952-09",209 47 | "1952-10",191 48 | "1952-11",172 49 | "1952-12",194 50 | "1953-01",196 51 | "1953-02",196 52 | "1953-03",236 53 | "1953-04",235 54 | "1953-05",229 55 | "1953-06",243 56 | "1953-07",264 57 | "1953-08",272 58 | "1953-09",237 59 | "1953-10",211 60 | "1953-11",180 61 | "1953-12",201 62 | "1954-01",204 63 | "1954-02",188 64 | "1954-03",235 65 | "1954-04",227 66 | "1954-05",234 67 | "1954-06",264 68 | "1954-07",302 69 | "1954-08",293 70 | "1954-09",259 71 | "1954-10",229 72 | "1954-11",203 73 | "1954-12",229 74 | "1955-01",242 75 | "1955-02",233 76 | "1955-03",267 77 | "1955-04",269 78 | "1955-05",270 79 | "1955-06",315 80 | "1955-07",364 81 | "1955-08",347 82 | "1955-09",312 83 | "1955-10",274 84 | "1955-11",237 85 | "1955-12",278 86 | "1956-01",284 87 | "1956-02",277 88 | "1956-03",317 89 | "1956-04",313 90 | "1956-05",318 91 | "1956-06",374 92 | "1956-07",413 93 | "1956-08",405 94 | "1956-09",355 95 | "1956-10",306 96 | "1956-11",271 97 | "1956-12",306 98 | "1957-01",315 99 | "1957-02",301 100 | "1957-03",356 101 | "1957-04",348 102 | "1957-05",355 103 | "1957-06",422 104 | "1957-07",465 105 | "1957-08",467 106 | "1957-09",404 107 | "1957-10",347 108 | "1957-11",305 109 | "1957-12",336 110 | "1958-01",340 111 | "1958-02",318 112 | "1958-03",362 113 | "1958-04",348 114 | "1958-05",363 115 | "1958-06",435 116 | "1958-07",491 117 | "1958-08",505 118 | "1958-09",404 119 | "1958-10",359 120 | "1958-11",310 121 | "1958-12",337 122 | "1959-01",360 123 | "1959-02",342 124 | "1959-03",406 125 | "1959-04",396 126 | "1959-05",420 127 | "1959-06",472 128 | "1959-07",548 129 | "1959-08",559 130 | "1959-09",463 131 | "1959-10",407 132 | "1959-11",362 133 | "1959-12",405 134 | "1960-01",417 135 | "1960-02",391 136 | "1960-03",419 137 | "1960-04",461 138 | "1960-05",472 139 | "1960-06",535 140 | "1960-07",622 141 | "1960-08",606 142 | "1960-09",508 143 | "1960-10",461 144 | "1960-11",390 145 | "1960-12",432 -------------------------------------------------------------------------------- /010 Intro to Linear Algebra/01 Linear Equation, Solution, System of Linear Equations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Linear Equation\n", 8 | "\n", 9 | "아래의 형태와 같은 형태를 띄고 있으면 Linear Equation 이라고 말할수 있습니다. 
\n", 10 | "\n", 11 | "$$ a_1 x_1 + a_2 x_2 + a_3 x_3 + ... + a_n x_n = y $$\n", 12 | "\n", 13 | "* $ x $: 변수 (variables)\n", 14 | "* $ a $: 계수 (coefficients)\n", 15 | "\n", 16 | "각각의 수식들은 모두 linear operator (linear mapping, linear trasformation)에 기반하고 있습니다.
\n", 17 | "간단하게 말하면 모두 일직선을 그리는 것이라고 생각하면 됩니다.\n", 18 | "\n", 19 | "![](images/linear-equation-01.png)\n", 20 | "\n", 21 | "이때 $ x^2 $, $ x^3 $, 또는 $ \\sqrt(x)$ 등등의 non-linear operation은 허용되지 않습니다.
\n", 22 | "즉 1차식만 허용이 됩니다.\n", 23 | "\n", 24 | "\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# System of Linear Equations\n", 32 | "\n", 33 | "Linear Equation이 하나 이상이 모여서 만들어진 것이 Linear System 이라고 생각하면 됩니다.
\n", 34 | "두개의 linear equations모두 직선의 방정식을 나타내고 있기 때문에, solution은 1개가 나올수 있습니다.
\n", 35 | "아래의 그래프에서 빨간선과 파란색선이 교차하는 지점이 solution이라고 보면 됩니다.\n", 36 | "\n", 37 | "![](images/linear-equation-02.png)\n", 38 | "\n", 39 | "\n", 40 | "Solution은 하나만 존재하거나, 아예 없거나, 무한대로 있을 수 있습니다.
\n", 41 | "이때 Solution이 없을때는 \"Inconsistent\"하다고 말하며, Solution이 존재할때는 \"Consistent\"하다고 말합니다. (그냥 용어)\n", 42 | "\n", 43 | "![](images/system-linear-types.svg)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "# Matrix Notation\n", 51 | "\n", 52 | "예를 들어서 다음의 linear system이 존재합니다.\n", 53 | "\n", 54 | "\n", 55 | "$$ \\begin{align} \n", 56 | "x_1 - 2x_2 + x_3 &= 0 \\\\\n", 57 | "0 x_1 + 4x_2 -3x_3 &= 5 \\\\\n", 58 | "-4x_1 + 5x_2 + 12x_3 &= -3\n", 59 | "\\end{align} $$\n", 60 | "\n", 61 | "이것을 다음과 같이 coeficients만 가져와서 Matrix를 만들 수 있습니다.
\n", 62 | "이때 우변에 있는 상수들을 표현했기 때문에 Augmented Matrix라고 하며,
\n", 63 | "우변의 항이 없는 Matrix를 Coefficient Matrix라고 합니다.\n", 64 | "\n", 65 | "$$ \\left[\\begin{array}{rrr|r}\n", 66 | " 1 & -2 & 1 & 0 \\\\\n", 67 | " 0 & 4 & -3 & 5 \\\\\n", 68 | " -4 & 5 & 12 & -3\n", 69 | " \\end{array}\\right]\n", 70 | "$$\n", 71 | "\n", 72 | "Linear system에서 우변에 있는 상수를 column vector로 추가한것을 Augmented Matrix라고 합니다.\n" 73 | ] 74 | } 75 | ], 76 | "metadata": { 77 | "kernelspec": { 78 | "display_name": "Python 3", 79 | "language": "python", 80 | "name": "python3" 81 | }, 82 | "language_info": { 83 | "codemirror_mode": { 84 | "name": "ipython", 85 | "version": 3 86 | }, 87 | "file_extension": ".py", 88 | "mimetype": "text/x-python", 89 | "name": "python", 90 | "nbconvert_exporter": "python", 91 | "pygments_lexer": "ipython3", 92 | "version": "3.6.7" 93 | } 94 | }, 95 | "nbformat": 4, 96 | "nbformat_minor": 2 97 | } 98 | -------------------------------------------------------------------------------- /601 Dense Passage Retrieval/preprocess.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from pathlib import Path 4 | from tempfile import gettempdir 5 | from typing import Tuple 6 | 7 | import numpy as np 8 | from datasets import load_dataset 9 | from sklearn.model_selection import train_test_split 10 | from tqdm import tqdm 11 | from transformers import AutoTokenizer 12 | 13 | 14 | def preprocess_raw_data(): 15 | np.random.seed(23) 16 | data_dir_path = Path(gettempdir()) / 'korquad_preprocessed_data' 17 | train_data_path = data_dir_path / 'train_data.json' 18 | valid_data_path = data_dir_path / 'valid_data.json' 19 | test_data_path = data_dir_path / 'test_data.json' 20 | 21 | if train_data_path.exists() and valid_data_path.exists() and test_data_path.exists(): 22 | return {'train': train_data_path, 'valid': valid_data_path, 'test': test_data_path} 23 | 24 | train_data, test_data = _download_data(data_dir_path) 25 | 26 | # Convert to Embedding Text 27 | tokenizer = AutoTokenizer.from_pretrained('skt/kobert-base-v1') 28 | train_data = _convert_to_encoded_data(tokenizer, train_data) 29 | test_data = _convert_to_encoded_data(tokenizer, test_data) 30 | 31 | _save_to_json(train_data_path, train_data) 32 | _save_to_json(test_data_path, test_data) 33 | 34 | return {'train': train_data_path, 35 | 'valid': valid_data_path, 36 | 'test': test_data_path} 37 | 38 | 39 | def _download_data(data_dir_path: Path) -> Tuple[list, list]: 40 | if not data_dir_path.exists(): 41 | os.makedirs(data_dir_path) 42 | dataset = load_dataset('squad_kor_v1', data_dir=str(data_dir_path)) 43 | train_data = dataset['train'] 44 | valid_data = dataset['validation'] 45 | 46 | return train_data.to_list(), valid_data.to_list() 47 | 48 | 49 | def _convert_to_encoded_data(tokenizer, raw_data) -> list: 50 | tokenized_data = [] 51 | for article in tqdm(raw_data): 52 | article_id = article['id'] 53 | 54 | title = article['title'] 55 | context = article['context'] # this is a paragraph 56 | question = article['question'] 57 | 58 | for answer, answer_start in zip(article['answers']['text'], article['answers']['answer_start']): 59 | clue_start = max(0, answer_start - 5) 60 | clue_end = min(len(context), answer_start + len(answer) + 5) 61 | answer_clue = context[clue_start:clue_end] 62 | article_id_int = int(article_id.split('-')[0]) 63 | 64 | tokenized_data.append((question, # question, 65 | answer_clue, # answer_clue, 66 | article_id_int, 67 | context, 68 | answer, 69 | title)) 70 | np.random.shuffle(tokenized_data) 71 | return tokenized_data 72 | 73 | 74 
| def _save_to_json(file_path: Path, data: list): 75 | with open(file_path, 'wt', encoding='utf-8') as f: 76 | json.dump(data, f) 77 | -------------------------------------------------------------------------------- /601 Dense Passage Retrieval/train.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from lightning import Trainer 4 | from lightning.pytorch.callbacks import ModelCheckpoint 5 | from lightning.pytorch.loggers import TensorBoardLogger 6 | from torch.utils.data import DataLoader 7 | 8 | from data import KorQuadDataset, InBatchNegativeSampler, KorquadCollator 9 | from preprocess import _download_data, preprocess_raw_data 10 | from model import KoBertBiEncoder 11 | 12 | # 로거 생성 및 로그 레벨 설정 13 | logger = logging.getLogger(__name__) 14 | logger.setLevel(logging.INFO) 15 | 16 | # 콘솔 출력을 위한 핸들러 생성 및 설정 17 | console_handler = logging.StreamHandler() 18 | console_handler.setLevel(logging.INFO) 19 | 20 | 21 | def train(): 22 | data_paths = preprocess_raw_data() 23 | 24 | model = KoBertBiEncoder(lr=1e-1, betas=(0.9, 0.99)) 25 | train_dataset = KorQuadDataset(data_paths['train']) 26 | test_dataset = KorQuadDataset(data_paths['test']) 27 | pad_id = train_dataset.pad_id 28 | 29 | # Max Sequence Size 는 GPU 메모리를 겁나게 갈가 먹습니다. 30 | # 되도록 작은 값을 사용하도록 합니다. 31 | train_loader = DataLoader(train_dataset, 32 | batch_sampler=InBatchNegativeSampler(train_dataset, batch_size=120, drop_last=False), 33 | collate_fn=KorquadCollator(pad_id=pad_id, max_seq_len=90), 34 | num_workers=4 35 | ) 36 | valid_loader = DataLoader(test_dataset, 37 | batch_sampler=InBatchNegativeSampler(test_dataset, batch_size=120, drop_last=False), 38 | collate_fn=KorquadCollator(pad_id=pad_id, max_seq_len=90), 39 | num_workers=4 40 | ) 41 | 42 | checkpointer = ModelCheckpoint( 43 | dirpath='./checkpoints', 44 | filename='dense-passage-retrieval-{epoch:02d}-{step:06d}-{valid_loss:.4f}', 45 | monitor='valid_loss', 46 | every_n_epochs=1, 47 | save_top_k=-1, 48 | mode='min' 49 | ) 50 | 51 | # Resume 할때 필요 52 | checkpoint_path = './checkpoints/dense-passage-retrieval-epoch=26-step=018465-valid_loss=4.3934.ckpt' 53 | tensorboard = TensorBoardLogger('./tb_logs', name='dense-passage-retrieval') 54 | trainer = Trainer(max_epochs=2000, 55 | accelerator='cuda', 56 | devices=1, 57 | enable_checkpointing=True, 58 | enable_progress_bar=True, 59 | enable_model_summary=True, 60 | # hard negative sampling - LightningDataModule 의 train_dataloader 함수에서 Subset 사용해서 추가 61 | reload_dataloaders_every_n_epochs=True, 62 | logger=tensorboard, 63 | callbacks=[checkpointer], 64 | ) 65 | trainer.fit(model, train_loader, valid_loader, 66 | ckpt_path=checkpoint_path 67 | ) 68 | # trainer.validate(model, valid_loaderB 69 | 70 | 71 | if __name__ == '__main__': 72 | train() 73 | -------------------------------------------------------------------------------- /data/basic_csv_data/gradient.csv: -------------------------------------------------------------------------------- 1 | score-1,score-2,label 2 | 34.62365962,78.02469282,0 3 | 30.28671077,43.89499752,0 4 | 35.84740877,72.90219803,0 5 | 60.18259939,86.3085521,1 6 | 79.03273605,75.34437644,1 7 | 45.08327748,56.31637178,0 8 | 61.10666454,96.51142588,1 9 | 75.02474557,46.55401354,1 10 | 76.0987867,87.42056972,1 11 | 84.43281996,43.53339331,1 12 | 95.86155507,38.22527806,0 13 | 75.01365839,30.60326323,0 14 | 82.30705337,76.4819633,1 15 | 69.36458876,97.71869196,1 16 | 39.53833914,76.03681085,0 17 | 53.97105215,89.20735014,1 18 | 69.07014406,52.74046973,1 19 
| 67.94685548,46.67857411,0 20 | 70.66150955,92.92713789,1 21 | 76.97878373,47.57596365,1 22 | 67.37202755,42.83843832,0 23 | 89.67677575,65.79936593,1 24 | 50.53478829,48.85581153,0 25 | 34.21206098,44.2095286,0 26 | 77.92409145,68.97235999,1 27 | 62.27101367,69.95445795,1 28 | 80.19018075,44.82162893,1 29 | 93.1143888,38.80067034,0 30 | 61.83020602,50.25610789,0 31 | 38.7858038,64.99568096,0 32 | 61.37928945,72.80788731,1 33 | 85.40451939,57.05198398,1 34 | 52.10797973,63.12762377,0 35 | 52.04540477,69.43286012,1 36 | 40.23689374,71.16774802,0 37 | 54.63510555,52.21388588,0 38 | 33.91550011,98.86943574,0 39 | 64.17698887,80.90806059,1 40 | 74.78925296,41.57341523,0 41 | 34.18364003,75.23772034,0 42 | 83.90239366,56.30804622,1 43 | 51.54772027,46.85629026,0 44 | 94.44336777,65.56892161,1 45 | 82.36875376,40.61825516,0 46 | 51.04775177,45.82270146,0 47 | 62.22267576,52.06099195,0 48 | 77.19303493,70.4582,1 49 | 97.77159928,86.72782233,1 50 | 62.0730638,96.76882412,1 51 | 91.5649745,88.69629255,1 52 | 79.94481794,74.16311935,1 53 | 99.27252693,60.999031,1 54 | 90.54671411,43.39060181,1 55 | 34.52451385,60.39634246,0 56 | 50.28649612,49.80453881,0 57 | 49.58667722,59.80895099,0 58 | 97.64563396,68.86157272,1 59 | 32.57720017,95.59854761,0 60 | 74.24869137,69.82457123,1 61 | 71.79646206,78.45356225,1 62 | 75.39561147,85.75993667,1 63 | 35.28611282,47.02051395,0 64 | 56.2538175,39.26147251,0 65 | 30.05882245,49.59297387,0 66 | 44.66826172,66.45008615,0 67 | 66.56089447,41.09209808,0 68 | 40.45755098,97.53518549,1 69 | 49.07256322,51.88321182,0 70 | 80.27957401,92.11606081,1 71 | 66.74671857,60.99139403,1 72 | 32.72283304,43.30717306,0 73 | 64.03932042,78.03168802,1 74 | 72.34649423,96.22759297,1 75 | 60.45788574,73.0949981,1 76 | 58.84095622,75.85844831,1 77 | 99.8278578,72.36925193,1 78 | 47.26426911,88.475865,1 79 | 50.4581598,75.80985953,1 80 | 60.45555629,42.50840944,0 81 | 82.22666158,42.71987854,0 82 | 88.91389642,69.8037889,1 83 | 94.83450672,45.6943068,1 84 | 67.31925747,66.58935318,1 85 | 57.23870632,59.51428198,1 86 | 80.366756,90.9601479,1 87 | 68.46852179,85.5943071,1 88 | 42.07545454,78.844786,0 89 | 75.47770201,90.424539,1 90 | 78.63542435,96.64742717,1 91 | 52.34800399,60.76950526,0 92 | 94.09433113,77.15910509,1 93 | 90.44855097,87.50879176,1 94 | 55.48216114,35.57070347,0 95 | 74.49269242,84.84513685,1 96 | 89.84580671,45.35828361,1 97 | 83.48916274,48.3802858,1 98 | 42.26170081,87.10385094,1 99 | 99.31500881,68.77540947,1 100 | 55.34001756,64.93193801,1 101 | 74.775893,89.5298129,1 102 | -------------------------------------------------------------------------------- /111 Linear Discriminant Analysis (LDA)/LDA Feature Extraction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Populating the interactive namespace from numpy and matplotlib\n" 13 | ] 14 | }, 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "/usr/local/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. 
Expected 96, got 88\n", 20 | " return f(*args, **kwds)\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "%pylab inline\n", 26 | "import numpy as np\n", 27 | "import pandas as pd\n", 28 | "\n", 29 | "from sklearn.datasets import load_wine\n", 30 | "from sklearn.model_selection import train_test_split\n", 31 | "from sklearn.preprocessing import StandardScaler" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "# Data" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "train_x: (124, 13)\n", 51 | "train_y: (124,)\n", 52 | "test_x: (54, 13)\n", 53 | "test_y: (54,)\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "wine = load_wine()\n", 59 | "\n", 60 | "data_x, data_y = wine['data'], wine['target']\n", 61 | "train_x, test_x, train_y, test_y = train_test_split(data_x, data_y, test_size=0.3, \n", 62 | " stratify=data_y, random_state=0)\n", 63 | "\n", 64 | "# Standardize the features\n", 65 | "sc = StandardScaler()\n", 66 | "train_x = sc.fit_transform(train_x)\n", 67 | "test_x = sc.transform(test_x)\n", 68 | "\n", 69 | "print('train_x:', train_x.shape)\n", 70 | "print('train_y:', train_y.shape)\n", 71 | "print('test_x:', test_x.shape)\n", 72 | "print('test_y:', test_y.shape)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "# Scatter Matrix\n", 80 | "\n", 81 | "1. Within-class scatter matrix 그리고 between-class scatter matrix 를 생성합니다.\n", 82 | "2. 각 클래스마다 d-dimensional mean vector를 구한다. (d는 feature의 갯수)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [] 91 | } 92 | ], 93 | "metadata": { 94 | "kernelspec": { 95 | "display_name": "Python 3", 96 | "language": "python", 97 | "name": "python3" 98 | }, 99 | "language_info": { 100 | "codemirror_mode": { 101 | "name": "ipython", 102 | "version": 3 103 | }, 104 | "file_extension": ".py", 105 | "mimetype": "text/x-python", 106 | "name": "python", 107 | "nbconvert_exporter": "python", 108 | "pygments_lexer": "ipython3", 109 | "version": "3.6.4" 110 | } 111 | }, 112 | "nbformat": 4, 113 | "nbformat_minor": 2 114 | } 115 | -------------------------------------------------------------------------------- /103 Logistic Regression/03 Predicting Breast Cancer with Scipy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "from sklearn import datasets" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "### Data Preparation" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 13, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "breast = datasets.load_breast_cancer()\n", 27 | "data = breast.data\n", 28 | "target = breast.target\n", 29 | "\n", 30 | "# Shuffle\n", 31 | "# _rands = np.random.permutation(len(data))\n", 32 | "# data = data[_rands]\n", 33 | "# target = target[_rands]\n", 34 | "\n", 35 | "# # Min-max Standardization\n", 36 | "# for i in range(data.shape[1]):\n", 37 | "# data[:, i] = (data[:, i] - data[:,i].min()) / (data[:,i].max() - data[:,i].min())\n", 38 | " \n", 39 | "x_trains = data[:430]\n", 40 | "y_trains = target[:430]\n", 41 | "x_tests = data[430:]\n", 42 | 
"y_tests = target[430:]" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 18, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "0.9568345323741008\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "from sklearn.linear_model import LogisticRegression\n", 60 | "\n", 61 | "lr = LogisticRegression(C=10, random_state=0, max_iter=10000)\n", 62 | "lr.fit(x_trains, y_trains)\n", 63 | "\n", 64 | "y_pred = lr.predict(x_tests) == y_tests\n", 65 | "print(y_pred.sum()/len(y_pred))" 66 | ] 67 | } 68 | ], 69 | "metadata": { 70 | "kernelspec": { 71 | "display_name": "Python 3", 72 | "language": "python", 73 | "name": "python3" 74 | }, 75 | "language_info": { 76 | "codemirror_mode": { 77 | "name": "ipython", 78 | "version": 3 79 | }, 80 | "file_extension": ".py", 81 | "mimetype": "text/x-python", 82 | "name": "python", 83 | "nbconvert_exporter": "python", 84 | "pygments_lexer": "ipython3", 85 | "version": "3.8.5" 86 | }, 87 | "toc": { 88 | "base_numbering": 1, 89 | "nav_menu": {}, 90 | "number_sections": true, 91 | "sideBar": true, 92 | "skip_h1_title": false, 93 | "title_cell": "Table of Contents", 94 | "title_sidebar": "Contents", 95 | "toc_cell": false, 96 | "toc_position": {}, 97 | "toc_section_display": true, 98 | "toc_window_display": false 99 | }, 100 | "varInspector": { 101 | "cols": { 102 | "lenName": 16, 103 | "lenType": 16, 104 | "lenVar": 40 105 | }, 106 | "kernels_config": { 107 | "python": { 108 | "delete_cmd_postfix": "", 109 | "delete_cmd_prefix": "del ", 110 | "library": "var_list.py", 111 | "varRefreshCmd": "print(var_dic_list())" 112 | }, 113 | "r": { 114 | "delete_cmd_postfix": ") ", 115 | "delete_cmd_prefix": "rm(", 116 | "library": "var_list.r", 117 | "varRefreshCmd": "cat(var_dic_list()) " 118 | } 119 | }, 120 | "types_to_exclude": [ 121 | "module", 122 | "function", 123 | "builtin_function_or_method", 124 | "instance", 125 | "_Feature" 126 | ], 127 | "window_display": false 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 1 132 | } 133 | -------------------------------------------------------------------------------- /032 Activations/01 Activations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Activations \n", 8 | "\n", 9 | "[Dropout Neural Networks (with ReLU)](https://gist.github.com/yusugomori/cf7bce19b8e16d57488a)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### Identity" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "def identity(x):\n", 39 | " return x\n", 40 | "\n", 41 | "def didentity(x):\n", 42 | " return 1" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": { 49 | "collapsed": true 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "def sigmoid(x):\n", 54 | " return 1. / (1 + numpy.exp(-x))\n", 55 | "\n", 56 | "def dsigmoid(x):\n", 57 | " return x * (1. - x)\n", 58 | "\n", 59 | "def tanh(x):\n", 60 | " return numpy.tanh(x)\n", 61 | "\n", 62 | "def dtanh(x):\n", 63 | " return 1. 
- x * x\n", 64 | "\n", 65 | "def softmax(x):\n", 66 | " e = numpy.exp(x - numpy.max(x)) # prevent overflow\n", 67 | " if e.ndim == 1:\n", 68 | " return e / numpy.sum(e, axis=0)\n", 69 | " else: \n", 70 | " return e / numpy.array([numpy.sum(e, axis=1)]).T # ndim = 2\n", 71 | "\n", 72 | "def ReLU(x):\n", 73 | " return x * (x > 0)\n", 74 | "\n", 75 | "def dReLU(x):\n", 76 | " return 1. * (x > 0)" 77 | ] 78 | } 79 | ], 80 | "metadata": { 81 | "kernelspec": { 82 | "display_name": "Python 3", 83 | "language": "python", 84 | "name": "python3" 85 | }, 86 | "language_info": { 87 | "codemirror_mode": { 88 | "name": "ipython", 89 | "version": 3 90 | }, 91 | "file_extension": ".py", 92 | "mimetype": "text/x-python", 93 | "name": "python", 94 | "nbconvert_exporter": "python", 95 | "pygments_lexer": "ipython3", 96 | "version": "3.8.5" 97 | }, 98 | "toc": { 99 | "base_numbering": 1, 100 | "nav_menu": {}, 101 | "number_sections": true, 102 | "sideBar": true, 103 | "skip_h1_title": false, 104 | "title_cell": "Table of Contents", 105 | "title_sidebar": "Contents", 106 | "toc_cell": false, 107 | "toc_position": {}, 108 | "toc_section_display": true, 109 | "toc_window_display": false 110 | }, 111 | "varInspector": { 112 | "cols": { 113 | "lenName": 16, 114 | "lenType": 16, 115 | "lenVar": 40 116 | }, 117 | "kernels_config": { 118 | "python": { 119 | "delete_cmd_postfix": "", 120 | "delete_cmd_prefix": "del ", 121 | "library": "var_list.py", 122 | "varRefreshCmd": "print(var_dic_list())" 123 | }, 124 | "r": { 125 | "delete_cmd_postfix": ") ", 126 | "delete_cmd_prefix": "rm(", 127 | "library": "var_list.r", 128 | "varRefreshCmd": "cat(var_dic_list()) " 129 | } 130 | }, 131 | "types_to_exclude": [ 132 | "module", 133 | "function", 134 | "builtin_function_or_method", 135 | "instance", 136 | "_Feature" 137 | ], 138 | "window_display": false 139 | } 140 | }, 141 | "nbformat": 4, 142 | "nbformat_minor": 2 143 | } 144 | -------------------------------------------------------------------------------- /500 Perceptron (mini-batch gradient descent)/Iris data classfication with Scikit-Learn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Perceptron with Scikit-Learn" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "/usr/local/lib/python3.6/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", 20 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "from sklearn import datasets\n", 26 | "from sklearn.cross_validation import train_test_split\n", 27 | "from sklearn.preprocessing import StandardScaler\n", 28 | "from sklearn.linear_model import Perceptron\n", 29 | "from sklearn.metrics import accuracy_score\n", 30 | "import numpy as np" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### Data Preprocessing\n", 38 | "\n", 39 | "StandardScaler 클래스의 fit함수는 데이터의 sample mean $ u $ 그리고 standard deviation $ \\sigma $ 를 알아냅니다." 
40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 69, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "iris = datasets.load_iris()\n", 49 | "x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3)\n", 50 | "\n", 51 | "sc = StandardScaler()\n", 52 | "sc.fit(x_train)\n", 53 | "x_train_std = sc.transform(x_train)\n", 54 | "x_test_std = sc.transform(x_test)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "### Train" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 78, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "np.random.seed(0)\n", 71 | "pn = Perceptron(max_iter=900, eta0=0.001, random_state=0)\n", 72 | "pn.fit(x_train_std, y_train)\n", 73 | "y_predicted = pn.predict(x_test_std)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "### Test" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 77, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "accuracy: 0.933333333333\n", 93 | "accuracy: 0.933333333333\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "print('accuracy:', (y_predicted == y_test).sum()/float(len(y_test)))\n", 99 | "print('accuracy:', accuracy_score(y_test, y_predicted))" 100 | ] 101 | } 102 | ], 103 | "metadata": { 104 | "kernelspec": { 105 | "display_name": "Python 3", 106 | "language": "python", 107 | "name": "python3" 108 | }, 109 | "language_info": { 110 | "codemirror_mode": { 111 | "name": "ipython", 112 | "version": 3 113 | }, 114 | "file_extension": ".py", 115 | "mimetype": "text/x-python", 116 | "name": "python", 117 | "nbconvert_exporter": "python", 118 | "pygments_lexer": "ipython3", 119 | "version": "3.6.1" 120 | } 121 | }, 122 | "nbformat": 4, 123 | "nbformat_minor": 1 124 | } 125 | -------------------------------------------------------------------------------- /008 Mathmatics for ML/01 Derivatives Rules.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "444e7fba-a452-492b-a5ea-344acfe28fdf", 6 | "metadata": {}, 7 | "source": [ 8 | "# 1. 
Derivatives Rules\n", 9 | "\n", 10 | "## 1.1 Rules\n", 11 | "\n", 12 | "derivative of x 에 대한 테이블\n", 13 | "\n", 14 | "| Rule | Function | Derivative |\n", 15 | "|:------------|:-------------|:---------------| \n", 16 | "| Constant | c | 0 |\n", 17 | "| Line | x | 1 |\n", 18 | "| | ax | a |\n", 19 | "| Square | $ x^2 $ | $ 2x $ |\n", 20 | "| Square Root | $ \\sqrt{x} $ | $ ½ x^{-½} $ |\n", 21 | "| Exponential | $ e^x $ | $ e^x $ |\n", 22 | "| | $ a^x $ | $ \\ln(a) a^x $ |\n", 23 | "| Logarithms | $ \\ln(x) $ | 1/x |\n", 24 | "| | $ \\log_a(x) $| 1/ (x ln(a)) |\n", 25 | "\n", 26 | "Function Rules!\n", 27 | "\n", 28 | "| Rule | Function | Derivative |\n", 29 | "|:--------------------------|:-------------|:---------------|\n", 30 | "| Mutiplication by constant | cf | cf` |\n", 31 | "| Power Rule | $ x^n $ | $ nx^{n-1} $ |\n", 32 | "| Sum Rule | f + g | f' + g' |\n", 33 | "| Difference Rule | f - g | f' - g' |\n", 34 | "| Product Rule | fg | fg' + f'g |\n", 35 | "| Quotient Rule | f/g | $ \\frac{f'g - g'f}{g^2} $ |\n", 36 | "| Reciprocal Rule | 1/f | $ \\frac{-f'}{f^2} $ |\n", 37 | "| Chain Rule | $ f \\cdot g $ | $ (f \\cdot g) g' $ |\n", 38 | "| Chain Rule with ' | f(g(x)) | f'(g(x)) g'(x) |" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "id": "2b5ee8bd-7de6-412c-9c1e-ed870f781f0e", 44 | "metadata": {}, 45 | "source": [ 46 | "### $ \\frac{d}{dx} x^3 $ 에 대한 값은? \n", 47 | "\n", 48 | "derivative of $ x^3 $ 은 power rule 을 적용합니다. \n", 49 | "\n", 50 | "$$ \\begin{aligned} \\frac{d}{dx} x^n &= nx^{n-1} \\\\\n", 51 | "\\frac{d}{dx} x^3 &= 3x^{3-1} = 3x^2\n", 52 | "\\end{aligned} $$\n", 53 | "\n" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "id": "d55ea26e-69be-45f8-9841-14fefc19576e", 59 | "metadata": {}, 60 | "source": [ 61 | "### x^2 + x^4 에 대한 값은? \n", 62 | "\n", 63 | "derivative of f + g = f' + g' 인 sum rule 을 사용. \n", 64 | "\n", 65 | "$$ \\begin{aligned} \n", 66 | "\\frac{d}{dx} \\left[ x^2 + x^4 \\right] = 2x + 4x^3\n", 67 | "\\end{aligned} $$" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "id": "29551409-1fcd-4d2f-996b-1914ff868875", 73 | "metadata": {}, 74 | "source": [ 75 | "### Chain Rule 예제 \n", 76 | "\n", 77 | "what is the derivative of $ f(x) = (3x + 1)^5 $ ?\n", 78 | "\n", 79 | "\n", 80 | "기본적으로 chain rule 은 바깥쪽에서 미분한번 하고, 안쪽에서 다시 미분하고.. 서로 곱하면 됨.
\n", 81 | "여기서 바깥쪽은 5제곱한 것이고, 안쪽은 3x + 1 임\n", 82 | "\n", 83 | "\n", 84 | "$$ f'(x) = 5(3x+1)^4 (3x +1)' = 5(3x+1)^4 \\times 3 $$" 85 | ] 86 | } 87 | ], 88 | "metadata": { 89 | "kernelspec": { 90 | "display_name": "Python 3 (ipykernel)", 91 | "language": "python", 92 | "name": "python3" 93 | }, 94 | "language_info": { 95 | "codemirror_mode": { 96 | "name": "ipython", 97 | "version": 3 98 | }, 99 | "file_extension": ".py", 100 | "mimetype": "text/x-python", 101 | "name": "python", 102 | "nbconvert_exporter": "python", 103 | "pygments_lexer": "ipython3", 104 | "version": "3.8.10" 105 | } 106 | }, 107 | "nbformat": 4, 108 | "nbformat_minor": 5 109 | } 110 | -------------------------------------------------------------------------------- /017 Hyperparameter Optimization/02 Grid Search (Hacker Rank).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Grid Search\n", 19 | "\n", 20 | "Grid search는 global Optimization으로서 search methods의 한 종류 입니다.
\n", 21 | "아래와 같은 패턴으로 검색을 하게 됩니다.
\n", 22 | "(그 외의 검색 방법으로 Spiral Search, Strip Search 등등이 있습니다.)\n", 23 | "\n", 24 | "" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# The Grid Search (Hacker Rank)\n", 32 | "\n", 33 | "* [Hacker Rank - Grid Search](https://www.hackerrank.com/challenges/the-grid-search)\n", 34 | "\n", 35 | "다음과 같은 2 dimansional matrix의 패턴을 찾아내는 것이 문제.\n", 36 | "\n", 37 | "1234567890 \n", 38 | "09**876543**21 \n", 39 | "11**111111**11 \n", 40 | "11**111111**11 \n", 41 | "2222222222 \n", 42 | "\n", 43 | "예를 들어서 위의 matrix에서 다음과 같은 패턴을 찾아내는것. \n", 44 | "\n", 45 | "876543 \n", 46 | "111111 \n", 47 | "111111" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "# Problem" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 63, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": [ 67 | "True" 68 | ] 69 | }, 70 | "execution_count": 63, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "x = np.array([[7, 2, 8, 3, 4, 5, 5, 8, 6, 4], \n", 77 | " [6, 7, 3, 1, 1, 5, 8, 6, 1, 9],\n", 78 | " [8, 9, 8, 8, 2, 4, 2, 6, 4, 3],\n", 79 | " [3, 8, 3, 0, 5, 8, 9, 3, 2, 4],\n", 80 | " [2, 2, 2, 9, 5, 0, 5, 8, 1, 3],\n", 81 | " [5, 6, 3, 3, 8, 4, 5, 3, 7, 4],\n", 82 | " [6, 4, 7, 3, 5, 3, 0, 2, 9, 3], \n", 83 | " [7, 0, 5, 3, 1, 0, 6, 6, 0, 1],\n", 84 | " [0, 8, 3, 4, 2, 8, 2, 9, 5, 6],\n", 85 | " [4, 6, 0, 7, 9, 2, 4, 1, 3, 7]])\n", 86 | "\n", 87 | "\n", 88 | "y = np.array([[9, 5, 0, 5], \n", 89 | " [3, 8, 4, 5], \n", 90 | " [3, 5, 3, 0]])\n", 91 | "\n", 92 | "def match(x, y, xi, xj):\n", 93 | " for yi in range(y.shape[0]):\n", 94 | " for yj in range(y.shape[1]):\n", 95 | " try:\n", 96 | " if x[xi + yi][xj + yj] != y[yi][yj]:\n", 97 | " return False\n", 98 | " except IndexError:\n", 99 | " return False\n", 100 | " return True\n", 101 | "\n", 102 | "def grid_search(x, y):\n", 103 | " for idx, v in np.ndenumerate(x):\n", 104 | " matched = match(x, y, *idx)\n", 105 | " if matched:\n", 106 | " return True\n", 107 | " return False\n", 108 | " \n", 109 | "\n", 110 | "grid_search(x, y)" 111 | ] 112 | } 113 | ], 114 | "metadata": { 115 | "kernelspec": { 116 | "display_name": "Python 3", 117 | "language": "python", 118 | "name": "python3" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 3 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython3", 130 | "version": "3.6.0" 131 | } 132 | }, 133 | "nbformat": 4, 134 | "nbformat_minor": 2 135 | } 136 | -------------------------------------------------------------------------------- /503 International Airline Passengers prediction/rnn_with_numpy.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | from datetime import datetime 3 | from random import randint 4 | 5 | import numpy as np 6 | 7 | 8 | class RNNNumpy(object): 9 | def __init__(self, word_dim, hidden_dim=100, bptt_truncate=4): 10 | self.word_dim = word_dim 11 | self.hidden_dim = hidden_dim 12 | self.bptt_truncate = bptt_truncate 13 | 14 | self.U = np.random.uniform(-1. / np.sqrt(word_dim), 1. / np.sqrt(word_dim), (hidden_dim, word_dim)) 15 | self.V = np.random.uniform(-1. / np.sqrt(word_dim), 1. / np.sqrt(word_dim), (word_dim, hidden_dim)) 16 | self.W = np.random.uniform(-1. / np.sqrt(word_dim), 1. 
/ np.sqrt(word_dim), (hidden_dim, hidden_dim)) 17 | 18 | def forward_propagation(self, x): 19 | """ 20 | :param x: 한문장이 되는.. 정수값을 갖는 vector 21 | :return: 22 | """ 23 | # The total number of time steps 24 | T = len(x) 25 | 26 | # During forward propagation we save all hidden states in s because need them later. 27 | # We add one additional element for the initial hidden, which we set to 0 28 | s = np.zeros((T + 1, self.hidden_dim)) 29 | 30 | # The outputs at each time step. Again, we save them for later. 31 | o = np.zeros((T, self.word_dim)) 32 | 33 | for t in range(T): 34 | s[t] = np.tanh(self.U[:, x[t]] + self.W.dot(s[t - 1])) 35 | o[t] = self.softmax(self.V.dot(s[t])) 36 | 37 | return [o, s] 38 | 39 | def cross_entropy(self, x, y): 40 | """ 41 | :param x: sentence 42 | """ 43 | N = len(y) 44 | o, s = self.forward_propagation(x) 45 | predicted_output = o[np.arange(len(y)), y] 46 | L = np.sum(np.log(predicted_output)) 47 | return -1 * L / N 48 | 49 | def bptt(self, x, y): 50 | """ 51 | :param x: an array of a sentence 52 | """ 53 | T = len(y) 54 | o, s = self.forward_propagation(x) 55 | dldU = np.zeros(self.U.shape) 56 | dldV = np.zeros(self.V.shape) 57 | dldW = np.zeros(self.W.shape) 58 | delta_o = o 59 | delta_o[np.arange(T), y] -= 1. 60 | 61 | for t in np.arange(T)[::-1]: 62 | dldV += np.outer(delta_o[t], s[t].T) 63 | # Initial delta calculation 64 | delta_t = self.V.T.dot(delta_o[t]) * (1 - (s[t] ** 2)) 65 | 66 | for bptt_step in np.arange(max(0, t - self.bptt_truncate), t + 1)[::-1]: 67 | dldW += np.outer(delta_t, s[bptt_step - 1]) 68 | dldU[:, x[bptt_step]] += delta_t 69 | delta_t = self.W.T.dot(delta_t) * (1 - s[bptt_step - 1] ** 2) 70 | return dldU, dldV, dldW 71 | 72 | def calculate_gradients(self, x, y, learning_rate=0.005): 73 | # Calculate the gradients 74 | dLdU, dLdV, dLdW = self.bptt(x, y) 75 | # Change parameters according to gradients and learning rate 76 | self.U -= learning_rate * dLdU 77 | self.V -= learning_rate * dLdV 78 | self.W -= learning_rate * dLdW 79 | 80 | def train(self, x_train, y_train, learning_rate=0.005, npoch=100): 81 | N = len(y_train) 82 | loss_show = N / 10 83 | 84 | print('Start Training') 85 | print('Total Data: ', N) 86 | 87 | for i in range(npoch): 88 | # One SGD step 89 | rand_idx = randint(0, N-1) 90 | self.calculate_gradients(x_train[rand_idx], y_train[rand_idx], learning_rate) 91 | if i % 100 == 0: 92 | self._print_error(self.cross_entropy(x_train[i], y_train[i]), i) 93 | 94 | @staticmethod 95 | def _print_error(cost, i): 96 | t = datetime.now() 97 | '{time}: i={i}, cost={cost}'.format(time=t, i=i, cost=cost) 98 | 99 | def softmax(self, v, t=1.0): 100 | e = np.exp(v / t) 101 | return e / np.sum(e) 102 | 103 | def predict(self, x): 104 | o, x = self.forward_propagation(x) 105 | return np.argmax(o, axis=1) 106 | -------------------------------------------------------------------------------- /502 Convolutional Neural Network/02 Transposed Convolution .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Transposed Convolution \n", 8 | "\n", 9 | "Transposed convolution은 일반적인 convolution의 연산의 반대로 transformation을 할때 사용을 합니다.
\n", 10 | "대부분 Deconvolution으로 알고 있지만, 잘못된 표현입니다. 예를 들어서 auto encoder를 하게 될때, Encoder에서 black box를 통해 convolution을 하게 됩니다. 이후 Decoder부분에서는 다시 black box를 통해서 deconvolution을 하게 됩니다. 이경우 원래 input image의 spatial resolution (공간적 모양 - 픽셀의 양) 로 black box를 통해 돌아가는 것이므로 deconvolution이라는 표현이 맞습니다. \n", 11 | "(Deconvolution은 수학적 연산이라기 보다는 input으로 사용된 spatial resolution 돌아가는 개념정도로 이해하는게 좋을듯 합니다)\n", 12 | "\n", 13 | "Transposed convolution은 deconvolution과 마찬가지로 동일한 spatial resolution으로 돌아가기 때문입니다.
\n", 14 | "다른점은 수학적으로 연산하는 방법 자체가 다릅니다.
\n", 15 | "Transposed convolution은 일반적인 convolution과 동일하지만 spatial transformation을 반대로 합니다." 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## No zero padding, unit strides" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "Populating the interactive namespace from numpy and matplotlib\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "%pylab inline\n", 40 | "import numpy as np\n", 41 | "from scipy import signal as sg" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 82, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "[[12 12 17]\n", 54 | " [10 17 19]\n", 55 | " [ 9 6 14]]\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "image = np.array([[3, 3, 2, 1, 0], \n", 61 | " [0, 0, 1, 3, 1], \n", 62 | " [3, 1, 2, 2, 3],\n", 63 | " [2, 0, 0, 2, 2],\n", 64 | " [2, 0, 0, 0, 1]])\n", 65 | "kernel = np.array([[0, 1, 2], \n", 66 | " [2, 2, 0], \n", 67 | " [0, 1, 2]])\n", 68 | "output = sg.correlate2d(image, kernel, mode='valid')\n", 69 | "print(output)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 80, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "[[8]]\n", 82 | "[[0 3 2]\n", 83 | " [0 0 0]\n", 84 | " [3 0 0]]\n", 85 | "8\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "image = np.array([[3, 3, 2], \n", 91 | " [0, 0, 1], \n", 92 | " [3, 1, 2]])\n", 93 | "\n", 94 | "kernel = np.array([[0, 1, 1], \n", 95 | " [0, 0, 0], \n", 96 | " [1, 0, 0]])\n", 97 | "# kernel = np.array([[0, 1, 2], \n", 98 | "# [2, 2, 0], \n", 99 | "# [0, 1, 2]])\n", 100 | "output = sg.correlate2d(kernel, image, mode='valid')\n", 101 | "print(output)\n", 102 | "\n", 103 | "print(image * kernel)\n", 104 | "print( (image * kernel).sum())" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "sg.deconvolve()" 114 | ] 115 | } 116 | ], 117 | "metadata": { 118 | "kernelspec": { 119 | "display_name": "Python 3", 120 | "language": "python", 121 | "name": "python3" 122 | }, 123 | "language_info": { 124 | "codemirror_mode": { 125 | "name": "ipython", 126 | "version": 3 127 | }, 128 | "file_extension": ".py", 129 | "mimetype": "text/x-python", 130 | "name": "python", 131 | "nbconvert_exporter": "python", 132 | "pygments_lexer": "ipython3", 133 | "version": "3.6.4" 134 | } 135 | }, 136 | "nbformat": 4, 137 | "nbformat_minor": 2 138 | } 139 | -------------------------------------------------------------------------------- /601 Dense Passage Retrieval/data.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from pathlib import Path 4 | from typing import Optional, List, Union, Iterator, Iterable, Sized 5 | 6 | import torch 7 | from torch.nn.utils.rnn import pad_sequence 8 | from torch.utils.data import Dataset, BatchSampler, Sampler, RandomSampler, SequentialSampler 9 | from transformers import AutoTokenizer 10 | 11 | 12 | class KorQuadDataset(Dataset): 13 | 14 | def __init__(self, kor_quad_path: Union[str, Path]): 15 | kor_quad_path: Path = Path(kor_quad_path) 16 | with open(kor_quad_path, 'r') as f: 17 | self.data: List[tuple] = json.load(f) 18 | logging.info(f'Data Loaded from {kor_quad_path}') 19 | 20 | 
self.tokenizer = AutoTokenizer.from_pretrained('skt/kobert-base-v1') 21 | self.pad_id = self.tokenizer.pad_token_id 22 | 23 | @property 24 | def dataset(self) -> List[tuple]: 25 | return self.data 26 | 27 | def __len__(self): 28 | return len(self.data) 29 | 30 | def __getitem__(self, idx) -> tuple: 31 | encoded_question, encoded_context, article_id, context, answer, title = self.data[idx] 32 | 33 | return encoded_question, encoded_context, article_id 34 | 35 | 36 | class InBatchNegativeSampler(BatchSampler): 37 | """ 38 | In-batch negative 학습을 위해서 중복 answer를 갖지 않도록 batch 를 구성합니다. 39 | 따라서 중복되는 데이터 일부를 패스 하기 때문에, Dataset 에서의 전체 데이터셋의 크기보다 작을 수 있습니다. 40 | """ 41 | 42 | ARTICLE_ID_IDX = 2 43 | 44 | def __init__(self, data_source: Sized, 45 | batch_size: int, 46 | drop_last: bool = False, 47 | shuffle: bool = False) -> None: 48 | 49 | # Sampler 사용하기전에, RandomSampler 또는 SequentialSampler 를 wrapping 해야 됩니다. 50 | if shuffle: 51 | sampler = RandomSampler(data_source) 52 | else: 53 | sampler = SequentialSampler(data_source) 54 | super().__init__(sampler, batch_size=batch_size, drop_last=drop_last) 55 | 56 | def __iter__(self) -> Iterator[List[int]]: 57 | """ 58 | 만약 ground truth 값이 있다면 (y=1), 하나만 남겨두고 전부다 negative 로 둬도 될듯. 59 | """ 60 | duplicates = set() 61 | sampled_ids = [] 62 | for idx in self.sampler: 63 | item = self.sampler.data_source[idx] 64 | article_id = item[self.ARTICLE_ID_IDX] 65 | 66 | if article_id in duplicates: 67 | logging.info(f'duplicated article_id: {article_id}') 68 | continue 69 | 70 | sampled_ids.append(idx) 71 | duplicates.add(article_id) 72 | if len(sampled_ids) >= self.batch_size: 73 | yield sampled_ids 74 | sampled_ids.clear() 75 | duplicates.clear() 76 | 77 | if len(sampled_ids) > 0 and not self.drop_last: 78 | yield sampled_ids 79 | 80 | 81 | class KorquadCollator: 82 | def __init__(self, pad_id: int, max_seq_len: int) -> None: 83 | self.pad_id = pad_id 84 | self.max_seq_len = max_seq_len 85 | self.tokenizer = AutoTokenizer.from_pretrained('skt/kobert-base-v1') 86 | 87 | def __call__(self, batch: List[tuple]): 88 | """ 89 | - batch_q: batch of encoded questions 90 | - batch_p: batch of paragraphs (context) 91 | """ 92 | encoded_q = [self.tokenizer.encode(i[0]) for i in batch] 93 | encoded_p = [self.tokenizer.encode(i[1]) for i in batch] 94 | 95 | batch_q = pad_sequence([torch.LongTensor(q[:min(self.max_seq_len, len(q))]) for q in encoded_q], 96 | batch_first=True, 97 | padding_value=self.pad_id) 98 | batch_q_attn_mask = torch.Tensor(batch_q != self.pad_id).long() 99 | batch_p = pad_sequence([torch.LongTensor(p[:min(self.max_seq_len, len(p))]) for p in encoded_p], 100 | batch_first=True, 101 | padding_value=self.pad_id) 102 | batch_p_attn_mask = torch.Tensor(batch_p != self.pad_id).long() 103 | 104 | return batch_q, batch_q_attn_mask, batch_p, batch_p_attn_mask 105 | -------------------------------------------------------------------------------- /data/linear-regression/linear-regression.csv: -------------------------------------------------------------------------------- 1 | 32.502345269453031,31.70700584656992 2 | 53.426804033275019,68.77759598163891 3 | 61.530358025636438,62.562382297945803 4 | 47.475639634786098,71.546632233567777 5 | 59.813207869512318,87.230925133687393 6 | 55.142188413943821,78.211518270799232 7 | 52.211796692214001,79.64197304980874 8 | 39.299566694317065,59.171489321869508 9 | 48.10504169176825,75.331242297063056 10 | 52.550014442733818,71.300879886850353 11 | 45.419730144973755,55.165677145959123 12 | 54.351634881228918,82.478846757497919 13 
| 44.164049496773352,62.008923245725825 14 | 58.16847071685779,75.392870425994957 15 | 56.727208057096611,81.43619215887864 16 | 48.955888566093719,60.723602440673965 17 | 44.687196231480904,82.892503731453715 18 | 60.297326851333466,97.379896862166078 19 | 45.618643772955828,48.847153317355072 20 | 38.816817537445637,56.877213186268506 21 | 66.189816606752601,83.878564664602763 22 | 65.41605174513407,118.59121730252249 23 | 47.48120860786787,57.251819462268969 24 | 41.57564261748702,51.391744079832307 25 | 51.84518690563943,75.380651665312357 26 | 59.370822011089523,74.765564032151374 27 | 57.31000343834809,95.455052922574737 28 | 63.615561251453308,95.229366017555307 29 | 46.737619407976972,79.052406169565586 30 | 50.556760148547767,83.432071421323712 31 | 52.223996085553047,63.358790317497878 32 | 35.567830047746632,41.412885303700563 33 | 42.436476944055642,76.617341280074044 34 | 58.16454011019286,96.769566426108199 35 | 57.504447615341789,74.084130116602523 36 | 45.440530725319981,66.588144414228594 37 | 61.89622268029126,77.768482417793024 38 | 33.093831736163963,50.719588912312084 39 | 36.436009511386871,62.124570818071781 40 | 37.675654860850742,60.810246649902211 41 | 44.555608383275356,52.682983366387781 42 | 43.318282631865721,58.569824717692867 43 | 50.073145632289034,82.905981485070512 44 | 43.870612645218372,61.424709804339123 45 | 62.997480747553091,115.24415280079529 46 | 32.669043763467187,45.570588823376085 47 | 40.166899008703702,54.084054796223612 48 | 53.575077531673656,87.994452758110413 49 | 33.864214971778239,52.725494375900425 50 | 64.707138666121296,93.576118692658241 51 | 38.119824026822805,80.166275447370964 52 | 44.502538064645101,65.101711570560326 53 | 40.599538384552318,65.562301260400375 54 | 41.720676356341293,65.280886920822823 55 | 51.088634678336796,73.434641546324301 56 | 55.078095904923202,71.13972785861894 57 | 41.377726534895203,79.102829683549857 58 | 62.494697427269791,86.520538440347153 59 | 49.203887540826003,84.742697807826218 60 | 41.102685187349664,59.358850248624933 61 | 41.182016105169822,61.684037524833627 62 | 50.186389494880601,69.847604158249183 63 | 52.378446219236217,86.098291205774103 64 | 50.135485486286122,59.108839267699643 65 | 33.644706006191782,69.89968164362763 66 | 39.557901222906828,44.862490711164398 67 | 56.130388816875467,85.498067778840223 68 | 57.362052133238237,95.536686846467219 69 | 60.269214393997906,70.251934419771587 70 | 35.678093889410732,52.721734964774988 71 | 31.588116998132829,50.392670135079896 72 | 53.66093226167304,63.642398775657753 73 | 46.682228649471917,72.247251068662365 74 | 43.107820219102464,57.812512976181402 75 | 70.34607561504933,104.25710158543822 76 | 44.492855880854073,86.642020318822006 77 | 57.50453330326841,91.486778000110135 78 | 36.930076609191808,55.231660886212836 79 | 55.805733357942742,79.550436678507609 80 | 38.954769073377065,44.847124242467601 81 | 56.901214702247074,80.207523139682763 82 | 56.868900661384046,83.14274979204346 83 | 34.33312470421609,55.723489260543914 84 | 59.04974121466681,77.634182511677864 85 | 57.788223993230673,99.051414841748269 86 | 54.282328705967409,79.120646274680027 87 | 51.088719898979143,69.588897851118475 88 | 50.282836348230731,69.510503311494389 89 | 44.211741752090113,73.687564318317285 90 | 38.005488008060688,61.366904537240131 91 | 32.940479942618296,67.170655768995118 92 | 53.691639571070056,85.668203145001542 93 | 68.76573426962166,114.85387123391394 94 | 46.230966498310252,90.123572069967423 95 | 68.319360818255362,97.919821035242848 96 | 
50.030174340312143,81.536990783015028 97 | 49.239765342753763,72.111832469615663 98 | 50.039575939875988,85.232007342325673 99 | 48.149858891028863,66.224957888054632 100 | 25.128484647772304,53.454394214850524 101 | -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import cv2 4 | import gym 5 | import tensorflow as tf 6 | import tflearn 7 | from environment import Environment 8 | 9 | 10 | def test_argmax(): 11 | input = tf.placeholder('float32', [10]) 12 | max = tf.argmax(input, dimension=0) 13 | 14 | init = tf.initialize_all_variables() 15 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1, allow_growth=True) 16 | with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: 17 | sess.run(init) 18 | assert 8 == sess.run(max, feed_dict={input: [10, 20, 30, 40, 50, 60, 70, 80, 90, 0]}) 19 | assert 0 == sess.run(max, feed_dict={input: [1000, 0, 0, 0, 0, 0, 0, 0, 0, 0]}) 20 | 21 | 22 | def test_one_hot_vector(): 23 | input1 = tf.placeholder('int64', [None]) 24 | 25 | # One hot vector 26 | one_hot_tf = tf.one_hot(input1, depth=5, on_value=1., off_value=0.) 27 | data = [0, 1, 2, 3, 1, 2] 28 | answer = [[1., 0., 0., 0., 0.], 29 | [0., 1., 0., 0., 0.], 30 | [0., 0., 1., 0., 0.], 31 | [0., 0., 0., 1., 0.], 32 | [0., 1., 0., 0., 0.], 33 | [0., 0., 1., 0., 0.]] 34 | 35 | # Reduced Sum 36 | reduced_sum = tf.reduce_sum(one_hot_tf, reduction_indices=1) 37 | reduced_sum_answer = [1., 1., 1., 1., 1., 1.] 38 | 39 | init = tf.initialize_all_variables() 40 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1, allow_growth=True) 41 | with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: 42 | sess.run(init) 43 | one_hot = sess.run(one_hot_tf, feed_dict={input1: data}) 44 | assert np.array_equal(answer, one_hot) 45 | assert np.array_equal(reduced_sum_answer, sess.run(reduced_sum, feed_dict={one_hot_tf: one_hot})) 46 | 47 | 48 | def test_clip_by_value(): 49 | input1 = tf.placeholder('int64', [None]) 50 | cliped_data = tf.clip_by_value(input1, clip_value_min=2, clip_value_max=5) 51 | data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 52 | answer = [2, 2, 3, 4, 5, 5, 5, 5, 5, 5] 53 | 54 | init = tf.initialize_all_variables() 55 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1, allow_growth=True) 56 | with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: 57 | sess.run(init) 58 | assert np.array_equal(answer, cliped_data.eval({input1: data})) 59 | 60 | 61 | def test_weird(): 62 | def build_dqn(num_actions, action_repeat): 63 | """ 64 | Building a DQN. 
65 | """ 66 | inputs = tf.placeholder(tf.float32, [None, action_repeat, 84, 84]) 67 | # Inputs shape: [batch, channel, height, width] need to be changed into 68 | # shape [batch, height, width, channel] 69 | net = tf.transpose(inputs, [0, 2, 3, 1]) 70 | net1 = tflearn.conv_2d(net, 32, 8, strides=4, activation='relu') 71 | net2 = tflearn.conv_2d(net1, 64, 4, strides=2, activation='relu') 72 | net3 = tflearn.fully_connected(net2, 256, activation='relu') 73 | q_values = tflearn.fully_connected(net3, num_actions) 74 | return inputs, net1, q_values 75 | 76 | env = Environment('Breakout-v0') 77 | inputs, net1, q_values = build_dqn(env.action_size, 4) 78 | 79 | init = tf.initialize_all_variables() 80 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1, allow_growth=True) 81 | with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: 82 | sess.run(init) 83 | 84 | screens = env.get_initial_states() 85 | net_screens = [] 86 | for i in range(30): 87 | screens = [] 88 | for j in range(4): 89 | action = env.random_action() 90 | screen, reward, done, info = env.step(action) 91 | screens.append(screen) 92 | if done: 93 | env.reset() 94 | break 95 | net_screens.append(screens) 96 | 97 | predicted_actions = q_values.eval(session=sess, feed_dict={inputs: net_screens}) 98 | action_indices = np.argmax(predicted_actions, axis=1) 99 | print predicted_actions 100 | print action_indices 101 | -------------------------------------------------------------------------------- /501 Autoencoder/01 Introduction Auto-Encoder.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction Auto-Encoder \n", 8 | "\n", 9 | "### Autoencoders\n", 10 | "\n", 11 | "2000이후 가장 중요한 결과물중에 하나는 Deep Belief Networks입니다. Deep Belief Network는 초기는 random initialization이 아닌 각각의 Layers들을 unsupervised learning algorithm으로 미리학습(pretraining)시키는게 더 좋은 결과를 내 놓는다는 아이디어에 근거를 둡니다. Deep Belief Networks는 Restricted Boltzmann Machines, 그리고 Deep Autoencoders에 기반을 두고 있습니다." 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### Data Compression via Autoencoders\n", 19 | "\n", 20 | "예를 들어서 모바일 데이터를 클라우드로 보내려고 합니다.
\n", 21 | "이때 데이터는 다음과 같은 좌표로 구성이 되어 있으며, 눈으로 보면 알수 있듯이 x에 비해서 y값이 2배이상 빠르게 증가하는것을 알 수 있습니다.\n", 22 | "\n", 23 | "\n", 24 | "\n", 25 | "즉 이러한 관계를 이용하여, 데이터의 1 dimension만 클라우드로 보낸다면 데이터의 양을 compress할 수 있을 것입니다.
\n", 26 | "클라우드에서는 받은 데이터를 2배정도 증가시켜서 대략의 데이터값을 복원할 수 있습니다. \n", 27 | "\n", 28 | "1. **Encoding**: $ x^{(i)} $ data의 compress 해서 $ z^{(i)} $ data로 변환시킵니다.\n", 29 | "2. **Sending**: $ z^{(i)} $를 클라우드로 보냅니다.\n", 30 | "3. **Decoding**: $ z^{(i)}$ 데이터를 $ \\hat{x}^{(i)} $ 로 변환시킵니다.\n", 31 | "\n", 32 | "### $$ z^{(i)} = W_1 x^{(i)} + b_1$$\n", 33 | "\n", 34 | "### $$ \\hat{x}^{(i)} = W_2 z^{(i)} + b_2 $$" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Cost function\n", 42 | "\n", 43 | "Gradient descent를 활용하기 위해서 sum of sqaured error (SSE)를 사용합니다.\n", 44 | "\n", 45 | "### $$ \\sum^m_{i=1} \\left( \\hat{x}^{(i)} - x^{(i)} \\right) $$\n", 46 | "\n", 47 | "### $$ = \\sum^m_{i=1} \\left( W_2 z^{(i)} + b_2 - x^{(i)} \\right) $$\n", 48 | "\n", 49 | "### $$ = \\sum^m_{i=1} \\left( W_2 (W_1 x^{(i)} + b_1) + b_2 - x^{(i)} \\right) $$" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "### Autoencoders as an initialization method\n", 57 | "\n", 58 | "Autoencoders는 data compression, visualization등등에서 활용될수 있습니다. 2006~2007년 사이에 Autoencoders가 neural network를 pretrain하는데 사용될수 있음을 알게됩니다. \n", 59 | "\n", 60 | "\n", 61 | "**Pretraining Steps**\n", 62 | "\n", 63 | "1. \n" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### References\n", 71 | "\n", 72 | "* [haha](http://ai.stanford.edu/~quocle/tutorial2.pdf)" 73 | ] 74 | } 75 | ], 76 | "metadata": { 77 | "kernelspec": { 78 | "display_name": "Python 3", 79 | "language": "python", 80 | "name": "python3" 81 | }, 82 | "language_info": { 83 | "codemirror_mode": { 84 | "name": "ipython", 85 | "version": 3 86 | }, 87 | "file_extension": ".py", 88 | "mimetype": "text/x-python", 89 | "name": "python", 90 | "nbconvert_exporter": "python", 91 | "pygments_lexer": "ipython3", 92 | "version": "3.8.5" 93 | }, 94 | "toc": { 95 | "base_numbering": 1, 96 | "nav_menu": {}, 97 | "number_sections": true, 98 | "sideBar": true, 99 | "skip_h1_title": false, 100 | "title_cell": "Table of Contents", 101 | "title_sidebar": "Contents", 102 | "toc_cell": false, 103 | "toc_position": {}, 104 | "toc_section_display": true, 105 | "toc_window_display": false 106 | }, 107 | "varInspector": { 108 | "cols": { 109 | "lenName": 16, 110 | "lenType": 16, 111 | "lenVar": 40 112 | }, 113 | "kernels_config": { 114 | "python": { 115 | "delete_cmd_postfix": "", 116 | "delete_cmd_prefix": "del ", 117 | "library": "var_list.py", 118 | "varRefreshCmd": "print(var_dic_list())" 119 | }, 120 | "r": { 121 | "delete_cmd_postfix": ") ", 122 | "delete_cmd_prefix": "rm(", 123 | "library": "var_list.r", 124 | "varRefreshCmd": "cat(var_dic_list()) " 125 | } 126 | }, 127 | "types_to_exclude": [ 128 | "module", 129 | "function", 130 | "builtin_function_or_method", 131 | "instance", 132 | "_Feature" 133 | ], 134 | "window_display": false 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 2 139 | } 140 | -------------------------------------------------------------------------------- /010 Intro to Linear Algebra/02 Vector & Linear Combination.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Vector\n", 8 | "\n", 9 | "Vector는 $ \\mathbb{R}^2 $ 처럼 표시할수 있으며, 여기서 R은 real number를 뜻합니다.\n", 10 | "\n", 11 | "Column vector $ \\mathbf{v} $ 의 예제는 다음과 같습니다.\n", 12 | "\n", 13 | "$$ \\mathbf{v} = \\begin{bmatrix} a \\\\ b \\end{bmatrix} $$\n", 14 | 
"\n", 15 | "$ \\mathbf{v} $ 를 transpose시켜서 row vector로 만들면 다음과 같습니다.\n", 16 | "\n", 17 | "$$ \\mathbf{v}^T = (a, b) $$\n", 18 | "\n", 19 | "이때 중요한점은 row vector로 표기시 반드시 \"괄호 ()\"를 사용해서 표기를 해줍니다.
\n", 20 | "만약 \\[\\]를 사용해서 표기시 matrix를 표시하는 것입니다.
\n", 21 | "\n", 22 | "$$ [a, b] \\ne (a, b) $$\n", 23 | "\n", 24 | "앞의 $ [a, b ]$ 는 matrix $ \\mathbb{R}^{1 x 4} $ 를 나타냅니다.
\n", 25 | "$ \\begin{bmatrix} a \\\\ b \\end{bmatrix} $ 는 $ \\mathbb{R}^{4 x 1} $ matrix를 나타냅니다.\n", 26 | "\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# Vector의 기하학적 의미\n", 34 | "\n", 35 | "Vector는 고등학교때 배웠듯이, 크기(magnitude) 그리고 방향(Direction)을 갖는다고 했는데..
\n", 36 | "여기서 크기란 원점 (0, 0)에서 vector의 점까지 길이를 말하는 것이고, 방향은 아래서 보이는 화살표가 가르키는 방향을 나타냅니다.\n", 37 | "\n", 38 | "![](images/linear-equation-03.png)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "## Vector Addition\n", 46 | "\n", 47 | "2개의 vector u 그리고 v 를 합했을때, 평행사변형(parallelogram)의 대각선 방향이 u+v를 나타냅니다.
\n", 48 | "또는 물리학에서는 u + v는 이동한 displacement(변위)를 나타내기도 합니다.\n", 49 | "\n", 50 | "![](images/linear-equation-04.png)\n", 51 | "\n", 52 | "\n", 53 | "> distance는 실제 이동한거리이고, 변위(displacement)는 마지막위치 - 시작위치 입니다." 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "## Vector Subtraction\n", 61 | "\n", 62 | "2개의 vector u 그리고 v의 차를 구했을때, 평행사변형(parallelogram)의 합했을때의 반대되는 대각선 방향이 v - u 를 나타냅니다.
\n", 63 | "\n", 64 | "$$ v - u = v + (-u) $$\n", 65 | "\n", 66 | "![](images/linear-equation-05.png)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "## Scaling\n", 74 | "\n", 75 | "Vecotor u에다가 scalar 값을 곱하는 것은 scalar multiplication이라고 합니다.
\n", 76 | "의미는 vector의 scale을 변형시킵니다.\n", 77 | "\n", 78 | "\n", 79 | "예를 들어서 2u 는 vector의 방향은 바꾸지 않지만, 크기(magnitude)를 변형시킵니다.
\n", 80 | "또는 -1u 를 곱하게 되면 크기는 변형시키지 않지만, 방향을 반대로 바꿉니다.\n", 81 | "\n", 82 | "\n", 83 | "![](images/linear-equation-06.png)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "# Linear Combination\n", 91 | "\n", 92 | "Vectors $ v_1, v_2, ..., v_n $ 이 있고, sclars 값 $ c_1, c_2, ..., c_n $ 이 있을때,
\n", 93 | "Linear Combination of $ v_1, ..., v_n $ with weights $ c_1, ..., c_2 $ 는 다음과 같이 표현합니다.\n", 94 | "\n", 95 | "$$ y = c_1 \\mathbf{v}_1 + c_2 \\mathbf{v}_2 + ... + c_n \\mathbf{v}_n $$\n", 96 | "\n", 97 | "예를 들어서 아래의 3개의 equations도 모두 linear combinations이라고 할 수 있습니다.\n", 98 | "\n", 99 | "$$ \\begin{align} \\sqrt{3} v_1 + v_2 &= y \\\\\n", 100 | "\\sqrt{3}v_1 + 0 \\cdot v_2 &= g \\\\ \n", 101 | "0 \\cdot v_1 + 0 \\cdot v_2 &= z\n", 102 | "\\end{align} $$" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "## 기하학적 표현\n", 110 | "\n", 111 | "Vector v 그리고 u가 있을때 linear combination을 이용해서 vector w를 표현할는 방법은 다음과 같이 합니다.\n", 112 | "\n", 113 | "![](images/linear-equation-07.png)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## 문제\n", 121 | "\n" 122 | ] 123 | } 124 | ], 125 | "metadata": { 126 | "kernelspec": { 127 | "display_name": "Python 3", 128 | "language": "python", 129 | "name": "python3" 130 | }, 131 | "language_info": { 132 | "codemirror_mode": { 133 | "name": "ipython", 134 | "version": 3 135 | }, 136 | "file_extension": ".py", 137 | "mimetype": "text/x-python", 138 | "name": "python", 139 | "nbconvert_exporter": "python", 140 | "pygments_lexer": "ipython3", 141 | "version": "3.6.7" 142 | } 143 | }, 144 | "nbformat": 4, 145 | "nbformat_minor": 2 146 | } 147 | -------------------------------------------------------------------------------- /600 Transformer Machine Translation/beam_search.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from sentencepiece import SentencePieceProcessor 3 | from torch import nn 4 | 5 | from model import TransformerModule 6 | from typing import Optional, List 7 | 8 | 9 | class BeamSearch: 10 | 11 | def __init__(self, model: TransformerModule, sp: SentencePieceProcessor, device: Optional[str] = None, 12 | max_seq_len: int = 128): 13 | self.model = model 14 | self.sp = sp 15 | self.max_seq_len = max_seq_len 16 | self.device = device if device else torch.device("cuda" if torch.cuda.is_available() else "cpu") 17 | 18 | def greedy_search_from_text(self, text): 19 | self.model.eval() 20 | self.model.to(self.device) 21 | 22 | src_tensor = self.create_source_tensor(text) 23 | src_padding_mask = self.get_padding_mask(src_tensor, pad_idx=self.sp.pad_id()) 24 | return self.greedy_search(src_tensor, src_padding_mask) 25 | 26 | def greedy_search(self, src_tensor: torch.Tensor, src_padding_mask: torch.Tensor): 27 | batch_size = src_tensor.shape[0] 28 | # Get initial encoder output 29 | # if we wrap it with `torch.no_grad()` it doesn't work for some reason. 30 | with torch.enable_grad(): 31 | memory = self.model.encode(src_tensor, src_padding_mask=src_padding_mask) 32 | memory = memory.to(self.device) 33 | mask = torch.zeros(batch_size).type(torch.bool).to(self.device) 34 | 35 | with torch.no_grad(): 36 | # Create decoder input. 37 | # it starts with token. 
38 | y_pred = ( 39 | torch.ones(batch_size, 1) 40 | .fill_(self.sp.bos_id()) 41 | .type(torch.long) 42 | .to(self.device) 43 | ) 44 | 45 | for i in range(self.max_seq_len - 1): 46 | tgt_mask = (nn.Transformer.generate_square_subsequent_mask(y_pred.size(1)) 47 | .type(torch.bool).to(self.device)) 48 | out = self.model.decode(y_pred, memory, tgt_mask) 49 | prob = self.model.out(out[:, -1]) 50 | _, next_words = torch.max(prob, dim=1) 51 | 52 | y_pred = torch.cat( 53 | [y_pred, 54 | next_words.masked_fill(mask, self.sp.pad_id()).type_as(src_tensor.data).unsqueeze(1)], dim=1).to( 55 | self.device) 56 | 57 | mask |= next_words == self.sp.eos_id() 58 | if mask.all().item(): 59 | break 60 | 61 | return y_pred, prob 62 | 63 | def convert_output_to_text(self, y_pred: torch.Tensor): 64 | batch_size = y_pred.shape[0] 65 | output = [None] * batch_size 66 | for i in range(batch_size): 67 | output[i] = self.sp.Decode(y_pred[i].tolist()) 68 | return output 69 | 70 | def create_source_tensor(self, texts: List[str]) -> torch.Tensor: 71 | # Create src input 72 | batch_size = len(texts) 73 | src_tensor = torch.zeros(batch_size, self.max_seq_len, dtype=torch.int32).to(self.model.device) 74 | 75 | for i, text in enumerate(texts): 76 | src_tokenized = self.sp.Encode(text, add_bos=True, add_eos=True) 77 | src_tokenized = src_tokenized[:self.max_seq_len] 78 | if src_tokenized[-1] != self.sp.eos_id(): 79 | src_tokenized[-1] = self.sp.eos_id() 80 | 81 | src_tensor[i, :len(src_tokenized)] = torch.Tensor(src_tokenized) 82 | src_tensor = src_tensor.to(self.device) 83 | return src_tensor 84 | 85 | @staticmethod 86 | def get_padding_mask(seq, pad_idx: int): 87 | return torch.tensor(seq == pad_idx).to(seq.device) 88 | 89 | 90 | if __name__ == '__main__': 91 | from model import TransformerModule 92 | import sentencepiece as spm 93 | 94 | sp = spm.SentencePieceProcessor() 95 | sp.load("sp-bpt-anderson.model") 96 | 97 | model_path = ( 98 | "checkpoints/machine-translation-mode=1.0-epoch=94-step=415625-loss=1.9441.ckpt" 99 | ) 100 | model = TransformerModule.load_from_checkpoint(model_path) 101 | model.eval() 102 | 103 | search = BeamSearch(model, sp, device='cpu') 104 | output, probs = search.greedy_search_from_text(["제가 이번 여름 휴가 보낸 이야기를 할게요.", "너가 나한테 돈 빌린것 지금 갚아"]) 105 | 106 | print(search.convert_output_to_text(output)) 107 | -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/replay.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modification of 3 | https://github.com/carpedm20/deep-rl-tensorflow/blob/master/agents/experience.py 4 | """ 5 | import numpy as np 6 | import random 7 | 8 | 9 | class ExperienceReplay(object): 10 | def __init__(self, env, action_repeat=4, batch_size=32, memory_size=500000): 11 | dims = list(env.dims) 12 | 13 | self.action_repeat = action_repeat 14 | self.batch_size = batch_size 15 | self.memory_size = memory_size 16 | 17 | self.actions = np.empty(self.memory_size, dtype=np.uint8) 18 | self.rewards = np.empty(self.memory_size, dtype=np.int8) 19 | self.screens = np.empty([self.memory_size] + dims, dtype=np.float16) 20 | self.terminals = np.empty(self.memory_size, dtype=np.bool) 21 | 22 | # pre-allocate prestates and poststates for minibatch 23 | self.prestates = np.empty([self.batch_size, self.action_repeat] + dims, dtype=np.float16) 24 | self.poststates = np.empty([self.batch_size, self.action_repeat] + dims, dtype=np.float16) 25 | 26 | self.count = 0 27 | self.current = 0 28 | 29 | def 
add(self, screen, reward, action, terminal): 30 | self.actions[self.current] = action 31 | self.rewards[self.current] = reward 32 | self.screens[self.current, ...] = screen 33 | self.terminals[self.current] = terminal 34 | self.count = max(self.count, self.current + 1) 35 | self.current = (self.current + 1) % self.memory_size 36 | 37 | 38 | 39 | def getState(self, index): 40 | assert self.count > 0, "replay memory is empy, use at least --random_steps 1" 41 | # normalize index to expected range, allows negative indexes 42 | index = index % self.count 43 | # if is not in the beginning of matrix 44 | if index >= self.action_repeat - 1: 45 | # use faster slicing 46 | return self.screens[(index - (self.action_repeat - 1)):(index + 1), ...] 47 | else: 48 | # otherwise normalize indexes and use slower list based access 49 | indexes = [(index - i) % self.count for i in reversed(range(self.action_repeat))] 50 | return self.screens[indexes, ...] 51 | 52 | def sample(self): 53 | # memory must include poststate, prestate and history 54 | assert self.count > self.action_repeat 55 | # sample random indexes 56 | indexes = [] 57 | while len(indexes) < self.batch_size: 58 | # find random index 59 | while True: 60 | # sample one index (ignore states wraping over 61 | index = random.randint(self.action_repeat, self.count - 1) 62 | # if wraps over current pointer, then get new one 63 | if index >= self.current and index - self.action_repeat < self.current: 64 | continue 65 | # if wraps over episode end, then get new one 66 | # NB! poststate (last screen) can be terminal state! 67 | if self.terminals[(index - self.action_repeat):index].any(): 68 | continue 69 | # otherwise use this index 70 | break 71 | 72 | # NB! having index first is fastest in C-order matrices 73 | self.prestates[len(indexes), ...] = self.getState(index - 1) 74 | self.poststates[len(indexes), ...] = self.getState(index) 75 | indexes.append(index) 76 | 77 | actions = self.actions[indexes] 78 | rewards = self.rewards[indexes] 79 | terminals = self.terminals[indexes] 80 | 81 | return self.prestates, actions, rewards, self.poststates, terminals 82 | 83 | def retrieve(self, index=None): 84 | """ 85 | Retrieve 4 screens (4 is action_repeat) 86 | """ 87 | if index is None: 88 | index = min(self.count, self.memory_size) 89 | 90 | index = index % self.count 91 | if index >= self.action_repeat - 1: 92 | return self.screens[(index - (self.action_repeat - 1)):(index + 1), ...] 93 | else: 94 | indexes = [(index - i) % self.count for i in reversed(range(self.action_repeat))] 95 | return self.screens[indexes, ...] 
96 | 97 | @property 98 | def available(self): 99 | return self.count >= self.action_repeat 100 | 101 | @property 102 | def size(self): 103 | return self.count 104 | 105 | 106 | if __name__ == '__main__': 107 | ExperienceReplay() 108 | -------------------------------------------------------------------------------- /601 Dense Passage Retrieval/model.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Tuple 2 | 3 | import lightning as pl 4 | import torch.nn 5 | import transformers 6 | from lightning.pytorch.utilities.types import OptimizerLRScheduler, STEP_OUTPUT 7 | from torch.optim.lr_scheduler import CyclicLR 8 | from transformers import BertModel 9 | import torch.nn.functional as F 10 | 11 | 12 | class KoBertBiEncoder(pl.LightningModule): 13 | 14 | def __init__(self, lr=0.00001, betas: Tuple[float, float] = (0.9, 0.999)) -> None: 15 | super().__init__() 16 | self.save_hyperparameters() 17 | self.lr = lr 18 | self.betas = betas 19 | 20 | self.quesion_encoder = BertModel.from_pretrained('skt/kobert-base-v1') 21 | self.passage_encoder = BertModel.from_pretrained('skt/kobert-base-v1') 22 | self.output_embedding_size = self.passage_encoder.pooler.dense.out_features # 768 23 | 24 | def forward(self, x: torch.LongTensor, attention_mask: torch.LongTensor, is_question: bool) -> torch.FloatTensor: 25 | if is_question: 26 | return self.quesion_encoder(x, attention_mask=attention_mask).pooler_output 27 | return self.passage_encoder(x, attention_mask=attention_mask).pooler_output 28 | 29 | def training_step(self, batch, batch_idx): 30 | similarities = self.do_similarity_step(batch) 31 | loss = self.ibn_loss(similarities) 32 | batch_acc = self.calculate_batch_accuracy(similarities) 33 | 34 | self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True) 35 | self.log('train_batch_acc', batch_acc, on_step=True, on_epoch=True, prog_bar=True, logger=True) 36 | 37 | return loss 38 | 39 | def validation_step(self, batch, batch_idx): 40 | similarities = self.do_similarity_step(batch) 41 | 42 | loss = self.ibn_loss(similarities) 43 | batch_acc = self.calculate_batch_accuracy(similarities) 44 | 45 | self.log('valid_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True) 46 | self.log('valid_batch_acc', batch_acc, on_step=True, on_epoch=True, prog_bar=True, logger=True) 47 | return loss 48 | 49 | def do_similarity_step(self, batch): 50 | q_encoded, q_attn_mask, p_encoded, p_attn_mask = batch 51 | q_encoded, q_attn_mask, p_encoded, p_attn_mask = ( 52 | q_encoded.to(self.device), 53 | q_attn_mask.to(self.device), 54 | p_encoded.to(self.device), 55 | p_attn_mask.to(self.device) 56 | ) 57 | 58 | q_emb = self(q_encoded, attention_mask=q_attn_mask, is_question=True) 59 | p_emb = self(p_encoded, attention_mask=p_attn_mask, is_question=False) 60 | # 이게 들어가면 학습이 안됨. 이유는 모르겠음. 61 | # 같은 값으로 예측을 하는 이슈가 발생함. 
62 | # 예를 들어서 [1, 1, 1, 1, 1, 1] 이런식으로 (마지막에 argmax 구했을때) 63 | # q_emb = F.normalize(q_emb, p=2, dim=1) 64 | # p_emb = F.normalize(p_emb, p=2, dim=1) 65 | similarities = torch.matmul(q_emb, p_emb.T) # calculate similarity 66 | 67 | return similarities 68 | 69 | def ibn_loss(self, pred: torch.Tensor) -> torch.Tensor: 70 | """ 71 | in-batch negative loss 72 | """ 73 | batch_size = pred.size(0) 74 | targets = torch.arange(batch_size, dtype=torch.long).to(self.device) 75 | 76 | # cross entropy uses log softmax + nll_loss 77 | # so it's the same as 78 | # sim_scores = F.log_softmax(pred) 79 | # loss = F.nll_loss(sim_scores, self._targets) 80 | loss = torch.nn.functional.cross_entropy(pred, targets) 81 | print('loss:', loss.item()) 82 | return loss 83 | 84 | def calculate_batch_accuracy(self, pred: torch.Tensor): 85 | """ 86 | Batch 내에서의 accuracy 를 계산 87 | """ 88 | batch_size = pred.size(0) 89 | target = torch.arange(batch_size) # target = [0, 1, 2, 3, 4, ...] 90 | # max(1) -> values 그리고 indices 가 있으며, indices 는 max value 의 index 번호 입니다. 91 | # 즉 [0, 1, 2, 3, 4 .. ] 이렇게 나와서 target 과 일치하는게 몇개 있는지 계산 합니다. 92 | pred_max_indices = pred.detach().cpu().max(1).indices 93 | print('pred_max_indices:', pred_max_indices.tolist()[:40]) 94 | return (pred_max_indices == target).sum().float() / batch_size 95 | 96 | def configure_optimizers(self) -> OptimizerLRScheduler: 97 | optimizer = torch.optim.AdamW(self.parameters(), lr=self.lr, betas=self.betas) 98 | scheduler = transformers.get_linear_schedule_with_warmup(optimizer, 1000, 100000) 99 | return [optimizer], [scheduler] 100 | -------------------------------------------------------------------------------- /034 Classifier/01 Softmax.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Populating the interactive namespace from numpy and matplotlib\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "%pylab inline\n", 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "# Softmax Classifier\n", 26 | "\n", 27 | "* http://cs231n.github.io/linear-classify/#softmax 참고 \n", 28 | "* https://eli.thegreenplace.net/2016/the-softmax-function-and-its-derivative/\n", 29 | "\n", 30 | "\n", 31 | "Softmax function은 N-dimensional vector를 받아서 다시 N-dimensional vector로 return을 합니다.
\n", 32 | "이때 output은 0~1 사이의 확률분포를 갖으며, 전체의 합은 1이 됩니다. $ S(a) : \\mathbb{R}^{\\mathbb{N}} \\rightarrow \\mathbb{R}^{\\mathbb{N}} $\n", 33 | "\n", 34 | "각각의 element단위의 공식은 다음과 같습니다. \n", 35 | "\n", 36 | "$$ S_i = \\frac{e^{x_i}}{\\sum^N_{k=1} e^{x_k}} $$\n", 37 | "\n", 38 | "$ S_i $ 는 exponent연산을 하기 때문에 항상 positive 값을 갖습니다.
\n", 39 | "또한 numerator부분이 denominator에서 합쳐져서 나오기 때문에 0~1 사이의 확률분포로 값이 나오게 됩니다." 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "# Derivative of softmax" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "## Example 1\n", 54 | "\n", 55 | "아래 예제에서 `[1.0, 2.0, 3.0]` 은 softmax함수에 의해서 `[0.09, 0.24, 0.67]` 로 transform되어야 하며, 합은 1이 되어야 합니다." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 2, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "value : [1, 2, 3]\n", 68 | "softmax : [ 0.09003057 0.24472847 0.66524096]\n", 69 | "summed up: 1.0\n" 70 | ] 71 | } 72 | ], 73 | "source": [ 74 | "def softmax(x):\n", 75 | " exp_x = np.exp(x)\n", 76 | " return exp_x/np.sum(exp_x, axis=0)\n", 77 | "\n", 78 | "a = [1, 2, 3]\n", 79 | "b = softmax(a)\n", 80 | "\n", 81 | "print('value :', a)\n", 82 | "print('softmax :', b)\n", 83 | "print('summed up:', np.sum(b))" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "## Example 2\n", 91 | "\n", 92 | "* [Udacity - Softmax](https://classroom.udacity.com/courses/ud730/lessons/6370362152/concepts/63815621490923#)\n", 93 | "\n", 94 | "1번정답
\n", 95 | "```\n", 96 | "[ 0.09003057 0.24472847 0.66524096]\n", 97 | "```\n", 98 | "\n", 99 | "2번정답
\n", 100 | "```\n", 101 | "[[ 0.09003057 0.00242826 0.01587624 0.33333333]\n", 102 | " [ 0.24472847 0.01794253 0.11731043 0.33333333]\n", 103 | " [ 0.66524096 0.97962921 0.86681333 0.33333333]]\n", 104 | "```" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 3, 110 | "metadata": {}, 111 | "outputs": [ 112 | { 113 | "name": "stdout", 114 | "output_type": "stream", 115 | "text": [ 116 | "[[ 0.09003057 0.00242826 0.01587624 0.33333333]\n", 117 | " [ 0.24472847 0.01794253 0.11731043 0.33333333]\n", 118 | " [ 0.66524096 0.97962921 0.86681333 0.33333333]] 4.0\n" 119 | ] 120 | } 121 | ], 122 | "source": [ 123 | "scores1 = np.array([1.0, 2.0, 3.0])\n", 124 | "scores2 = np.array([[1, 2, 3, 6],\n", 125 | " [2, 4, 5, 6], \n", 126 | " [3, 8, 7, 6]])\n", 127 | "\n", 128 | "def softmax(x):\n", 129 | " exp_x = np.exp(x)\n", 130 | " return exp_x/np.sum(exp_x, axis=0)\n", 131 | "\n", 132 | "# plot(scores1, softmax(scores1))\n", 133 | "print(softmax(scores2), np.sum(softmax(scores2)))" 134 | ] 135 | } 136 | ], 137 | "metadata": { 138 | "kernelspec": { 139 | "display_name": "Python 3", 140 | "language": "python", 141 | "name": "python3" 142 | }, 143 | "language_info": { 144 | "codemirror_mode": { 145 | "name": "ipython", 146 | "version": 3 147 | }, 148 | "file_extension": ".py", 149 | "mimetype": "text/x-python", 150 | "name": "python", 151 | "nbconvert_exporter": "python", 152 | "pygments_lexer": "ipython3", 153 | "version": "3.6.1" 154 | } 155 | }, 156 | "nbformat": 4, 157 | "nbformat_minor": 2 158 | } 159 | -------------------------------------------------------------------------------- /Keras Tutorial/001 Getting Started/02 Get Weights and Biases as Numpy Array.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [ 12 | { 13 | "name": "stderr", 14 | "output_type": "stream", 15 | "text": [ 16 | "Using TensorFlow backend.\n" 17 | ] 18 | } 19 | ], 20 | "source": [ 21 | "import numpy as np\n", 22 | "\n", 23 | "from keras.models import Sequential\n", 24 | "from keras.layers import Dense" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": { 31 | "collapsed": true, 32 | "deletable": true, 33 | "editable": true 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "def display_shapes(model):\n", 38 | " for i, layer in enumerate(model.layers):\n", 39 | " w, b = layer.get_weights()\n", 40 | " print(f'[{i}] weights: {w.shape} \\tbiase: {b.shape}')" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": { 46 | "deletable": true, 47 | "editable": true 48 | }, 49 | "source": [ 50 | "## Example 1" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 4, 56 | "metadata": { 57 | "collapsed": false, 58 | "deletable": true, 59 | "editable": true 60 | }, 61 | "outputs": [ 62 | { 63 | "name": "stdout", 64 | "output_type": "stream", 65 | "text": [ 66 | "[0] weights: (10, 20) \tbiase: (20,)\n", 67 | "[1] weights: (20, 5) \tbiase: (5,)\n", 68 | "[2] weights: (5, 1) \tbiase: (1,)\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "model = Sequential()\n", 74 | "model.add(Dense(20, batch_input_shape=(None, 10)))\n", 75 | "model.add(Dense(5))\n", 76 | "model.add(Dense(1))\n", 77 | "model.compile(optimizer='adam', loss='mean_squared_error')\n", 78 | "\n", 79 | "display_shapes(model)" 80 | ] 81 | }, 82 | { 83 | 
"cell_type": "code", 84 | "execution_count": 7, 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "text/plain": [ 92 | "" 93 | ] 94 | }, 95 | "execution_count": 7, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | "source": [ 101 | "x = np.random.randn(7, 10)\n", 102 | "model.predict(x)\n", 103 | "model.get_output_at(0)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": { 109 | "deletable": true, 110 | "editable": true 111 | }, 112 | "source": [ 113 | "## Example 2" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 24, 119 | "metadata": { 120 | "collapsed": false, 121 | "deletable": true, 122 | "editable": true 123 | }, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "[0] weights: (9, 32) \tbiase: (32,)\n", 130 | "[1] weights: (32, 32) \tbiase: (32,)\n", 131 | "[2] weights: (32, 32) \tbiase: (32,)\n", 132 | "[3] weights: (32, 32) \tbiase: (32,)\n", 133 | "[4] weights: (32, 16) \tbiase: (16,)\n", 134 | "[5] weights: (16, 1) \tbiase: (1,)\n" 135 | ] 136 | } 137 | ], 138 | "source": [ 139 | "data_x = np.random.randn(5, 9)\n", 140 | "data_y = np.random.randn(5, 1)\n", 141 | "\n", 142 | "model = Sequential()\n", 143 | "model.add(Dense(32, batch_input_shape=(None, 9)))\n", 144 | "model.add(Dense(32))\n", 145 | "model.add(Dense(32))\n", 146 | "model.add(Dense(32))\n", 147 | "model.add(Dense(16))\n", 148 | "model.add(Dense(1))\n", 149 | "model.compile(optimizer='adam', loss='mean_squared_error')\n", 150 | "y_pred = model.predict(data_x, verbose=0)\n", 151 | "\n", 152 | "display_shapes(model)" 153 | ] 154 | } 155 | ], 156 | "metadata": { 157 | "kernelspec": { 158 | "display_name": "Python 3", 159 | "language": "python", 160 | "name": "python3" 161 | }, 162 | "language_info": { 163 | "codemirror_mode": { 164 | "name": "ipython", 165 | "version": 3 166 | }, 167 | "file_extension": ".py", 168 | "mimetype": "text/x-python", 169 | "name": "python", 170 | "nbconvert_exporter": "python", 171 | "pygments_lexer": "ipython3", 172 | "version": "3.6.0" 173 | } 174 | }, 175 | "nbformat": 4, 176 | "nbformat_minor": 2 177 | } 178 | -------------------------------------------------------------------------------- /data/exem/.Rhistory: -------------------------------------------------------------------------------- 1 | setwd("~/@python/neo-notebook/data/exem") 2 | train.data.raw <- read.csv('machine-error01.csv', header = T) 3 | train.data.raw <- read.csv('machine-error01.csv', header = T) 4 | head(train.data.raw) 5 | subset(train.data.raw, drop = c(1)) 6 | head(subset(train.data.raw, drop = c(1))) 7 | train.data.raw <- read.csv('machine-error02.csv', header = T) 8 | head(subset(train.data.raw, drop = c(1))) 9 | head(subset(train.data.raw, drop = c(1))) 10 | View(train.data.raw) 11 | View(train.data.raw) 12 | head(subset(train.data.raw, drop = c(2))) 13 | head(subset(train.data.raw, drop = 2) 14 | head(subset(train.data.raw, drop = 2)) 15 | head(subset(train.data.raw, drop = 2)) 16 | head(subset(train.data.raw, drop = c(2)) 17 | head(subset(train.data.raw, drop = c(2))) 18 | head(subset(train.data.raw, drop = c(2))) 19 | head(subset(train.data.raw, select = c(1, 3, 4, 5, 6, 7, 8)) 20 | head(subset(train.data.raw, select = c(1, 3, 4, 5, 6, 7, 8))) 21 | head(subset(train.data.raw, select = c(1, 3, 4, 5, 6, 7, 8))) 22 | train.data.raw <- head(subset(train.data.raw, select = c(1, 3, 4, 5, 6, 7, 8))) 23 | 
source('~/@python/neo-notebook/exem/machine-analysis01.R') 24 | train.data.raw <- head(subset(train.data.raw, select = c(1, 3, 4, 5, 6, 7, 8))) 25 | train.data.raw <- read.csv('machine-error02.csv', header = T) 26 | train.data.raw <- head(subset(train.data.raw, select = c(1, 3, 4, 5, 6, 7, 8))) 27 | head(train.data.raw) 28 | library(Amelia) 29 | missmap(train.data.raw) 30 | nrow(train.data.raw) 31 | nrow(train.data.raw) 32 | train.data.raw 33 | train.data.raw <- read.csv('machine-error02.csv', header = T) 34 | train.data.raw <- head(subset(train.data.raw, select = c(1, 3, 4, 5, 6, 7, 8))) 35 | train.data.raw 36 | train.data.raw <- read.csv('machine-error02.csv', header = T) 37 | train.data.raw 38 | train.data.raw <- subset(train.data.raw, select = c(1, 3, 4, 5, 6, 7, 8)) 39 | train.data.raw 40 | head(train.data.raw) 41 | data <- train.data.raw 42 | nrow(data) 43 | train <- data[1:14000] 44 | test <- data[14001:14375] 45 | train <- data[1:14000,] 46 | test <- data[14001:14375,] 47 | model(formula=error~., family=binomial(link='logit'), data=train) 48 | glm(formula=error~., family=binomial(link='logit'), data=train) 49 | model = glm(formula=error~., family=binomial(link='logit'), data=train) 50 | model 51 | summary(model) 52 | fitted.results <- predict(model, newdata=test, type='response') 53 | fitted.results <- ifelse(fitted.results > 0.5, 1, 0) 54 | mean(fitted.results == test$error) 55 | head(train.data.raw) 56 | train.data.raw <- subset(train.data.raw, select = c(1, 3, 4, 5, 6, 7)) 57 | data <- train.data.raw 58 | train <- data[1:14000,] 59 | test <- data[14001:14375,] 60 | model = glm(formula=error~., family=binomial(link='logit'), data=train) 61 | fitted.results <- predict(model, newdata=test, type='response') 62 | fitted.results <- ifelse(fitted.results > 0.5, 1, 0) 63 | mean(fitted.results == test$error) 64 | mean(fitted.results == test$error) 65 | train.data.raw <- read.csv('machine-error02.csv', header = T) 66 | train.data.raw <- subset(train.data.raw, select = c(1, 3, 4, 5, 6, 7, 8)) 67 | data <- train.data.raw 68 | train <- data[1:14000,] 69 | test <- data[14001:14375,] 70 | model = glm(formula=error~., family=binomial(link='logit'), data=train) 71 | fitted.results <- predict(model, newdata=test, type='response') 72 | fitted.results <- ifelse(fitted.results > 0.5, 1, 0) 73 | mean(fitted.results == test$error) 74 | train.data.raw <- read.csv('machine-error01.csv', header = T) 75 | train.data.raw 76 | head(train.data.raw) 77 | subset(train.data.raw, drop=2) 78 | head(subset(train.data.raw, drop=2)) 79 | head(subset(train.data.raw, drop=c(2))) 80 | head(subset(train.data.raw, select=-c(2))) 81 | train.data.raw <- subset(train.data.raw, select = -c(2)) 82 | data <- train.data.raw 83 | train <- data[1:14000,] 84 | test <- data[14001:14375,] 85 | model = glm(formula=error~., family=binomial(link='logit'), data=train) 86 | fitted.results <- predict(model, newdata=test, type='response') 87 | fitted.results <- ifelse(fitted.results > 0.5, 1, 0) 88 | mean(fitted.results == test$error) 89 | summary(model) 90 | mean(fitted.results == test$error) 91 | head(train.data.raw) 92 | summary(train.data.raw) 93 | mean(fitted.results == test$error) 94 | summary(model) 95 | missmap(train.data.raw) 96 | sapply(train.data.raw, function(x) sum(is.na(x))) 97 | mean(fitted.results == test$error) 98 | mean(fitted.results == test$error) 99 | mean(fitted.results == test$error) 100 | table(data[14001:14375,]$error) 101 | train <- data[1:13000,] 102 | test <- data[13001:14375,] 103 | model = glm(formula=error~., 
family=binomial(link='logit'), data=train) 104 | fitted.results <- predict(model, newdata=test, type='response') 105 | fitted.results <- ifelse(fitted.results > 0.5, 1, 0) 106 | mean(fitted.results == test$error) 107 | summary(model) 108 | model 109 | -------------------------------------------------------------------------------- /601 Dense Passage Retrieval/test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 10, 6 | "id": "initial_id", 7 | "metadata": { 8 | "collapsed": true, 9 | "ExecuteTime": { 10 | "end_time": "2024-03-16T13:13:39.863049096Z", 11 | "start_time": "2024-03-16T13:13:39.681371552Z" 12 | } 13 | }, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "/tmp/korquad_preprocessed_data/train_data.json\r\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "%ls /tmp/korquad_preprocessed_data/train_data.json\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "outputs": [], 30 | "source": [ 31 | "import json\n", 32 | "\n", 33 | "with open('/tmp/korquad_preprocessed_data/train_data.json', 'rt') as f:\n", 34 | " data = json.load(f)" 35 | ], 36 | "metadata": { 37 | "collapsed": false, 38 | "ExecuteTime": { 39 | "end_time": "2024-03-16T13:14:03.688140111Z", 40 | "start_time": "2024-03-16T13:14:01.874783631Z" 41 | } 42 | }, 43 | "id": "b039e9c7daac5a35", 44 | "execution_count": 11 45 | }, 46 | { 47 | "cell_type": "code", 48 | "outputs": [], 49 | "source": [ 50 | "import pandas as pd\n", 51 | "df = pd.DataFrame(data)" 52 | ], 53 | "metadata": { 54 | "collapsed": false, 55 | "ExecuteTime": { 56 | "end_time": "2024-03-16T13:15:09.016384369Z", 57 | "start_time": "2024-03-16T13:15:09.015856469Z" 58 | } 59 | }, 60 | "id": "36317e753b070b0c", 61 | "execution_count": 15 62 | }, 63 | { 64 | "cell_type": "code", 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/plain": "30712" 69 | }, 70 | "execution_count": 18, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "df[3].unique().size" 77 | ], 78 | "metadata": { 79 | "collapsed": false, 80 | "ExecuteTime": { 81 | "end_time": "2024-03-16T13:17:40.183158074Z", 82 | "start_time": "2024-03-16T13:17:40.177531839Z" 83 | } 84 | }, 85 | "id": "45ada84735839765", 86 | "execution_count": 18 87 | }, 88 | { 89 | "cell_type": "code", 90 | "outputs": [ 91 | { 92 | "data": { 93 | "text/plain": "Empty DataFrame\nColumns: [0, 1, 2, 3, 4]\nIndex: []", 94 | "text/html": "
" 95 | }, 96 | "execution_count": 27, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "df[df[2].duplicated()]" 103 | ], 104 | "metadata": { 105 | "collapsed": false, 106 | "ExecuteTime": { 107 | "end_time": "2024-03-16T13:19:40.993072097Z", 108 | "start_time": "2024-03-16T13:19:40.932567068Z" 109 | } 110 | }, 111 | "id": "ddd0e5ea8bc34c81", 112 | "execution_count": 27 113 | }, 114 | { 115 | "cell_type": "code", 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": "0 False\n1 False\n2 False\n3 False\n4 False\n ... \n48320 True\n48321 False\n48322 False\n48323 True\n48324 True\nName: 3, Length: 48325, dtype: bool" 120 | }, 121 | "execution_count": 29, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [ 127 | "df[df[3].duplicated()]" 128 | ], 129 | "metadata": { 130 | "collapsed": false, 131 | "ExecuteTime": { 132 | "end_time": "2024-03-16T13:20:55.805464278Z", 133 | "start_time": "2024-03-16T13:20:55.803098416Z" 134 | } 135 | }, 136 | "id": "720cea21e31ac71c", 137 | "execution_count": 29 138 | }, 139 | { 140 | "cell_type": "code", 141 | "outputs": [], 142 | "source": [], 143 | "metadata": { 144 | "collapsed": false 145 | }, 146 | "id": "64c94305a3c372b0" 147 | } 148 | ], 149 | "metadata": { 150 | "kernelspec": { 151 | "display_name": "Python 3", 152 | "language": "python", 153 | "name": "python3" 154 | }, 155 | "language_info": { 156 | "codemirror_mode": { 157 | "name": "ipython", 158 | "version": 2 159 | }, 160 | "file_extension": ".py", 161 | "mimetype": "text/x-python", 162 | "name": "python", 163 | "nbconvert_exporter": "python", 164 | "pygments_lexer": "ipython2", 165 | "version": "2.7.6" 166 | } 167 | }, 168 | "nbformat": 4, 169 | "nbformat_minor": 5 170 | } 171 | -------------------------------------------------------------------------------- /Keras Tutorial/105 [Layer] Embedding Layers/01 Embedding Layers.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "# Embedding Layers\n", 11 | "\n", 12 | "1. https://keras.io/layers/embeddings/\n", 13 | "2. [Distributed Representations of Words and Phrases and their Compositionality](http://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf)\n", 14 | "\n", 15 | "### Embedding Layer\n", 16 | "\n", 17 | "* **input_dim**: size of the vocabulary (즉 sentence로 제공되는 vector안에서 가장 큰 integer값)\n", 18 | "* **output_dim**: dense embedding의 dimension\n", 19 | "* **mask_zero**: input value 0을 특수 padding으로서 masked out해야 될지 결정. recurrent layers사용시 variable length input을 사용시 유용하게 사용될 수 있으며, mask_zero True로 사용시 그 이후의 모든 layers들은 masking을 지원해야함. \n", 20 | "* **input_length**: input sequence가 constant값일때 사용. 특히 Flatten 사용후 Dense사용시 input_length값을 지정해줘야 Keras가 dense output의 shape을 알 수 있음\n", 21 | "\n", 22 | "### Input & Output Shape\n", 23 | "\n", 24 | "* Input Shape은 **(batch_size, sequence_length)** 입니다.\n", 25 | "* Output Shape은 **(batch_size, sequence_length, output_dim)** 입니다. 
" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": { 32 | "collapsed": true, 33 | "deletable": true, 34 | "editable": true 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "import numpy as np \n", 39 | "from keras.models import Sequential\n", 40 | "from keras.layers import Embedding" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": { 46 | "deletable": true, 47 | "editable": true 48 | }, 49 | "source": [ 50 | "## Data\n", 51 | "\n", 52 | "데이터의 shape은 (batch, sentence vector)입니다. " 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 132, 58 | "metadata": { 59 | "collapsed": true, 60 | "deletable": true, 61 | "editable": true 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "data1 = np.array([[0, 1, 2, 3], \n", 66 | " [0, 0, 0, 1], \n", 67 | " [9, 9, 9, 9]])" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": { 73 | "deletable": true, 74 | "editable": true 75 | }, 76 | "source": [ 77 | "## Model\n", 78 | "\n", 79 | "Embedding(10, 1) 에서 input_dim을 10으로 주었습니다.
\n", 80 | "이는 데이터의 가장 큰 값이 9이며 zero-based index를 사용하기 때문에 9 + 1 = 10 이 되기 때문입니다." 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 151, 86 | "metadata": { 87 | "collapsed": false, 88 | "deletable": true, 89 | "editable": true 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "model = Sequential()\n", 94 | "model.add(Embedding(10, 1))\n", 95 | "model.compile('rmsprop', 'mse')" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": { 101 | "deletable": true, 102 | "editable": true 103 | }, 104 | "source": [ 105 | "## Result\n", 106 | "\n", 107 | "3번째 vector [9, 9, 9, 9]인데.. 이들의 값이 모두 동일하다는 것을 알 수 있습니다.
\n", 108 | "즉 어떤 단어의 integer값 (예를 들어 hello 는 5000)이라면 Embedding을 거치고 나면 5000이 아닌 0에서 1사이의 다른 값으로 동일하게 사용됩니다.
" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 152, 114 | "metadata": { 115 | "collapsed": false, 116 | "deletable": true, 117 | "editable": true 118 | }, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "text/plain": [ 123 | "array([[[ 0.01588077],\n", 124 | " [ 0.04561056],\n", 125 | " [ 0.02556488],\n", 126 | " [ 0.04706056]],\n", 127 | "\n", 128 | " [[ 0.01588077],\n", 129 | " [ 0.01588077],\n", 130 | " [ 0.01588077],\n", 131 | " [ 0.04561056]],\n", 132 | "\n", 133 | " [[-0.00760218],\n", 134 | " [-0.00760218],\n", 135 | " [-0.00760218],\n", 136 | " [-0.00760218]]], dtype=float32)" 137 | ] 138 | }, 139 | "execution_count": 152, 140 | "metadata": {}, 141 | "output_type": "execute_result" 142 | } 143 | ], 144 | "source": [ 145 | "model.predict(data1)" 146 | ] 147 | } 148 | ], 149 | "metadata": { 150 | "kernelspec": { 151 | "display_name": "Python 3", 152 | "language": "python", 153 | "name": "python3" 154 | }, 155 | "language_info": { 156 | "codemirror_mode": { 157 | "name": "ipython", 158 | "version": 3 159 | }, 160 | "file_extension": ".py", 161 | "mimetype": "text/x-python", 162 | "name": "python", 163 | "nbconvert_exporter": "python", 164 | "pygments_lexer": "ipython3", 165 | "version": "3.6.0" 166 | } 167 | }, 168 | "nbformat": 4, 169 | "nbformat_minor": 2 170 | } 171 | -------------------------------------------------------------------------------- /Keras Tutorial/200 [Preprocessing] Padding/01 Padding for RNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "from keras.preprocessing.sequence import pad_sequences" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "### pad_sequences\n", 20 | "\n", 21 | "\n", 22 | "* **sequences**: List of lists of int or float.\n", 23 | "* **maxlen**: None or int. Maximum sequence length, longer sequences are truncated and shorter sequences are padded with zeros at the end.\n", 24 | "* **dtype**: datatype of the Numpy array returned.\n", 25 | "* **padding**: 'pre' or 'post', pad either before or after each sequence.\n", 26 | "* **truncating**: 'pre' or 'post', remove values from sequences larger than maxlen either in the beginning or in the end of the sequence\n", 27 | "* **value**: float, value to pad the sequences to the desired value.\n", 28 | "\n", 29 | "sequences 를 받으면 (sample갯수, timesteps 갯수) shape을 갖은 2D Numpy array를 리턴합니다. 
" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "### 3 dimensions -> 10 dimensions with 'pre' padding" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 5, 42 | "metadata": { 43 | "collapsed": false 44 | }, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "array([[0, 0, 0, 0, 0, 0, 0, 1, 2, 3],\n", 50 | " [0, 0, 0, 0, 0, 0, 0, 4, 5, 6]], dtype=int32)" 51 | ] 52 | }, 53 | "execution_count": 5, 54 | "metadata": {}, 55 | "output_type": "execute_result" 56 | } 57 | ], 58 | "source": [ 59 | "data = np.array([[1,2,3], [4,5,6]])\n", 60 | "pad_sequences(data, maxlen=10)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "### 3 dimensions -> 10 dimensions with 'post' padding" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 6, 73 | "metadata": { 74 | "collapsed": false 75 | }, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "text/plain": [ 80 | "array([[1, 2, 3, 0, 0, 0, 0, 0, 0, 0],\n", 81 | " [4, 5, 6, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)" 82 | ] 83 | }, 84 | "execution_count": 6, 85 | "metadata": {}, 86 | "output_type": "execute_result" 87 | } 88 | ], 89 | "source": [ 90 | "data = np.array([[1,2,3], [4,5,6]])\n", 91 | "pad_sequences(data, maxlen=10, padding='post')" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "### Truncate (10 -> 3) with pre option" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 13, 104 | "metadata": { 105 | "collapsed": false 106 | }, 107 | "outputs": [ 108 | { 109 | "data": { 110 | "text/plain": [ 111 | "array([[ 7, 8, 9],\n", 112 | " [12, 11, 10]], dtype=int32)" 113 | ] 114 | }, 115 | "execution_count": 13, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "data = np.array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], \n", 122 | " [20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10]])\n", 123 | "pad_sequences(data, maxlen=3, truncating='pre')" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "### Truncate (10 -> 3) with post option" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 12, 136 | "metadata": { 137 | "collapsed": false 138 | }, 139 | "outputs": [ 140 | { 141 | "data": { 142 | "text/plain": [ 143 | "array([[ 0, 1, 2],\n", 144 | " [20, 19, 18]], dtype=int32)" 145 | ] 146 | }, 147 | "execution_count": 12, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "data = np.array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], \n", 154 | " [20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10]])\n", 155 | "pad_sequences(data, maxlen=3, truncating='post')" 156 | ] 157 | } 158 | ], 159 | "metadata": { 160 | "kernelspec": { 161 | "display_name": "Python 3", 162 | "language": "python", 163 | "name": "python3" 164 | }, 165 | "language_info": { 166 | "codemirror_mode": { 167 | "name": "ipython", 168 | "version": 3 169 | }, 170 | "file_extension": ".py", 171 | "mimetype": "text/x-python", 172 | "name": "python", 173 | "nbconvert_exporter": "python", 174 | "pygments_lexer": "ipython3", 175 | "version": "3.6.0" 176 | } 177 | }, 178 | "nbformat": 4, 179 | "nbformat_minor": 2 180 | } 181 | -------------------------------------------------------------------------------- /data/basic_csv_data/iris.csv: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 
4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 
7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica 151 | 152 | -------------------------------------------------------------------------------- /data/titanic/gendermodel.csv: -------------------------------------------------------------------------------- 1 | PassengerId,Survived 2 | 892,0 3 | 893,1 4 | 894,0 5 | 895,0 6 | 896,1 7 | 897,0 8 | 898,1 9 | 899,0 10 | 900,1 11 | 901,0 12 | 902,0 13 | 903,0 14 | 904,1 15 | 905,0 16 | 906,1 17 | 907,1 18 | 908,0 19 | 909,0 20 | 910,1 21 | 911,1 22 | 912,0 23 | 913,0 24 | 914,1 25 | 915,0 26 | 916,1 27 | 917,0 28 | 918,1 29 | 919,0 30 | 920,0 31 | 921,0 32 | 922,0 33 | 923,0 34 | 924,1 35 | 925,1 36 | 926,0 37 | 927,0 38 | 928,1 39 | 929,1 40 | 930,0 41 | 931,0 42 | 932,0 43 | 933,0 44 | 934,0 45 | 935,1 46 | 936,1 47 | 937,0 48 | 938,0 49 | 939,0 50 | 940,1 51 | 941,1 52 | 942,0 53 | 943,0 54 | 944,1 55 | 945,1 56 | 946,0 57 | 947,0 58 | 948,0 59 | 949,0 60 | 950,0 61 | 951,1 62 | 952,0 63 | 953,0 64 | 954,0 65 | 955,1 66 | 956,0 67 | 957,1 68 | 958,1 69 | 959,0 70 | 960,0 71 | 961,1 72 | 962,1 73 | 963,0 74 | 964,1 75 | 965,0 76 | 966,1 77 | 967,0 78 | 968,0 79 | 969,1 80 | 970,0 81 | 971,1 82 | 972,0 83 | 973,0 84 | 974,0 85 | 975,0 86 | 976,0 87 | 977,0 88 | 978,1 89 | 979,1 90 | 980,1 91 | 981,0 92 | 982,1 93 | 983,0 94 | 984,1 95 | 985,0 96 | 986,0 97 | 987,0 98 | 988,1 99 | 989,0 100 | 990,1 101 | 991,0 102 | 992,1 103 | 993,0 104 | 994,0 105 | 995,0 106 | 996,1 107 | 997,0 108 | 998,0 109 | 999,0 110 | 1000,0 111 | 1001,0 112 | 1002,0 113 | 1003,1 114 | 1004,1 115 | 1005,1 116 | 1006,1 117 | 1007,0 118 | 1008,0 119 | 1009,1 120 | 1010,0 121 | 1011,1 122 | 1012,1 123 | 1013,0 124 | 1014,1 125 | 1015,0 126 | 1016,0 127 | 1017,1 128 | 1018,0 129 | 1019,1 130 | 1020,0 131 | 1021,0 132 | 1022,0 133 | 1023,0 134 | 1024,1 135 | 1025,0 136 | 1026,0 137 | 1027,0 
138 | 1028,0 139 | 1029,0 140 | 1030,1 141 | 1031,0 142 | 1032,1 143 | 1033,1 144 | 1034,0 145 | 1035,0 146 | 1036,0 147 | 1037,0 148 | 1038,0 149 | 1039,0 150 | 1040,0 151 | 1041,0 152 | 1042,1 153 | 1043,0 154 | 1044,0 155 | 1045,1 156 | 1046,0 157 | 1047,0 158 | 1048,1 159 | 1049,1 160 | 1050,0 161 | 1051,1 162 | 1052,1 163 | 1053,0 164 | 1054,1 165 | 1055,0 166 | 1056,0 167 | 1057,1 168 | 1058,0 169 | 1059,0 170 | 1060,1 171 | 1061,1 172 | 1062,0 173 | 1063,0 174 | 1064,0 175 | 1065,0 176 | 1066,0 177 | 1067,1 178 | 1068,1 179 | 1069,0 180 | 1070,1 181 | 1071,1 182 | 1072,0 183 | 1073,0 184 | 1074,1 185 | 1075,0 186 | 1076,1 187 | 1077,0 188 | 1078,1 189 | 1079,0 190 | 1080,1 191 | 1081,0 192 | 1082,0 193 | 1083,0 194 | 1084,0 195 | 1085,0 196 | 1086,0 197 | 1087,0 198 | 1088,0 199 | 1089,1 200 | 1090,0 201 | 1091,1 202 | 1092,1 203 | 1093,0 204 | 1094,0 205 | 1095,1 206 | 1096,0 207 | 1097,0 208 | 1098,1 209 | 1099,0 210 | 1100,1 211 | 1101,0 212 | 1102,0 213 | 1103,0 214 | 1104,0 215 | 1105,1 216 | 1106,1 217 | 1107,0 218 | 1108,1 219 | 1109,0 220 | 1110,1 221 | 1111,0 222 | 1112,1 223 | 1113,0 224 | 1114,1 225 | 1115,0 226 | 1116,1 227 | 1117,1 228 | 1118,0 229 | 1119,1 230 | 1120,0 231 | 1121,0 232 | 1122,0 233 | 1123,1 234 | 1124,0 235 | 1125,0 236 | 1126,0 237 | 1127,0 238 | 1128,0 239 | 1129,0 240 | 1130,1 241 | 1131,1 242 | 1132,1 243 | 1133,1 244 | 1134,0 245 | 1135,0 246 | 1136,0 247 | 1137,0 248 | 1138,1 249 | 1139,0 250 | 1140,1 251 | 1141,1 252 | 1142,1 253 | 1143,0 254 | 1144,0 255 | 1145,0 256 | 1146,0 257 | 1147,0 258 | 1148,0 259 | 1149,0 260 | 1150,1 261 | 1151,0 262 | 1152,0 263 | 1153,0 264 | 1154,1 265 | 1155,1 266 | 1156,0 267 | 1157,0 268 | 1158,0 269 | 1159,0 270 | 1160,1 271 | 1161,0 272 | 1162,0 273 | 1163,0 274 | 1164,1 275 | 1165,1 276 | 1166,0 277 | 1167,1 278 | 1168,0 279 | 1169,0 280 | 1170,0 281 | 1171,0 282 | 1172,1 283 | 1173,0 284 | 1174,1 285 | 1175,1 286 | 1176,1 287 | 1177,0 288 | 1178,0 289 | 1179,0 290 | 1180,0 291 | 1181,0 292 | 1182,0 293 | 1183,1 294 | 1184,0 295 | 1185,0 296 | 1186,0 297 | 1187,0 298 | 1188,1 299 | 1189,0 300 | 1190,0 301 | 1191,0 302 | 1192,0 303 | 1193,0 304 | 1194,0 305 | 1195,0 306 | 1196,1 307 | 1197,1 308 | 1198,0 309 | 1199,0 310 | 1200,0 311 | 1201,1 312 | 1202,0 313 | 1203,0 314 | 1204,0 315 | 1205,1 316 | 1206,1 317 | 1207,1 318 | 1208,0 319 | 1209,0 320 | 1210,0 321 | 1211,0 322 | 1212,0 323 | 1213,0 324 | 1214,0 325 | 1215,0 326 | 1216,1 327 | 1217,0 328 | 1218,1 329 | 1219,0 330 | 1220,0 331 | 1221,0 332 | 1222,1 333 | 1223,0 334 | 1224,0 335 | 1225,1 336 | 1226,0 337 | 1227,0 338 | 1228,0 339 | 1229,0 340 | 1230,0 341 | 1231,0 342 | 1232,0 343 | 1233,0 344 | 1234,0 345 | 1235,1 346 | 1236,0 347 | 1237,1 348 | 1238,0 349 | 1239,1 350 | 1240,0 351 | 1241,1 352 | 1242,1 353 | 1243,0 354 | 1244,0 355 | 1245,0 356 | 1246,1 357 | 1247,0 358 | 1248,1 359 | 1249,0 360 | 1250,0 361 | 1251,1 362 | 1252,0 363 | 1253,1 364 | 1254,1 365 | 1255,0 366 | 1256,1 367 | 1257,1 368 | 1258,0 369 | 1259,1 370 | 1260,1 371 | 1261,0 372 | 1262,0 373 | 1263,1 374 | 1264,0 375 | 1265,0 376 | 1266,1 377 | 1267,1 378 | 1268,1 379 | 1269,0 380 | 1270,0 381 | 1271,0 382 | 1272,0 383 | 1273,0 384 | 1274,1 385 | 1275,1 386 | 1276,0 387 | 1277,1 388 | 1278,0 389 | 1279,0 390 | 1280,0 391 | 1281,0 392 | 1282,0 393 | 1283,1 394 | 1284,0 395 | 1285,0 396 | 1286,0 397 | 1287,1 398 | 1288,0 399 | 1289,1 400 | 1290,0 401 | 1291,0 402 | 1292,1 403 | 1293,0 404 | 1294,1 405 | 1295,0 406 | 1296,0 407 | 1297,0 408 | 1298,0 409 | 1299,0 410 | 1300,1 411 | 
1301,1 412 | 1302,1 413 | 1303,1 414 | 1304,1 415 | 1305,0 416 | 1306,1 417 | 1307,0 418 | 1308,0 419 | 1309,0 420 | -------------------------------------------------------------------------------- /903 One-Shot Learning with Memory-Augmented Neural Network/01 Turing Machine.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Turing Machine\n", 8 | "\n", 9 | "* https://en.wikipedia.org/wiki/Turing_machine\n", 10 | "* https://www.cl.cam.ac.uk/projects/raspberrypi/tutorials/turing-machine/one.html" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "### Formal Definition\n", 18 | "\n", 19 | "$$ M = (Q, R, b, \\Sigma, \\delta, q_0, F) $$\n", 20 | "\n", 21 | "* $ Q $: is a finite, non-empty set of states\n", 22 | "* $ R $: is a finite non-empty set of tape alphabet symbols\n", 23 | "* $ b \\in R $: is the blank symbol (the only symbol allowed to occur on the tape infinitely often at any step during the computation)\n", 24 | "* $ \\Sigma \\subseteq R $: is the set of input symbols\n", 25 | "* $ \\delta $: the transition function
$ (Q \setminus F) \times R \rightarrow Q \times R \times \{L, N, R\} $
L은 왼쪽이동, N은 멈춤, R은 오른쪽이동을 가르킵니다. \n", 26 | "* $ q_0 $: 초기 state\n", 27 | "* $ F $: the set of final states\n", 28 | "\n", 29 | "아래는 예제입니다. \n", 30 | "\n", 31 | "* $ Q = \\{ A, B, C, HALT \\}$: \n", 32 | "* $ R = \\{ 0, 1 \\} $\n", 33 | "* $ b = 0 $ (blank)\n", 34 | "* $ \\Sigma = \\{1\\} $\n", 35 | "* $ q_0 = A $ (the initial state)\n", 36 | "* $ F = \\{HALT\\} $\n", 37 | "* $ \\delta = $ 아래의 테이블 참조\n", 38 | "\n", 39 | "| State | Tape Symbol (read) | Write Instruction | Move Instaruction | Next State | \n", 40 | "|:--------|:-------------------|:------------------|:------------------|:-----------|\n", 41 | "| State 0 | Blank | Blank | left | State 1 |\n", 42 | "| State 0 | 0 | Write 1 | right | State 0 |\n", 43 | "| State 0 | 1 | Write | right | State 0 |\n", 44 | "| State 1 | Blank | Blank | halt | Finish |\n", 45 | "| State 1 | 0 | Write 1 | left | State 1 |\n", 46 | "| State 1 | 1 | Wrtie 0 | left | State 1 |" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "# Turing Machine with Python\n", 54 | "\n", 55 | "11001110001이라는 값을 Inversion (00110001110으로 변환)시킨후 다시 원래의 값으로 돌아오는 Turing Machine" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "-----------11001110001\n", 70 | "----------01001110001-\n", 71 | "---------00001110001--\n", 72 | "--------00101110001---\n", 73 | "-------00111110001----\n", 74 | "------00110110001-----\n", 75 | "-----00110010001------\n", 76 | "----00110000001-------\n", 77 | "---00110001001--------\n", 78 | "--00110001101---------\n", 79 | "-00110001111----------\n", 80 | "00110001110-----------\n", 81 | "-00110001110----------\n", 82 | "--00110001111---------\n", 83 | "---00110001101--------\n", 84 | "----00110001001-------\n", 85 | "-----00110000001------\n", 86 | "------00110010001-----\n", 87 | "-------00110110001----\n", 88 | "--------00111110001---\n", 89 | "---------00101110001--\n", 90 | "----------00001110001-\n", 91 | "-----------01001110001\n", 92 | "------------11001110001\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "def turning_machine(tape, init_state, t):\n", 98 | " \"\"\"\n", 99 | " @param tape: a strip of tape which contains data like 0 or 1\n", 100 | " @param q: initial state\n", 101 | " @param gamma: transition function or table\n", 102 | " \"\"\"\n", 103 | " N = len(tape)\n", 104 | " current_state = init_state\n", 105 | " idx = 0 \n", 106 | " while current_state != 'finish':\n", 107 | " \n", 108 | " # Visualization\n", 109 | " print('-'*(N-idx) + ''.join([str(m) for m in tape]) + '-' * idx)\n", 110 | " \n", 111 | " # Read\n", 112 | " if idx < 0 or idx >= N:\n", 113 | " mark = None\n", 114 | " else:\n", 115 | " mark = tape[idx]\n", 116 | " \n", 117 | " # Transition\n", 118 | " write, action, next_state = t[(current_state, mark)]\n", 119 | " \n", 120 | " # Write\n", 121 | " if idx < N and write is not None:\n", 122 | " tape[idx] = write\n", 123 | " \n", 124 | " if action == 'left':\n", 125 | " idx -= 1\n", 126 | " elif action == 'right':\n", 127 | " idx += 1\n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " current_state = next_state\n", 132 | "\n", 133 | "tape = [1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1]\n", 134 | "t = {\n", 135 | " ('state_0', None): (None, 'left', 'state_1'),\n", 136 | " ('state_0', 0): (1, 'right', 'state_0'),\n", 137 | " ('state_0', 1): (0, 'right', 'state_0'),\n", 138 | " ('state_1', None): (None, 'halt', 
'finish'),\n", 139 | " ('state_1', 0): (1, 'left', 'state_1'),\n", 140 | " ('state_1', 1): (0, 'left', 'state_1'),\n", 141 | "}\n", 142 | "\n", 143 | "turning_machine(tape, init_state='state_0', t=t)" 144 | ] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 3", 150 | "language": "python", 151 | "name": "python3" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 3 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython3", 163 | "version": "3.6.0" 164 | } 165 | }, 166 | "nbformat": 4, 167 | "nbformat_minor": 2 168 | } 169 | -------------------------------------------------------------------------------- /Keras Tutorial/106 [Layer] Merge Layers/01 Concatenate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "# concatenate" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": { 17 | "collapsed": false, 18 | "deletable": true, 19 | "editable": true 20 | }, 21 | "outputs": [ 22 | { 23 | "name": "stderr", 24 | "output_type": "stream", 25 | "text": [ 26 | "Using TensorFlow backend.\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "import numpy as np\n", 32 | "\n", 33 | "from keras.models import Model, Input\n", 34 | "from keras.layers import Dense, concatenate\n", 35 | "from keras.layers.merge import Concatenate" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": { 41 | "deletable": true, 42 | "editable": true 43 | }, 44 | "source": [ 45 | "## Example 01\n", 46 | "\n", 47 | "2개의 데이터는 모두 시계열성 (Batch, Sequence, Data) 데이터 이지만, shape이 서로 다릅니다.
\n", 48 | "이때 concatenate를 했을때 문제없이 작동합니다." 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "metadata": { 55 | "collapsed": false, 56 | "deletable": true, 57 | "editable": true 58 | }, 59 | "outputs": [ 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | "layer1 shape: (?, ?, 1)\n", 65 | "layer2 shape: (?, ?, 3)\n" 66 | ] 67 | }, 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "array([[[ 0.46914026],\n", 72 | " [ 0.43851474],\n", 73 | " [ 0.40835062]],\n", 74 | "\n", 75 | " [[ 0.35991007],\n", 76 | " [ 0.33195624],\n", 77 | " [ 0.30513856]]], dtype=float32)" 78 | ] 79 | }, 80 | "execution_count": 2, 81 | "metadata": {}, 82 | "output_type": "execute_result" 83 | } 84 | ], 85 | "source": [ 86 | "a = np.array([[[1], [2], [3]], \n", 87 | " [[4], [5], [6]]])\n", 88 | "b = np.array([[[0, 0, 0], [0, 0, 0], [0, 0, 0]], \n", 89 | " [[1, 1, 1], [1, 1, 1], [1, 1, 1]]])\n", 90 | "\n", 91 | "input1 = Input(shape=(None, 1), name='input1')\n", 92 | "input2 = Input(shape=(None, 3), name='input2')\n", 93 | "\n", 94 | "layer1 = Dense(1)(input1)\n", 95 | "layer2 = Dense(3)(input2)\n", 96 | "print('layer1 shape:', layer1.shape)\n", 97 | "print('layer2 shape:', layer2.shape)\n", 98 | "\n", 99 | "merged_vector = concatenate([layer1, layer2], axis=-1)\n", 100 | "predictions = Dense(1, activation='sigmoid')(merged_vector)\n", 101 | "\n", 102 | "model = Model(inputs=[input1, input2], outputs=predictions)\n", 103 | "model.compile(optimizer='rmsprop', loss='mean_squared_error')\n", 104 | "\n", 105 | "model.predict([a, b])" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": { 111 | "deletable": true, 112 | "editable": true 113 | }, 114 | "source": [ 115 | "## Example 02\n", 116 | "\n", 117 | "시계열 shape이 서로 다른 경우 입니다.
\n", 118 | "예를 들어서 (None, 3, 1) 그리고 (None, 2, 1) 을 Time Sequence 부분을 기준으로 concatenate하면 다음과 같이 됩니다. \n", 119 | "\n", 120 | "```\n", 121 | ">> a = [[[a], [a], [a]]\n", 122 | ">> [[d], [e], [f]]]\n", 123 | ">> b = [[[z], [z]],\n", 124 | ">> [[x], [x]]]\n", 125 | ">> \n", 126 | ">> result = concatenate([a, b], axis=1)\n", 127 | "\n", 128 | "[[[a], [a], [a], [z], [z]], \n", 129 | " [[d], [e], [f], [x], [x]]]\n", 130 | "\n", 131 | "```" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 13, 137 | "metadata": { 138 | "collapsed": false, 139 | "deletable": true, 140 | "editable": true 141 | }, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "layer1 shape: (?, ?, 1)\n", 148 | "layer2 shape: (?, ?, 1)\n" 149 | ] 150 | }, 151 | { 152 | "data": { 153 | "text/plain": [ 154 | "array([[[ 0.99980801],\n", 155 | " [ 0.99980801],\n", 156 | " [ 0.99980801],\n", 157 | " [ 0.5 ],\n", 158 | " [ 0.5 ]],\n", 159 | "\n", 160 | " [[ 0.84703767],\n", 161 | " [ 0.99980801],\n", 162 | " [ 0.84703767],\n", 163 | " [ 0.15703523],\n", 164 | " [ 0.15703523]]], dtype=float32)" 165 | ] 166 | }, 167 | "execution_count": 13, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "a = np.array([[[5], [5], [5]], \n", 174 | " [[1], [5], [1]]])\n", 175 | "b = np.array([[[0], [0]], \n", 176 | " [[1], [1]]])\n", 177 | "\n", 178 | "input1 = Input(shape=(None, 1), name='input1')\n", 179 | "input2 = Input(shape=(None, 1), name='input2')\n", 180 | "\n", 181 | "layer1 = Dense(1)(input1)\n", 182 | "layer2 = Dense(1)(input2)\n", 183 | "print('layer1 shape:', layer1.shape)\n", 184 | "print('layer2 shape:', layer2.shape)\n", 185 | "\n", 186 | "merged_vector = concatenate([layer1, layer2], axis=1)\n", 187 | "predictions = Dense(1, activation='sigmoid')(merged_vector)\n", 188 | "\n", 189 | "model = Model(inputs=[input1, input2], outputs=predictions)\n", 190 | "model.compile(optimizer='rmsprop', loss='mean_squared_error')\n", 191 | "\n", 192 | "model.predict([a, b])" 193 | ] 194 | } 195 | ], 196 | "metadata": { 197 | "kernelspec": { 198 | "display_name": "Python 3", 199 | "language": "python", 200 | "name": "python3" 201 | }, 202 | "language_info": { 203 | "codemirror_mode": { 204 | "name": "ipython", 205 | "version": 3 206 | }, 207 | "file_extension": ".py", 208 | "mimetype": "text/x-python", 209 | "name": "python", 210 | "nbconvert_exporter": "python", 211 | "pygments_lexer": "ipython3", 212 | "version": "3.6.0" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 2 217 | } 218 | -------------------------------------------------------------------------------- /001 Performance Test (ROC, AUC, Confusion Matrix)/01. Performance Test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Confusion Matrix \n", 8 | "\n", 9 | "Confusion Matrix는 일반적으로 classification model이 테스트 데이터에 대한 performance를 측정하는데 사용됩니다. (ground-truth values를 알고 있는 상태)
\n", 10 | "confusion matrix는 상대적으로 꽤 쉽게 이해할수 있기 때문에 많이 사용됩니다. 다만 용어가 매우 혼돈스럽기 때문에 주의?가 필요합니다." 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "## Binary Classification\n", 18 | "\n", 19 | "아래의 matrix는 165명의 환자를 대상으로 질병이 있는지 없는지 예측한 값과, 실제 값을 나타낸 것 입니다.\n", 20 | "\n", 21 | "![Confusion Matrix](images/confusion_matrix_simple2.png)\n", 22 | "\n", 23 | "\n", 24 | "* **True Positives (TP)**: 질병이 있다고 예측했고 실제로 질병이 있는 경우 (있는게 맞어)\n", 25 | "* **True Negatives (TN)**: 질병이 없다고 예측했고, 실제로 질병이 없는 경우 (없는게 맞어)\n", 26 | "* **False Positives (FP)**: 질병이 있다고 예측했지만, 실제로는 질병이 없음 (있는게 틀려)\n", 27 | "* **False Negatives (FN)**: 질병이 없다고 예측했지만, 실제로는 질병이 있음 (없는게 틀려)\n", 28 | "\n", 29 | "> Positives, Negatives는 예측한 값을 의미하고, True, False는 그 예측한 값이 맞냐 틀리냐를 말하는 상대적 개념" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "# Performance Measures\n", 37 | "\n", 38 | "일반적으로 classification에서 accuracy를 많이 보지만, 실무에서는 이것만 보지는 않습니다.
\n", 39 | "아래와 같은 analysis들을 보면서 해석을 합니다.\n", 40 | "\n", 41 | "자세한 내용은 [위키피디아 Confusion Matrix](https://en.wikipedia.org/wiki/Confusion_matrix)를 참고 합니다.\n", 42 | "\n", 43 | "## Accuracy \n", 44 | "\n", 45 | "* 전체 샘플중에 실제로 맞춘 비율\n", 46 | "* 가장 많이 사용되지만, class간의 비율이 동일할때 사용합니다.\n", 47 | "* 최적화에서 objective function으로 사용됩니다.\n", 48 | "\n", 49 | "$$ \\text{Accuracy} = \\frac{TP + TN}{N} = \\frac{100 + 50}{165} = 0.91 $$\n", 50 | "\n", 51 | "## Error Rate (Misclassification Rate)\n", 52 | "\n", 53 | "* 1 - accuracy \n", 54 | "* 얼마나 못 맞췄냐?\n", 55 | "\n", 56 | "$$ \\text{Misclassification Rate} = \\frac{FP + FN}{N} = \\frac{10 + 5}{165} = 0.09 $$\n", 57 | "\n", 58 | "\n", 59 | "## Recall (Sensitivity, True Positive Rate)\n", 60 | "\n", 61 | "* 실제 암에 걸린 환자들중 제대로 암이라고 판단한 비율\n", 62 | "* 실제 사기범들중에 유죄 판정을 내린 비율\n", 63 | "* 사기친 거래들중에 실제로 잡아낸 비율 -> 검거율!\n", 64 | "* 값이 높을수록 좋다\n", 65 | "\n", 66 | "$$ \\text{True Positive Rate} = \\frac{TP}{TP + FN} = \\frac{TP}{\\text{Actual Yes}} = \\frac{100}{100 + 5} = 0.95 $$\n", 67 | "\n", 68 | "\n", 69 | "## Fall-out (False Alarm Ratio, False Positive Rate)\n", 70 | " \n", 71 | "* 실제로는 암이 아닌데 암이라고 말하는 비율\n", 72 | "* 실제로는 유죄가 아닌데 유죄라고 판결하는 비율\n", 73 | "* 실제로는 임신이 아닌데 임신이라고 오작동 하는 비율\n", 74 | "* 실제 정상거래들중, 사기라고 예측한 비율\n", 75 | "* 오판율\n", 76 | "* 1 - Specificity\n", 77 | "* 값이 높을수록 병신갖은 예측/판단을 한거다\n", 78 | "\n", 79 | "$$ \\text{True Positive Rate} = \\frac{FP}{FP + TN} = \\frac{FP}{\\text{Actual No}} = \\frac{10}{10 + 50} = 0.17 $$\n", 80 | "\n", 81 | "\n", 82 | "## Specificity\n", 83 | "\n", 84 | "* 실제 아닌데, 예측도 아니라고 한 비율\n", 85 | "* 암이라고 판단했는데.. 실제로 맞은 비율\n", 86 | "* 유죄라고 판단했는데.. 실제로 맞은 비율\n", 87 | "* 값이 높을수록 좋다\n", 88 | "* 1 - False Positive Rate\n", 89 | "\n", 90 | "$$ \\text{Specificity} = \\frac{TN}{TN + FP} = \\frac{TN}{\\text{Actual No}} = \\frac{50}{50 + 10} = 0.83 $$\n", 91 | "\n", 92 | "\n", 93 | "## Precision \n", 94 | "\n", 95 | "* 질병이 있다고 예측한 것중에 실제로 맞춘 비율 -> 값이 낮을 수록 암에 걸렸다고 진단했는데.. 실제로는 아닌 사람들이 있다. \n", 96 | "* 사기 거래에서 실제 사기를 제대로 잡아낸 비율 -> 값이 낮을 수록 무죄인 사람이 유죄로 잡혀 들어간 꼴이다.\n", 97 | "\n", 98 | "$$ \\text{Precision} = \\frac{TP}{TP + FP} = \\frac{TP}{\\text{Predicted Yes}} = \\frac{100}{100 + 10} = 0.91 $$\n", 99 | "\n" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "# ROC (Receiver Operating Characteristics)\n", 107 | "\n", 108 | "이름이 참 이상합니다. Receiver Operating Characteristics 라니.. (직역하면.. 수신기 작동 특성?)
\n", 109 | "2차 대전때, 진주만 습격 이후로, 미군은 일본 비행기를 감지하는 레이더 시그널에 대해서 연구를 하기 시작합니다.
\n", 110 | "레이더 수신기 장비 (Radar receiver operators)의 성능을 측정하기 위해서 사용한 방법은 Receiver Operating Characteristics 입니다.
\n", 111 | "결론적으로 일본 전투기를 제대로 감지하는지 레이더의 성능을 측정하기 위한 방법으로 생겨났고.. 그래서 이름도 이렇게 됨. \n", 112 | "\n", 113 | "\n", 114 | "ROC curve 그래프는 세로축을 True Positive Rate (Sensitivity or Recall) 로 하고, 가로축을 False Positive Rate 으로 시각화한 그래프로서 각각의 classification thresholds마다 TPR VS FPR 을 계산한 그래프입니다. 중간의 직선은 reference line 입니다.\n", 115 | "\n", 116 | "![ROC Curve](images/roc-example1.png)\n", 117 | "\n", 118 | "\n", 119 | "보는 방법은 매우 간단합니다. TPR이 높고, FPR은 낮을수록 좋은거 입니다.
\n", 120 | "\n", 121 | "> TPR과 FPR은 서로 반비례적인 관계에 있습니다. " 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "## AUC (Area Under the ROC Curve)\n", 129 | "\n", 130 | "ROC curve의 밑면적을 계산한 값입니다. \n", 131 | "\n", 132 | "![AUC](images/auc-example.png)\n" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "# References\n", 140 | "\n", 141 | "* http://www.dataschool.io/simple-guide-to-confusion-matrix-terminology/" 142 | ] 143 | } 144 | ], 145 | "metadata": { 146 | "kernelspec": { 147 | "display_name": "Python 3 (ipykernel)", 148 | "language": "python", 149 | "name": "python3" 150 | }, 151 | "language_info": { 152 | "codemirror_mode": { 153 | "name": "ipython", 154 | "version": 3 155 | }, 156 | "file_extension": ".py", 157 | "mimetype": "text/x-python", 158 | "name": "python", 159 | "nbconvert_exporter": "python", 160 | "pygments_lexer": "ipython3", 161 | "version": "3.8.10" 162 | }, 163 | "toc": { 164 | "base_numbering": 1, 165 | "nav_menu": {}, 166 | "number_sections": true, 167 | "sideBar": true, 168 | "skip_h1_title": false, 169 | "title_cell": "Table of Contents", 170 | "title_sidebar": "Contents", 171 | "toc_cell": false, 172 | "toc_position": {}, 173 | "toc_section_display": true, 174 | "toc_window_display": false 175 | } 176 | }, 177 | "nbformat": 4, 178 | "nbformat_minor": 2 179 | } 180 | -------------------------------------------------------------------------------- /902 Deep Reinforcement Learning/02 Deep Reinforcement Learning Part 2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "Part 2 에서는 DeepMind 팀에서 내놓은 Playing Atari with Deep Reinforcement Learning 논문을 해부할 것입니다." 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 14, 14 | "metadata": { 15 | "collapsed": false 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "import pylab\n", 20 | "import numpy as np" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### 1. Introduction\n", 28 | "\n", 29 | "이미지 또는 음성등에서 바로 Agent를 학습시키는 것은 RL (Reinforcement Learning)에서 오래된 챌린지중의 하나입니다.
\n", 30 | "이전의 RL방식들은 손으로 집접 만든 features들이나 policy등을 통해서 성공할수 있었지만 이 경우 특정 문제를 해결하는데만 최적화가 되어 있어
\n", 31 | "같은 방식으로 다른 문제들을 해결하기에는 어려운 점들이 많습니다.\n", 32 | "\n", 33 | "최근 Deep Learning의 발전들은 raw sensory data (이미지등)에서 high-level features들을 뽑아내는게 가능하게 만들었고,
\n", 34 | "이는 Convolutional networks, Multiplayer Perceptrons, restricted Boltzmann machines 그리고 \n", 35 | "Recurrent Neural networks와 같은 컴퓨터 비전[11, 22, 16] 그리고 음성인식 [6, 7]에서의 비약적인 발전으로 이어졌습니다. \n", 36 | "\n", 37 | "하지만 Reinforcement Learning 은 deep learning의 관점에서 볼때 여러 챌린지들을 갖고 있습니다.
\n", 38 | "첫번째로 성공적인 deep learning applications들은 수작업한 엄청나게 많은 데이터를 통해서 학습됩니다. \n", 39 | "하지만 RL 알고리즘은 그와는 반대로 scalar reward signal을 통해서 배워야만 하며, \n", 40 | "이 reward는 매우 적게 분포하고 있으며 (frequently sparse), delayed 된 경우가 많습니다. \n", 41 | "실질적으로 delay는 actions과 resulting rewards 사이에 수천 timesteps이 존재할정도로 거대합니다. \n", 42 | "이는 기존의 input과 targets이 direct로 연견될것과 비교해볼수 있습니다.
\n", 43 | "\n", 44 | "다른 이슈는 기존의 대부분의 deep learning이 모든 samples들이 independent 하다고 여깁니다.
\n", 45 | "하지만 Deep Reinforcement Learning 에서는 매우 연관성이 높은 (correrated) states의 sequences를 만나게 될 일이 많습니다." 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": { 51 | "collapsed": true 52 | }, 53 | "source": [ 54 | "### Background\n", 55 | "\n", 56 | "\n", 57 | "environment $ \\epsilon $에 해당하는 Atari emulator안에서 Agent는 일련의 actions, observations, 그리고 rewards등을 받습니다.
\n", 58 | "각각의 time-step마다 Agent는 게임안에서 허용된 $ A = \\{1, ..., K\\} $ actions들로 부터 하나의 $ a_{t} $ (action)을 취하게 됩니다.
\n", 59 | "Action은 environment (Atari Emulator)안으로 들어가게 되고, Game state, score가 변하게 됩니다. \n", 60 | "Emulator안의 내부 state자체를 Agent가 얻는것이 아니라 (예를 들어서 공 object의 위치나, paddle object의 위치 등등) 이미지 자체를 $ x_t \\in \\Bbb{R}^d $\n", 61 | "(x 값이라는 것은.. input data로 사용된다는 뜻이고 x_t 라는건 어느 시점 (time)의 input image data를 말함) \n", 62 | "\n", 63 | "이미지 한장을 보고서 Agent가 어떤 상황인지 알아내는 것은 불가능합니다.
\n", 64 | "따라서 일련의 actions 그리고 화면이미지들 (observations) $ s_t= x_1,a_1,x_2,...,a_{t-1},x_t, $ 을 통해서 학습을 합니다.
\n", 65 | "모든 게임은 한정된 time-steps를 지나고 끝나게 됩니다. 따라서 MDPs (Markov Decision Process)를 사용해서 문제를 해결할 수 있습니다.\n", 66 | "\n", 67 | "\n", 68 | "| Name | Math Symbol | Description |\n", 69 | "|:-----|:------------|:------------|\n", 70 | "| Environment | $$ \\varepsilon $$ | Atari Emulator 를 뜻하며 Agent는 environment로 부터 actions, observations, rewards등을 주거니 받거니함
Generally stochastic. |\n", 71 | "| Action | $$ a_t $$ | The action taken at a given time-step, chosen from the legal game actions $$ A = \{1, ..., K\} $$ |\n", 72 | "| Image (screen shot) | $$ x_t \in \Bbb{R}^d $$ | The image representing the current screen |\n", 73 | "| Reward | $$ r_t $$ | The reward depends on the **entire** preceding sequence of actions and observations.
In other words, the feedback for a single action typically arrives only after thousands of time-steps have passed. | \n", 74 | "\n", 75 | "\n" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "**The Agent's goal** is to choose actions that maximize the amount of reward it will receive in the future.
\n", 83 | "미래에 받게될 보상은 time-step 마다 a factor of $ \\gamma $ 만큼 줄어든다고 가정을 합니다. (미래는 불확실하기 때문에)
\n", 84 | "특정시점 $ t $ 에 받게될 future discounted return은 다음과 같이 정의 합니다.\n", 85 | "\n", 86 | "$$ R_t = \\sum^{T}_{t^{\\prime} = t} \\gamma^{t^{\\prime} - t} r $$\n", 87 | "\n", 88 | "| Name | Math Symbol | Description |\n", 89 | "|:-----|:------------|:------------|\n", 90 | "| 종료 시점 | $ T $ | 게임이 종료되는 시점 |\n", 91 | "| 현재 시점 | $ t $ | 계산을 하려는 시점 (현재 시점) |\n" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 28, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": [ 105 | "100 = 0.9^0 * 100\n", 106 | "190 = 0.9^1 * 100\n", 107 | "271 = 0.9^2 * 100\n", 108 | "343 = 0.9^3 * 100\n", 109 | "409 = 0.9^4 * 100\n", 110 | "468 = 0.9^5 * 100\n" 111 | ] 112 | }, 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "468.55899999999997" 117 | ] 118 | }, 119 | "execution_count": 28, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "# 예를 들어서 도박으로 매달 100만원 정도씩 벌 수 있는 친구가 있습니다. \n", 126 | "# discounted future return으로 6개월 동안 도박을 했을때 벌어들일수 있는 전체 수익은 얼마일까요?\n", 127 | "# 이때 이 친구의 실력이 꽤나 뛰어나서 계속 100만원을 벌수 있는 확률을 90%정도로 보겠습니다. \n", 128 | "\n", 129 | "def discounted_future_return(gamma, reward, T):\n", 130 | " net_reward = 0\n", 131 | " for t in xrange(T):\n", 132 | " net_reward += gamma**t * reward\n", 133 | " print '%d = %.1f^%d * %d' % (net_reward, gamma, t, reward)\n", 134 | " return net_reward\n", 135 | " \n", 136 | "discounted_future_return(gamma=0.9, reward=100, T=6)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "### Optimal action-value function\n", 144 | "\n", 145 | "$$ Q * (s,a) = max_{\\pi} $$" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "### References \n", 153 | "\n", 154 | "* [Playing Atari with Deep Reinforcement Learning](https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf)\n", 155 | "* [An Introduction to Markov Decision Processes - Bellman Equations 참고](http://www.cs.rice.edu/~vardi/dag01/givan1.pdf)" 156 | ] 157 | } 158 | ], 159 | "metadata": { 160 | "kernelspec": { 161 | "display_name": "Python 2", 162 | "language": "python", 163 | "name": "python2" 164 | }, 165 | "language_info": { 166 | "codemirror_mode": { 167 | "name": "ipython", 168 | "version": 2 169 | }, 170 | "file_extension": ".py", 171 | "mimetype": "text/x-python", 172 | "name": "python", 173 | "nbconvert_exporter": "python", 174 | "pygments_lexer": "ipython2", 175 | "version": "2.7.12" 176 | } 177 | }, 178 | "nbformat": 4, 179 | "nbformat_minor": 1 180 | } 181 | -------------------------------------------------------------------------------- /203 Decision Tree/06 Titanic Decision Tree with Python (in Development).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 82, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Populating the interactive namespace from numpy and matplotlib\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "%pylab inline\n", 20 | "import pandas as pd\n", 21 | "import numpy as np\n", 22 | "from statsmodels.tools import categorical" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Data\n", 30 | "\n", 31 | "\n", 32 | "[Kaggle Titanic Data](https://www.kaggle.com/c/titanic/data)에서 자세한 데이터 정보를 얻을수 
있습니다." 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 168, 38 | "metadata": { 39 | "collapsed": false 40 | }, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "[[ 0. 0. 22. ..., 0. 7.25 0. ]\n", 47 | " [ 1. 1. 38. ..., 0. 71.2833 1. ]\n", 48 | " [ 0. 1. 26. ..., 0. 7.925 0. ]\n", 49 | " ..., \n", 50 | " [ 0. 1. nan ..., 2. 23.45 0. ]\n", 51 | " [ 1. 0. 26. ..., 0. 30. 1. ]\n", 52 | " [ 0. 0. 32. ..., 0. 7.75 2. ]]\n", 53 | "[Training Data]\n", 54 | " Survived Pclass Sex Age SibSp Parch Fare Embarked\n", 55 | "0 0 0 0 22.0 1 0 7.2500 0\n", 56 | "1 1 1 1 38.0 1 0 71.2833 1\n", 57 | "2 1 0 1 26.0 0 0 7.9250 0\n", 58 | "3 1 1 1 35.0 1 0 53.1000 0\n", 59 | "4 0 0 0 35.0 0 0 8.0500 0\n", 60 | "\n", 61 | "[Test Data]\n", 62 | "Survived int64\n", 63 | "Pclass int64\n", 64 | "Sex int64\n", 65 | "Age float64\n", 66 | "SibSp int64\n", 67 | "Parch int64\n", 68 | "Fare float64\n", 69 | "Embarked int64\n", 70 | "dtype: object\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "# Train Data\n", 76 | "train = pd.read_csv('../../data/titanic/train.csv', usecols=(1, 2, 4, 5, 6, 7, 9, 11))\n", 77 | "train['Survived'] = train['Survived'].factorize()[0]\n", 78 | "train['Pclass'] = train['Pclass'].factorize()[0]\n", 79 | "train['Sex'] = train['Sex'].factorize()[0]\n", 80 | "train['Embarked'] = train['Embarked'].factorize()[0]\n", 81 | "\n", 82 | "train_x = train.drop('Survived', axis=1).as_matrix()\n", 83 | "train_y = train['Survived'].as_matrix()\n", 84 | "\n", 85 | "\n", 86 | "# Test Data\n", 87 | "test = pd.read_csv('../../data/titanic/test.csv', usecols=(0, 1, 3, 4, 5, 6, 8, 10))\n", 88 | "test = pd.merge(test, pd.read_csv('../../data/titanic/gendermodel.csv'), on='PassengerId')\n", 89 | "test = test.drop('PassengerId', axis=1)\n", 90 | "\n", 91 | "test['Survived'] = test['Survived'].astype('category')\n", 92 | "test['Pclass'] = test['Pclass'].astype('category')\n", 93 | "test['Sex'] = test['Sex'].astype('category')\n", 94 | "test['Embarked'] = test['Embarked'].astype('category')\n", 95 | "\n", 96 | "test_x = test.drop('Survived', axis=1).as_matrix()\n", 97 | "test_y = test['Survived'].as_matrix()\n", 98 | "\n", 99 | "print '[Training Data]'\n", 100 | "print train.head()\n", 101 | "print\n", 102 | "\n", 103 | "print '[Test Data]'\n", 104 | "print train.dtypes" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 211, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "[ True True False True True False True]\n", 119 | "[ True True False True True False True]\n" 120 | ] 121 | } 122 | ], 123 | "source": [ 124 | "d = np.nanstd(train_x, axis=0)\n", 125 | "N, C = train_x.shape\n", 126 | "x = [len(np.unique(train_x[:, i]))/float(N) for i in xrange(C)]\n", 127 | "print x < np.mean(x)\n", 128 | "print d < 3" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "# Decision Tree with Python" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 249, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "0.448933782267\n", 150 | "0.35241301908\n", 151 | "0.317620650954\n", 152 | "0.239057239057\n", 153 | "0.27721661055\n" 154 | ] 155 | } 156 | ], 157 | "source": [ 158 | "class DecisionTree(object):\n", 159 | " \n", 160 | " def __init__(self, 
impurity='error'):\n", 161 | " assert impurity in ('entropy', 'gini', 'error')\n", 162 | " self.impurity = getattr(self, impurity)\n", 163 | " \n", 164 | " def prepare_data(self, data):\n", 165 | " self.categories = np.nanstd(data, axis=0) < 3\n", 166 | " \n", 167 | " def train(self, X, Y):\n", 168 | " self.prepare_data(X)\n", 169 | " \n", 170 | " C = X.shape[1]\n", 171 | " for i, is_categorical in enumerate(self.categories):\n", 172 | " if is_categorical:\n", 173 | " self.information_gain(X[:, i], X[:, i], Y)\n", 174 | " \n", 175 | " \n", 176 | " def information_gain(self, p, x, y):\n", 177 | " Dp = self.impurity(p)\n", 178 | " \n", 179 | " def entropy(self, p):\n", 180 | " return -(p*np.log2(p) + (1-p)*np.log2(1-p))\n", 181 | " \n", 182 | " def gini(self, p):\n", 183 | " return 1 - (p**2 + (1-p)**2)\n", 184 | " \n", 185 | " def error(self, data):\n", 186 | " shapes = [float(data[data == c].size) for c in np.unique(data)]\n", 187 | " shapes = shapes/np.sum(shapes)\n", 188 | " return 1 - np.max(shapes)\n", 189 | " \n", 190 | " \n", 191 | "# return 1 - np.max([p, 1-p], axis=0)\n", 192 | " \n", 193 | "dt = DecisionTree() \n", 194 | "dt.train(train_x, train_y)" 195 | ] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python 2", 201 | "language": "python", 202 | "name": "python2" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 2 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython2", 214 | "version": "2.7.12" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 1 219 | } 220 | --------------------------------------------------------------------------------