├── .github
│   ├── FUNDING.yml
│   └── desktop.ini
├── LICENSE
├── README.md
├── bernoulli_distribution_tutorial
│   ├── bernoulli_distribution.py
│   └── desktop.ini
├── convolutional-neural-networks-python
│   ├── cnns.py
│   ├── desktop.ini
│   └── masks.py
├── decision_tree_learning
│   ├── Iris.csv
│   ├── basic_decision_tree.py
│   ├── decision_tree_classificaiton.py
│   └── desktop.ini
├── deep-learning
│   ├── deep_learning_basics.ipynb
│   ├── desktop.ini
│   └── neuron.py
├── descriptive-statistics
│   ├── descriptive-statistics-pdf-book-sample.pdf
│   ├── descriptive_statistics.ipynb
│   ├── descriptive_statistics.py
│   └── desktop.ini
├── desktop.ini
├── genetic-algorithm-tutorial
│   ├── desktop.ini
│   ├── genetic_algorithm_python_tutorial.ipynb
│   └── implementation.py
├── google_colab_tutorial
│   ├── check_gpu.py
│   ├── collab_magic.py
│   ├── desktop.ini
│   └── kaggle_data_download.py
├── gradient_descent_tutorial
│   ├── data.txt
│   ├── desktop.ini
│   └── gradient_descent_tutorial.ipynb
├── k-nearest-neighbors
│   ├── desktop.ini
│   └── k_nearest_neighbor_knn_tutorial.py
├── linear-algebra-for-ml-and-deep-learning
│   ├── desktop.ini
│   ├── house_price.csv
│   ├── linear_regression.py
│   └── pca_with_python.py
├── logic
│   ├── desktop.ini
│   └── seven_planets_riddle.py
├── machine_learning_algorithms_for_beginners
│   ├── desktop.ini
│   ├── exponential_regression.py
│   ├── linear_regression_example.py
│   ├── logarithmic_regression.py
│   ├── machine_learning_algorithms_for_beginners.ipynb
│   ├── ml_algorithms_1.py
│   ├── multivariable_linear_regression.py
│   ├── polynomial_regression.py
│   └── sinusoidal_regression.py
├── moment_generating_function
│   ├── desktop.ini
│   └── moment_generating_function.py
├── monte_carlo_simulation
│   ├── desktop.ini
│   ├── monte_carlo_buffon's_needle_problem.py
│   ├── monte_carlo_casino_example.py
│   ├── monte_carlo_coin_flip.py
│   ├── monte_carlo_estimating_pi_using_circle_and_square.py
│   └── monte_carlo_monty_hall_problem.py
├── natural_language_processing
│   ├── Natural_Language_Processing_Text.txt
│   ├── circle.png
│   ├── desktop.ini
│   ├── natural_language_processing_code.py
│   ├── natural_language_processing_tutorial.ipynb
│   └── semantic-analysis.py
├── neural_networks_tutorial_part_1
│   ├── desktop.ini
│   ├── neural_network_part1_1.py
│   ├── neural_network_part1_2.py
│   ├── neural_network_part1_3.py
│   └── neural_networks_tutorial.ipynb
├── neural_networks_tutorial_part_2
│   ├── desktop.ini
│   ├── neural_networks_part2_1.py
│   ├── neural_networks_part2_2.py
│   ├── neural_networks_part2_3.py
│   └── neural_networks_tutorial_2.ipynb
├── pandas
│   ├── desktop.ini
│   ├── pd-melt.py
│   ├── pd_dropna().py
│   ├── pd_fillna().py
│   ├── pd_isna().py
│   ├── pd_isnull().py
│   ├── pd_join().py
│   ├── pd_notna().py
│   └── pd_notnull().py
├── poisson-distribution-process
│   ├── desktop.ini
│   ├── poisson.py
│   └── poisson_distribution_and_poisson_process_tutorial.ipynb
├── principal_component_analysis
│   ├── correlation_matrix_covariance_matrix.py
│   ├── desktop.ini
│   └── pca_with_python.py
├── programming
│   ├── desktop.ini
│   └── variable_swap_data_science.py
├── random-number-generator
│   ├── desktop.ini
│   ├── random_number_generator_tutorial.ipynb
│   └── random_number_generator_tutorial_with_python.py
├── recommendation_system_tutorial
│   ├── desktop.ini
│   ├── movie_titles.csv
│   ├── new_features.csv
│   └── recommendation_system_tutorial_netflix.py
├── sentiment_analysis_tutorial
│   ├── desktop.ini
│   ├── sentiment_analysis_tutorial.ipynb
│   └── women_clothing_review.csv
├── simple_linear_regression_tutorial
│   ├── Fuel_Consumption.csv
│   ├── desktop.ini
│   └── simple_linear_regression_from_scratch.py
├── support-vector-machine-svm
│   ├── desktop.ini
│   └── svm_machine_learning.py
├── survival_analysis_in_python
│   ├── desktop.ini
│   ├── lung.csv
│   ├── survival_analysis_1.py
│   ├── survival_analysis_2.py
│   └── survival_analysis_3.py
└── what-is-a-gpu
    ├── desktop.ini
    └── script.py
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | custom: ['https://paypal.me/towardsai']
13 |
--------------------------------------------------------------------------------
/.github/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Towards AI Co.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Tutorials
2 |
3 | **Please note that only the code contained in this repository is under the MIT license found in [LICENSE](https://github.com/towardsai/tutorials/blob/master/LICENSE). All tutorials, articles, and books listed in this repository are the property of Towards AI, Inc.**
4 |
5 | Please feel free to contribute. We review pull requests on an ad-hoc basis.
6 | If you'd like to work with one of our data scientists on an editorial tutorial, please reach out via [editor@towardsai.net](mailto:editor@towardsai.net) and cc [pub@towardsai.net](mailto:pub@towardsai.net).
7 |
8 | To **contribute** directly to Towards AI, check out our [guidelines to get published](https://contribute.towardsai.net).
9 |
10 | Join our [AI community](https://community.towardsai.net).
11 |
12 | If you'd like to support Towards AI, please consider [buying one of our books](https://gumroad.com/towardsai) (listed below) or [sponsoring this open-source work](https://paypal.me/towardsai).
13 |
14 | Thank you for reading and for being a supporter of Towards AI!
15 |
16 | Access any [tutorial for free](https://towardsai.net/p/category/editorial).
17 |
18 | **[Terms](https://towardsai.net/terms) | [Privacy Policy](https://towardsai.net/privacy)**
19 |
20 | ________________________________________________________________________________
21 |
22 | [Machine Learning Algorithms For Beginners with Code Examples in Python](https://towardsai.net/p/machine-learning/machine-learning-algorithms-for-beginners-with-python-code-examples-ml-19c6afd60daa)
23 |
24 | [Neural Networks from Scratch with Python Code and Math in Detail— I](https://towardsai.net/p/machine-learning/building-neural-networks-from-scratch-with-python-code-and-math-in-detail-i-536fae5d7bbf)
25 |
26 | [Building Neural Networks with Python Code and Math in Detail — II](https://towardsai.net/p/machine-learning/building-neural-networks-with-python-code-and-math-in-detail-ii-bbe8accbf3d1)
27 |
28 | [Natural Language Processing (NLP) with Python — Tutorial](https://towardsai.net/p/nlp/natural-language-processing-nlp-with-python-tutorial-for-beginners-1f54e610a1a0)
29 |
30 | [Monte Carlo Simulation An In-depth Tutorial with Python](https://towardsai.net/p/machine-learning/monte-carlo-simulation-an-in-depth-tutorial-with-python-bcf6eb7856c8)
31 |
32 | [Survival Analysis with Python Tutorial — How, What, When, and Why](https://towardsai.net/p/machine-learning/survival-analysis-with-python-tutorial-how-what-when-and-why-19a5cfb3c312)
33 |
34 | [Moment Generating Function for Probability Distribution with Python](https://towardsai.net/p/data-science/moment-generating-function-for-probability-distribution-with-python-tutorial-34857e93d8f6)
35 |
36 | [Bernoulli Distribution — Probability Tutorial with Python](https://towardsai.net/p/statistics/bernoulli-distribution-probability-tutorial-with-python-90061ee078a)
37 |
38 | [Recommendation System Tutorial with Python using Collaborative Filtering](https://towardsai.net/p/machine-learning/recommendation-system-in-depth-tutorial-with-python-for-netflix-using-collaborative-filtering-533ff8a0e444)
39 |
40 | [Linear Algebra for Deep Learning and Machine Learning (ML) Python Tutorial](https://towardsai.net/p/machine-learning/basic-linear-algebra-for-deep-learning-and-machine-learning-ml-python-tutorial-444e23db3e9e)
41 |
42 | [Principal Component Analysis (PCA) with Python Examples — Tutorial](https://towardsai.net/p/data-science/principal-component-analysis-pca-with-python-examples-tutorial-67a917bae9aa)
43 |
44 | [Decision Trees in Machine Learning (ML) with Python Tutorial](https://towardsai.net/p/machine-learning/decision-trees-in-machine-learning-ml-with-python-tutorial-3bfb457bce67)
45 |
46 | [Convolutional Neural Networks (CNNs) Tutorial with Python](https://towardsai.net/p/deeplearning/convolutional-neural-networks-cnns-tutorial-with-python-417c29f0403f)
47 |
48 | [Sentiment Analysis (Opinion Mining) with Python - NLP Tutorial](https://towardsai.net/p/nlp/sentiment-analysis-opinion-mining-with-python-nlp-tutorial-d1f173ca4e3c)
49 |
50 | [Gradient Descent for Machine Learning (ML) 101 with Python Tutorial](https://towardsai.net/p/data-science/gradient-descent-algorithm-for-machine-learning-python-tutorial-ml-9ded189ec556)
51 |
52 | [Random Number Generator Tutorial with Python](https://towardsai.net/p/data-science/random-number-generator-tutorial-with-python-3b35986132c7)
53 |
54 | [What is Deep Learning?](https://towardsai.net/p/deep-learning/what-is-deep-learning-34767bb10366)
55 |
56 | [Genetic Algorithm (GA) Introduction with Example Code](https://towardsai.net/p/programming/genetic-algorithm-ga-introduction-with-example-code-e59f9bc58eaf)
57 |
58 | [K-Nearest Neighbors (KNN) Algorithm Tutorial — Machine Learning Basics](https://news.towardsai.net/knn)
59 |
60 | [What is a GPU? Are GPUs Needed for Deep Learning?](https://news.towardsai.net/gpu)
61 |
62 | # Books
63 |
64 | [Descriptive Statistics for Data-driven Decision Making with Python](https://gumroad.com/l/descriptive-statistics)
65 |
66 | # Sponsors
67 |
68 | A big thank you to C4H3I LLC for sponsoring us in June 2022!
69 |
--------------------------------------------------------------------------------
/bernoulli_distribution_tutorial/bernoulli_distribution.py:
--------------------------------------------------------------------------------
1 | #Import required libraries:
2 | from scipy.stats import bernoulli
3 | import matplotlib.pyplot as plt
4 |
5 | #Define probability of success:
6 | p = 0.7
7 |
8 | #Find the statistical values:
9 | mean, var, skew, kurt = bernoulli.stats(p, moments='mvsk')
10 |
11 | #Print mean:
12 | print("Mean = ",mean)
13 |
14 | #Print variance:
15 | print("Variance = ",var)
16 |
17 | #Print skewness:
18 | print("Skewness = ",skew)
19 |
20 | #Print kurtosis:
21 | print("Kurtosis = ",kurt)
22 |
23 | #Get only mean value:
24 | mean = bernoulli.mean(p)
25 | print("Mean = ",mean)
26 |
27 | #Get only median value:
28 | median = bernoulli.median(p)
29 | print("Median = ",median)
30 |
31 | #Get only variance value:
32 | var = bernoulli.var(p)
33 | print("Variance = ",var)
34 |
35 | #Get only standard deviation value:
36 | std = bernoulli.std(p)
37 | print("Standard Deviation = ",std)
38 |
39 | #Get Probability Mass Function(PMF):
40 | x = [0,1]
41 | p=0.7
42 | print("Probability Mass Function = ",bernoulli.pmf(x,p))
43 |
44 | #Plot the graph for Probability Mass Function(PMF):
45 | x = [0,1]
46 | p=0.7
47 | plt.scatter(x,bernoulli.pmf(x,p),label="PMF")
48 | plt.title("Probability Mass Function")
49 | plt.xlabel("Data Points")
50 | plt.ylabel("Probability")
51 | plt.legend()
52 |
53 | #Get Cumulative Distribution Function (CDF):
54 | x = [0,1]
55 | p = 0.7
56 | print("Cumulative Distribution Function = ",bernoulli.cdf(x,p))
57 |
58 | #Plot the Cumulative Distribution Function (CDF):
59 | x = [0,1]
60 | p = 0.7
61 | plt.scatter(x,bernoulli.cdf(x,p),label="CDF")
62 | plt.title("Cumulative Distribution Function")
63 | plt.xlabel("Data Points")
64 | plt.ylabel("Probability")
65 | plt.legend()
66 |
67 | #Plot the bar graph for PMF:
68 | x = [0,1]
69 | p = 0.7
70 | plt.bar(x,bernoulli.pmf(x,p),width=0.1,color=["r","b"])
71 | plt.title("Probability Mass Function")
72 | plt.xlabel("Data Points")
73 | plt.ylabel("Probability")
74 |
75 |
76 | #Plot the bar graph for CDF:
77 | x = [0,1]
78 | p = 0.7
79 | plt.bar(x,bernoulli.cdf(x,p),width=0.1,color=["r","b"])
80 | plt.title("Cumulative Distribution Function")
81 | plt.xlabel("Data Points")
82 | plt.ylabel("Probability")
83 |
84 | #Generate Output for Random Bernoulli Events:
85 | p = 0.7
86 | r = bernoulli.rvs(p, size=100)
87 | print(r)
88 |
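89 | # Sanity check (added note, not part of the original tutorial): for a Bernoulli(p) with p = 0.7,
90 | # the closed-form moments are mean = p = 0.7, variance = p*(1-p) = 0.21,
91 | # skewness = (1-2p)/sqrt(p*(1-p)) ≈ -0.8729, and excess kurtosis = (1-6p*(1-p))/(p*(1-p)) ≈ -1.2381,
92 | # which should match the values printed by bernoulli.stats above.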
--------------------------------------------------------------------------------
/bernoulli_distribution_tutorial/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/convolutional-neural-networks-python/cnns.py:
--------------------------------------------------------------------------------
1 | #Import required libraries
2 | import numpy as np
3 | import pandas as pd
4 | from keras.optimizers import SGD
5 | from keras.datasets import cifar10
6 | from keras.models import Sequential
7 | from keras.utils import np_utils as utils
8 | from keras.layers import Dropout, Dense, Flatten
9 | from keras.layers import Conv1D, Conv2D, MaxPooling1D, MaxPooling2D  # 1D variants are used by the illustrative snippets at the end
10 |
11 | #Load the CIFAR-10 dataset
12 | (X, y), (X_test, y_test) = cifar10.load_data()
13 |
14 | #Display the test dataset
15 | X_test
16 |
17 | #Normalize the data
18 | X, X_test = X.astype("float32") / 255.0, X_test.astype("float32") / 255.0
19 |
20 | #Convert to categorical
21 | y, y_test = utils.to_categorical(y, 10), utils.to_categorical(y_test, 10)
22 |
23 | #Initialize the model
24 | model = Sequential()
25 |
26 | #Add a convolutional layer with test parameters
27 | model.add(
28 | Conv2D(32, (3, 3), input_shape=(32, 32, 3), padding="same", activation="relu")
29 | )
30 |
31 | #Add the dropout rate
32 | model.add(Dropout(0.2))
33 |
34 | #Add another CNN layer with a valid padding value
35 | model.add(Conv2D(32, (3, 3), activation="relu", padding="valid"))
36 |
37 | #Add a max pooling layer
38 | model.add(MaxPooling2D(pool_size=(2, 2)))
39 |
40 | #Flatten the data
41 | model.add(Flatten())
42 |
43 | #Add a dense layer
44 | model.add(Dense(512, activation="relu"))
45 |
46 | #Add dropout
47 | model.add(Dropout(0.3))
48 |
49 | #Add the output dense layer
50 | model.add(Dense(10, activation="softmax"))
51 |
52 | #Compile the model
53 | model.compile(
54 | loss="categorical_crossentropy",
55 | optimizer=SGD(momentum=0.5, decay=0.0004),
56 | metrics=["accuracy"],
57 | )
58 |
59 | #Fit the algorithm with a number of epochs, 25 in this case
60 | model.fit(X, y, validation_data=(X_test, y_test), epochs=25, batch_size=512)
61 |
62 | #Check the accuracy of the model
63 | print("Accuracy: &2.f%%" %(model.evaluate(X_test, y_test)[1]*100))
64 |
65 | #Max pooling shape
66 | model.add(MaxPooling1D(pool_size=2))
67 |
68 | #Filter shape
69 | model.add(Conv1D(filters=32, kernel_size=3, padding="same", activation="relu"))
70 |
71 | #Number of filters
72 | model.add(Conv1D(filters=32, kernel_size=3, padding="same", activation="relu"))
73 |
74 | #Add dropout
75 | model.add(Dropout(0.2))
76 |
77 | # Early stopping for overfitting
78 | from keras.callbacks import EarlyStopping
79 |
80 | earlystop = EarlyStopping(
81 | monitor="val_loss", min_delta=0, patience=3, verbose=1, restore_best_weights=True
82 | )
83 |
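84 | # Added note: the EarlyStopping callback above is defined but not wired into training here.
85 | # One possible way to use it (a sketch, assuming the same model and data as above) is to pass
86 | # it to fit via the callbacks argument, e.g.
87 | # model.fit(X, y, validation_data=(X_test, y_test), epochs=25, batch_size=512, callbacks=[earlystop])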
--------------------------------------------------------------------------------
/convolutional-neural-networks-python/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/convolutional-neural-networks-python/masks.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import numpy.ma as ma
3 |
4 | original_array = np.array([1, 2, 3, -1, 5])
5 | original_array  # the raw array, including the invalid value -1
6 | 
7 | masked = ma.masked_array(original_array, mask=[0, 0, 0, 1, 0])  # mask out the -1 entry
8 | masked  # masked array displays as [1, 2, 3, --, 5]
9 |
--------------------------------------------------------------------------------
/decision_tree_learning/Iris.csv:
--------------------------------------------------------------------------------
1 | Id,sepal_length,sepal_width,petal_length,petal_width,species
2 | 1,5.1,3.5,1.4,0.2,Iris-setosa
3 | 2,4.9,3.0,1.4,0.2,Iris-setosa
4 | 3,4.7,3.2,1.3,0.2,Iris-setosa
5 | 4,4.6,3.1,1.5,0.2,Iris-setosa
6 | 5,5.0,3.6,1.4,0.2,Iris-setosa
7 | 6,5.4,3.9,1.7,0.4,Iris-setosa
8 | 7,4.6,3.4,1.4,0.3,Iris-setosa
9 | 8,5.0,3.4,1.5,0.2,Iris-setosa
10 | 9,4.4,2.9,1.4,0.2,Iris-setosa
11 | 10,4.9,3.1,1.5,0.1,Iris-setosa
12 | 11,5.4,3.7,1.5,0.2,Iris-setosa
13 | 12,4.8,3.4,1.6,0.2,Iris-setosa
14 | 13,4.8,3.0,1.4,0.1,Iris-setosa
15 | 14,4.3,3.0,1.1,0.1,Iris-setosa
16 | 15,5.8,4.0,1.2,0.2,Iris-setosa
17 | 16,5.7,4.4,1.5,0.4,Iris-setosa
18 | 17,5.4,3.9,1.3,0.4,Iris-setosa
19 | 18,5.1,3.5,1.4,0.3,Iris-setosa
20 | 19,5.7,3.8,1.7,0.3,Iris-setosa
21 | 20,5.1,3.8,1.5,0.3,Iris-setosa
22 | 21,5.4,3.4,1.7,0.2,Iris-setosa
23 | 22,5.1,3.7,1.5,0.4,Iris-setosa
24 | 23,4.6,3.6,1.0,0.2,Iris-setosa
25 | 24,5.1,3.3,1.7,0.5,Iris-setosa
26 | 25,4.8,3.4,1.9,0.2,Iris-setosa
27 | 26,5.0,3.0,1.6,0.2,Iris-setosa
28 | 27,5.0,3.4,1.6,0.4,Iris-setosa
29 | 28,5.2,3.5,1.5,0.2,Iris-setosa
30 | 29,5.2,3.4,1.4,0.2,Iris-setosa
31 | 30,4.7,3.2,1.6,0.2,Iris-setosa
32 | 31,4.8,3.1,1.6,0.2,Iris-setosa
33 | 32,5.4,3.4,1.5,0.4,Iris-setosa
34 | 33,5.2,4.1,1.5,0.1,Iris-setosa
35 | 34,5.5,4.2,1.4,0.2,Iris-setosa
36 | 35,4.9,3.1,1.5,0.1,Iris-setosa
37 | 36,5.0,3.2,1.2,0.2,Iris-setosa
38 | 37,5.5,3.5,1.3,0.2,Iris-setosa
39 | 38,4.9,3.1,1.5,0.1,Iris-setosa
40 | 39,4.4,3.0,1.3,0.2,Iris-setosa
41 | 40,5.1,3.4,1.5,0.2,Iris-setosa
42 | 41,5.0,3.5,1.3,0.3,Iris-setosa
43 | 42,4.5,2.3,1.3,0.3,Iris-setosa
44 | 43,4.4,3.2,1.3,0.2,Iris-setosa
45 | 44,5.0,3.5,1.6,0.6,Iris-setosa
46 | 45,5.1,3.8,1.9,0.4,Iris-setosa
47 | 46,4.8,3.0,1.4,0.3,Iris-setosa
48 | 47,5.1,3.8,1.6,0.2,Iris-setosa
49 | 48,4.6,3.2,1.4,0.2,Iris-setosa
50 | 49,5.3,3.7,1.5,0.2,Iris-setosa
51 | 50,5.0,3.3,1.4,0.2,Iris-setosa
52 | 51,7.0,3.2,4.7,1.4,Iris-versicolor
53 | 52,6.4,3.2,4.5,1.5,Iris-versicolor
54 | 53,6.9,3.1,4.9,1.5,Iris-versicolor
55 | 54,5.5,2.3,4.0,1.3,Iris-versicolor
56 | 55,6.5,2.8,4.6,1.5,Iris-versicolor
57 | 56,5.7,2.8,4.5,1.3,Iris-versicolor
58 | 57,6.3,3.3,4.7,1.6,Iris-versicolor
59 | 58,4.9,2.4,3.3,1.0,Iris-versicolor
60 | 59,6.6,2.9,4.6,1.3,Iris-versicolor
61 | 60,5.2,2.7,3.9,1.4,Iris-versicolor
62 | 61,5.0,2.0,3.5,1.0,Iris-versicolor
63 | 62,5.9,3.0,4.2,1.5,Iris-versicolor
64 | 63,6.0,2.2,4.0,1.0,Iris-versicolor
65 | 64,6.1,2.9,4.7,1.4,Iris-versicolor
66 | 65,5.6,2.9,3.6,1.3,Iris-versicolor
67 | 66,6.7,3.1,4.4,1.4,Iris-versicolor
68 | 67,5.6,3.0,4.5,1.5,Iris-versicolor
69 | 68,5.8,2.7,4.1,1.0,Iris-versicolor
70 | 69,6.2,2.2,4.5,1.5,Iris-versicolor
71 | 70,5.6,2.5,3.9,1.1,Iris-versicolor
72 | 71,5.9,3.2,4.8,1.8,Iris-versicolor
73 | 72,6.1,2.8,4.0,1.3,Iris-versicolor
74 | 73,6.3,2.5,4.9,1.5,Iris-versicolor
75 | 74,6.1,2.8,4.7,1.2,Iris-versicolor
76 | 75,6.4,2.9,4.3,1.3,Iris-versicolor
77 | 76,6.6,3.0,4.4,1.4,Iris-versicolor
78 | 77,6.8,2.8,4.8,1.4,Iris-versicolor
79 | 78,6.7,3.0,5.0,1.7,Iris-versicolor
80 | 79,6.0,2.9,4.5,1.5,Iris-versicolor
81 | 80,5.7,2.6,3.5,1.0,Iris-versicolor
82 | 81,5.5,2.4,3.8,1.1,Iris-versicolor
83 | 82,5.5,2.4,3.7,1.0,Iris-versicolor
84 | 83,5.8,2.7,3.9,1.2,Iris-versicolor
85 | 84,6.0,2.7,5.1,1.6,Iris-versicolor
86 | 85,5.4,3.0,4.5,1.5,Iris-versicolor
87 | 86,6.0,3.4,4.5,1.6,Iris-versicolor
88 | 87,6.7,3.1,4.7,1.5,Iris-versicolor
89 | 88,6.3,2.3,4.4,1.3,Iris-versicolor
90 | 89,5.6,3.0,4.1,1.3,Iris-versicolor
91 | 90,5.5,2.5,4.0,1.3,Iris-versicolor
92 | 91,5.5,2.6,4.4,1.2,Iris-versicolor
93 | 92,6.1,3.0,4.6,1.4,Iris-versicolor
94 | 93,5.8,2.6,4.0,1.2,Iris-versicolor
95 | 94,5.0,2.3,3.3,1.0,Iris-versicolor
96 | 95,5.6,2.7,4.2,1.3,Iris-versicolor
97 | 96,5.7,3.0,4.2,1.2,Iris-versicolor
98 | 97,5.7,2.9,4.2,1.3,Iris-versicolor
99 | 98,6.2,2.9,4.3,1.3,Iris-versicolor
100 | 99,5.1,2.5,3.0,1.1,Iris-versicolor
101 | 100,5.7,2.8,4.1,1.3,Iris-versicolor
102 | 101,6.3,3.3,6.0,2.5,Iris-virginica
103 | 102,5.8,2.7,5.1,1.9,Iris-virginica
104 | 103,7.1,3.0,5.9,2.1,Iris-virginica
105 | 104,6.3,2.9,5.6,1.8,Iris-virginica
106 | 105,6.5,3.0,5.8,2.2,Iris-virginica
107 | 106,7.6,3.0,6.6,2.1,Iris-virginica
108 | 107,4.9,2.5,4.5,1.7,Iris-virginica
109 | 108,7.3,2.9,6.3,1.8,Iris-virginica
110 | 109,6.7,2.5,5.8,1.8,Iris-virginica
111 | 110,7.2,3.6,6.1,2.5,Iris-virginica
112 | 111,6.5,3.2,5.1,2.0,Iris-virginica
113 | 112,6.4,2.7,5.3,1.9,Iris-virginica
114 | 113,6.8,3.0,5.5,2.1,Iris-virginica
115 | 114,5.7,2.5,5.0,2.0,Iris-virginica
116 | 115,5.8,2.8,5.1,2.4,Iris-virginica
117 | 116,6.4,3.2,5.3,2.3,Iris-virginica
118 | 117,6.5,3.0,5.5,1.8,Iris-virginica
119 | 118,7.7,3.8,6.7,2.2,Iris-virginica
120 | 119,7.7,2.6,6.9,2.3,Iris-virginica
121 | 120,6.0,2.2,5.0,1.5,Iris-virginica
122 | 121,6.9,3.2,5.7,2.3,Iris-virginica
123 | 122,5.6,2.8,4.9,2.0,Iris-virginica
124 | 123,7.7,2.8,6.7,2.0,Iris-virginica
125 | 124,6.3,2.7,4.9,1.8,Iris-virginica
126 | 125,6.7,3.3,5.7,2.1,Iris-virginica
127 | 126,7.2,3.2,6.0,1.8,Iris-virginica
128 | 127,6.2,2.8,4.8,1.8,Iris-virginica
129 | 128,6.1,3.0,4.9,1.8,Iris-virginica
130 | 129,6.4,2.8,5.6,2.1,Iris-virginica
131 | 130,7.2,3.0,5.8,1.6,Iris-virginica
132 | 131,7.4,2.8,6.1,1.9,Iris-virginica
133 | 132,7.9,3.8,6.4,2.0,Iris-virginica
134 | 133,6.4,2.8,5.6,2.2,Iris-virginica
135 | 134,6.3,2.8,5.1,1.5,Iris-virginica
136 | 135,6.1,2.6,5.6,1.4,Iris-virginica
137 | 136,7.7,3.0,6.1,2.3,Iris-virginica
138 | 137,6.3,3.4,5.6,2.4,Iris-virginica
139 | 138,6.4,3.1,5.5,1.8,Iris-virginica
140 | 139,6.0,3.0,4.8,1.8,Iris-virginica
141 | 140,6.9,3.1,5.4,2.1,Iris-virginica
142 | 141,6.7,3.1,5.6,2.4,Iris-virginica
143 | 142,6.9,3.1,5.1,2.3,Iris-virginica
144 | 143,5.8,2.7,5.1,1.9,Iris-virginica
145 | 144,6.8,3.2,5.9,2.3,Iris-virginica
146 | 145,6.7,3.3,5.7,2.5,Iris-virginica
147 | 146,6.7,3.0,5.2,2.3,Iris-virginica
148 | 147,6.3,2.5,5.0,1.9,Iris-virginica
149 | 148,6.5,3.0,5.2,2.0,Iris-virginica
150 | 149,6.2,3.4,5.4,2.3,Iris-virginica
151 | 150,5.9,3.0,5.1,1.8,Iris-virginica
152 |
--------------------------------------------------------------------------------
/decision_tree_learning/basic_decision_tree.py:
--------------------------------------------------------------------------------
1 | from sklearn.tree import DecisionTreeClassifier
2 | from sklearn.datasets import load_breast_cancer
3 | from sklearn.model_selection import train_test_split
4 |
5 | cancer = load_breast_cancer()
6 |
7 | X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, stratify=cancer.target, random_state=42)
8 |
9 | tree = DecisionTreeClassifier(random_state=0)
10 | tree.fit(X_train, y_train)
11 |
12 | print("Accuracy on training set: {:.3f}".format(tree.score(X_train, y_train)))
13 | print("Accuracy on test set: {:.3f}".format(tree.score(X_test, y_test)))
--------------------------------------------------------------------------------
/decision_tree_learning/decision_tree_classificaiton.py:
--------------------------------------------------------------------------------
1 | # Commented out IPython magic to ensure Python compatibility.
2 | import numpy as np
3 | import pandas as pd
4 | import matplotlib.pyplot as plt
5 | import seaborn as sns
6 |
7 | from sklearn import tree
8 |
9 | # %matplotlib inline
10 |
11 | """**Read Iris Dataset**"""
12 |
13 | data = pd.read_csv('Iris.csv')
14 | data
15 |
16 | data.shape
17 |
18 | """**Define Columns**"""
19 |
20 | col_names = ['id', 'sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
21 |
22 |
23 | data.columns = col_names
24 |
25 | col_names
26 |
27 | """**Drop Id Column**"""
28 |
29 | data = data.drop(['id'], axis=1)
30 |
31 | data.head()
32 |
33 | data.info()
34 |
35 | """**Checking the target categorical counts**"""
36 |
37 | data['species'].value_counts()
38 |
39 | """**Check missing values in variables**"""
40 |
41 | data.isnull().sum()
42 |
43 | target_col = ['species']
44 |
45 | X = data.drop(['species'], axis=1)
46 |
47 | y = data['species']
48 |
49 | """**Split dataset into train and test**"""
50 |
51 | from sklearn.model_selection import train_test_split
52 |
53 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 42)
54 |
55 | """**Check datatypes**"""
56 |
57 | X_train.dtypes
58 |
59 | """**Decision Tree Classification based on Gini index criterion**"""
60 |
61 | from sklearn.tree import DecisionTreeClassifier
62 |
63 | clf_gini = DecisionTreeClassifier(criterion='gini', max_depth=3, random_state=0)
64 | clf_gini.fit(X_train, y_train)
65 |
66 | y_pred_gini = clf_gini.predict(X_test)
67 | y_pred_gini
68 |
69 | """**Check accuracy of the model**"""
70 |
71 | from sklearn.metrics import accuracy_score
72 |
73 | print('Model accuracy score with criterion gini index: {0:0.4f}'. format(accuracy_score(y_test, y_pred_gini)))# y_pred_gini are the predicted class labels in the test-set.
74 |
75 | #Compare the train-set and test-set accuracy
76 | y_pred_train_gini = clf_gini.predict(X_train)
77 |
78 | y_pred_train_gini
79 |
80 | print('Training-set accuracy score: {0:0.4f}'. format(accuracy_score(y_train, y_pred_train_gini)))
81 |
82 | #Check for overfitting and underfitting
83 |
84 | print('Training set score: {:.4f}'.format(clf_gini.score(X_train, y_train)))
85 |
86 | print('Test set score: {:.4f}'.format(clf_gini.score(X_test, y_test)))
87 |
88 | """**Pictorial representation of Decision Tree**"""
89 |
90 | plt.figure(figsize=(12,8))
91 | tree.plot_tree(clf_gini.fit(X_train, y_train))
--------------------------------------------------------------------------------
/decision_tree_learning/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/deep-learning/deep_learning_basics.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "deep-learning-basics.ipynb",
7 | "provenance": [],
8 | "include_colab_link": true
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | }
14 | },
15 | "cells": [
16 | {
17 | "cell_type": "markdown",
18 | "metadata": {
19 | "id": "view-in-github",
20 | "colab_type": "text"
21 | },
22 | "source": [
23 |         ""
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {
29 | "id": "DrdHQbwAvYcO"
30 | },
31 | "source": [
32 | "# Neuron Implementation\r\n",
33 | "\r\n",
34 | "* Tutorial: https://towardsai.net/deep-learning\r\n",
35 | "* Github: https://github.com/towardsai/tutorials/tree/master/deep-learning"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "metadata": {
41 | "id": "29JF978gvSpE"
42 | },
43 | "source": [
44 | "import numpy as np"
45 | ],
46 | "execution_count": 1,
47 | "outputs": []
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {
52 | "id": "c814KzCsvk8x"
53 | },
54 | "source": [
55 | "**Create Sigmoid function**"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "metadata": {
61 | "id": "D_VDpun0vdJ1"
62 | },
63 | "source": [
64 | "def sigmoid(x):\r\n",
65 | " return 1/ (1 + np.exp(-x))"
66 | ],
67 | "execution_count": 2,
68 | "outputs": []
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {
73 | "id": "oDvAJGrlv-6_"
74 | },
75 | "source": [
76 | "# Creating an Artificial Neuron (AN)"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "metadata": {
82 | "id": "EzX9ccMXvnjM"
83 | },
84 | "source": [
85 | "class Neuron:\r\n",
86 | " def __init__(self, weights, bias):\r\n",
87 | " self.weights = weights\r\n",
88 | " self.bias = bias\r\n",
89 | " \r\n",
90 | " def feedforwards(self, inputs):\r\n",
91 | " total = np.dot(self.weights, inputs) + self.bias\r\n",
92 | " return sigmoid(total)\r\n"
93 | ],
94 | "execution_count": 3,
95 | "outputs": []
96 | },
97 | {
98 | "cell_type": "code",
99 | "metadata": {
100 | "colab": {
101 | "base_uri": "https://localhost:8080/"
102 | },
103 | "id": "DdAkVREWwBmT",
104 | "outputId": "1711b30b-c520-4965-9636-162ffd1814a8"
105 | },
106 | "source": [
107 | "weights = np.array([0, 1])\r\n",
108 | "bias = 4\r\n",
109 | "\r\n",
110 | "neuron = Neuron(weights, bias)\r\n",
111 | "\r\n",
112 | "x = np.array([2, 3])\r\n",
113 | "\r\n",
114 | "forward = neuron.feedforwards(x)\r\n",
115 | "\r\n",
116 | "print(forward)\r\n"
117 | ],
118 | "execution_count": 4,
119 | "outputs": [
120 | {
121 | "output_type": "stream",
122 | "text": [
123 | "0.9990889488055994\n"
124 | ],
125 | "name": "stdout"
126 | }
127 | ]
128 | }
129 | ]
130 | }
--------------------------------------------------------------------------------
/deep-learning/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/deep-learning/neuron.py:
--------------------------------------------------------------------------------
1 | # import numpy
2 | import numpy as np
3 |
4 | # define our sigmoid function
5 | def sigmoid(x):
6 | return 1/ (1 + np.exp(-x))
7 |
8 | # create the AN (artificial neuron)
9 | class Neuron:
10 | def __init__(self, weights, bias):
11 | self.weights = weights
12 | self.bias = bias
13 |
14 | def feedforwards(self, inputs):
15 | total = np.dot(self.weights, inputs) + self.bias
16 | return sigmoid(total)
17 |
18 | weights = np.array([0, 1])
19 | bias = 4
20 |
21 | neuron = Neuron(weights, bias)
22 |
23 | x = np.array([2, 3])
24 |
25 | forward = neuron.feedforwards(x)
26 |
27 | print(forward)
28 |
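29 | # Worked check: the neuron computes sigmoid(w . x + b)
30 | # = sigmoid(0*2 + 1*3 + 4) = sigmoid(7) ≈ 0.9991, which is the value printed above.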
--------------------------------------------------------------------------------
/descriptive-statistics/descriptive-statistics-pdf-book-sample.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/towardsai/tutorials/cc12fe183d50ce6095f044d7346f30d5d0522584/descriptive-statistics/descriptive-statistics-pdf-book-sample.pdf
--------------------------------------------------------------------------------
/descriptive-statistics/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/genetic-algorithm-tutorial/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
6 |
--------------------------------------------------------------------------------
/genetic-algorithm-tutorial/implementation.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """genetic-algorithm-python-tutorial.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/161ijkvn8wG_seVtQexm-p3fW3r5p8s_x
8 |
9 | # Genetic Algorithm Implementation with Python
10 |
11 | * Tutorial: https://towardsai.net/p/computer-science/genetic-algorithm-ga-introduction-with-example-code-e59f9bc58eaf
12 |
13 | * Github: https://github.com/towardsai/tutorials/tree/master/genetic-algorithm-tutorial
14 |
15 | The Genetic Algorithm is a class of evolutionary algorithms broadly inspired by biological evolution: parents are selected, they reproduce, and their offspring are mutated. The aim is to produce offspring that are fitter than their parents. A genetic algorithm is based on natural selection and simulates this process of evolution.
16 | """
17 |
18 | import numpy as np
19 | import matplotlib.pyplot as plt
20 | import copy
21 |
22 | # cost function
23 | def sphere(x):
24 | ''' This is the problem we will be
25 | optimizing, each chromosome of parent has a cost
26 | which is calculated from this cost function'''
27 |
28 | return sum(x**2)
29 |
30 | def roulette_wheel_selection(p):
31 | ''' Roulette Wheel Selection is a method of parent
32 |     selection for breeding. We take the cumulative sum of probabilities
33 |     and select the first parent whose cumulative sum is greater than
34 |     a random number'''
35 |
36 | c = np.cumsum(p)
37 | r = sum(p) * np.random.rand()
38 | ind = np.argwhere(r <= c)
39 |
40 | return ind[0][0]
41 |
42 | def crossover(p1, p2):
43 |     ''' Performing uniform crossover. Alpha is the flag
44 |     that determines which gene of each chromosome is chosen
45 |     to be inherited by the offspring. Multiply the alpha value
46 |     with each gene of every chromosome of both parents and
47 |     then add the resultant values to get the child chromosome'''
48 |
49 | c1 = copy.deepcopy(p1)
50 | c2 = copy.deepcopy(p2)
51 |
52 | # Uniform crossover
53 | alpha = np.random.uniform(0, 1, *(c1['position'].shape))
54 | c1['position'] = alpha*p1['position'] + (1-alpha)*p2['position']
55 | c2['position'] = alpha*p2['position'] + (1-alpha)*p1['position']
56 |
57 | return c1, c2
58 |
59 | def mutate(c, mu, sigma):
60 | '''
61 | c: child chromosome
62 | mu: mutation rate. % of gene to be modified
63 | sigma: step size of mutation'''
64 |
65 | y = copy.deepcopy(c)
66 |     flag = np.random.rand(*(c['position'].shape)) <= mu # array of True and False, indicating at which positions to perform mutation
67 | ind = np.argwhere(flag)
68 | y['position'][ind] += sigma * np.random.randn(*ind.shape)
69 |
70 | return y
71 |
72 | def bounds(c, varmin, varmax):
73 | ''' Defines the upper and lower bound of gene value'''
74 |
75 | c['position'] = np.maximum(c['position'], varmin)
76 | c['position'] = np.minimum(c['position'], varmax)
77 |
78 | def sort(arr):
79 |     ''' Bubble sorting the population + offspring
80 |     in every iteration to keep the best-fit individuals at the top'''
81 |
82 | n = len(arr)
83 |
84 | for i in range(n-1):
85 |
86 | for j in range(0, n-i-1):
87 | if arr[j]['cost'] > arr[j+1]['cost'] :
88 | arr[j], arr[j+1] = arr[j+1], arr[j]
89 | return arr
90 |
91 | def ga(costfunc, num_var, varmin, varmax, maxit, npop, num_children, mu, sigma, beta):
92 |
93 | # Placeholder for each individual
94 | population = {}
95 |     for i in range(npop): # each individual has position (chromosomes) and cost,
96 | population[i] = {'position': None, 'cost': None} # create individual as many as population size(npop)
97 |
98 | # Best solution found
99 | bestsol = copy.deepcopy(population)
100 | bestsol_cost = np.inf # initial best cost is infinity
101 |
102 | # Initialize population - 1st Gen
103 | for i in range(npop):
104 | population[i]['position'] = np.random.uniform(varmin, varmax, num_var) # randomly initialize the chromosomes and cost
105 | population[i]['cost'] = costfunc(population[i]['position'])
106 |
107 | if population[i]['cost'] < bestsol_cost: # if cost of an individual is less(best) than best cost,
108 | bestsol = copy.deepcopy(population[i]) # replace the best solution with that individual
109 |
110 | # Best cost of each generation/iteration
111 | bestcost = np.empty(maxit)
112 |
113 | # Main loop
114 | for it in range(maxit):
115 |
116 | # Calculating probability for roulette wheel selection
117 | costs = []
118 | for i in range(len(population)):
119 | costs.append(population[i]['cost']) # list of all the population cost
120 | costs = np.array(costs)
121 | avg_cost = np.mean(costs) # taking average of the costs
122 | if avg_cost != 0:
123 | costs = costs/avg_cost
124 |         probs = np.exp(-beta*costs) # probability is the exponential of negative beta times cost
125 |
126 |     for _ in range(num_children//2): # we will have two offspring for each crossover
127 | # hence divide number of children by 2
128 | '''
129 | -> choosing two parents randomly for mating
130 | -> we are shuffling all the 20 parent individuals and
131 | -> choosing first two of the shuffled array as our parents for mating
132 |
133 |         Randomly selecting parents by shuffling them.
134 |         But we will be using roulette wheel selection
135 |         for our algorithm
136 |
137 | q = np.random.permutation(npop)
138 | p1 = population[q[0]]
139 | p2 = population[q[1]]
140 | '''
141 |
142 | # Roulette wheel selection
143 | p1 = population[roulette_wheel_selection(probs)]
144 | p2 = population[roulette_wheel_selection(probs)]
145 |
146 | # crossover two parents
147 | c1, c2 = crossover(p1, p2)
148 |
149 | # Perform mutation
150 | c1 = mutate(c1, mu, sigma)
151 | c2 = mutate(c2, mu, sigma)
152 |
153 | # Apply bounds
154 | bounds(c1, varmin, varmax)
155 | bounds(c2, varmin, varmax)
156 |
157 |         # Evaluate first offspring
158 | c1['cost'] = costfunc(c1['position']) # calculate cost function of child 1
159 |
160 | if type(bestsol_cost) == float:
161 | if c1['cost'] < bestsol_cost: # replacing best solution in every generation/iteration
162 | bestsol_cost = copy.deepcopy(c1)
163 | else:
164 | if c1['cost'] < bestsol_cost['cost']: # replacing best solution in every generation/iteration
165 | bestsol_cost = copy.deepcopy(c1)
166 |
167 |
168 |         # Evaluate second offspring
169 | if c2['cost'] < bestsol_cost['cost']: # replacing best solution in every generation/iteration
170 | bestsol_cost = copy.deepcopy(c2)
171 |
172 | # Merge, Sort and Select
173 | population[len(population)] = c1
174 | population[len(population)] = c2
175 |
176 | population = sort(population)
177 |
178 | # Store best cost
179 | bestcost[it] = bestsol_cost['cost']
180 |
181 | # Show generation information
182 | print('Iteration {}: Best Cost = {}'. format(it, bestcost[it]))
183 |
184 |
185 | out = population
186 | Bestsol = bestsol
187 | bestcost = bestcost
188 | return (out, Bestsol, bestcost)
189 |
190 | # Problem definition
191 | costfunc = sphere
192 | num_var = 5 # number of decision variables
193 | varmin = -10 # lower bound
194 | varmax = 10 # upper bound
195 |
196 | # GA Parameters
197 | maxit = 501 # number of iterations
198 | npop = 20 # initial population size
199 | beta = 1
200 | prop_children = 1 # proportion of children to population
201 | num_children = int(np.round(prop_children * npop/2)*2) # making sure it is always an even number
202 | mu = 0.2 # mutation rate of 20%; 20% of 5 genes is 1, so 1 gene is mutated
203 | sigma = 0.1 # step size of mutation
204 |
205 |
206 | # Run GA
207 | out = ga(costfunc, num_var, varmin, varmax, maxit, npop, num_children, mu, sigma, beta)
208 |
209 | # Results
210 | #(out, Bestsol, bestcost)
211 | plt.plot(out[2])
212 | plt.xlim(0, maxit)
213 | plt.xlabel('Generations')
214 | plt.ylabel('Best Cost')
215 | plt.title('Genetic Algorithm')
216 | plt.grid(True)
217 | plt.show()
218 |
--------------------------------------------------------------------------------
/google_colab_tutorial/check_gpu.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """check_gpu.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/1FDLs0qvY17D8-_4Ds5NDslVvl1oC_i5K
8 |
9 | # Check the details of the GPU hardware accelerator in Colab
10 | """
11 |
12 | import tensorflow as tf
13 | from tensorflow.python.client import device_lib
14 |
15 | tf.test.gpu_device_name()
16 |
17 | device_lib.list_local_devices()
18 |
19 |
--------------------------------------------------------------------------------
/google_colab_tutorial/collab_magic.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """collab_magic.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/1ad_oOndoeQsxr0W_fOJ-NocZ4Y8isacy
8 |
9 | # Colab Magics
10 |
11 | **List of All Magic Commands**
12 | """
13 |
14 | # Commented out IPython magic to ensure Python compatibility.
15 | # %lsmagic
16 |
17 | """**List Local Directories**"""
18 |
19 | # Commented out IPython magic to ensure Python compatibility.
20 | # %ldir
21 |
22 | """**Get Notebook History**"""
23 |
24 | # Commented out IPython magic to ensure Python compatibility.
25 | # %history
26 |
27 | """**CPU Time**"""
28 |
29 | # Commented out IPython magic to ensure Python compatibility.
30 | # %time
31 |
32 | """**How long has the system been running?**"""
33 |
34 | !uptime
35 |
36 | """**Display available and used memory**"""
37 |
38 | !free -h
39 | print("-"*100)
40 |
41 | """**Display the CPU specification**"""
42 |
43 | !lscpu
44 | print("-"*70)
45 |
46 | """**List all running VM processes**"""
47 |
48 | # Commented out IPython magic to ensure Python compatibility.
49 | # %%sh
50 | # echo "List all running VM processes."
51 | # ps -ef
52 | # echo "Done"
53 |
54 | """**Embed HTML**"""
55 |
56 | # Commented out IPython magic to ensure Python compatibility.
57 | # %%html
58 | #
59 |
60 | #@title Personal Detail
61 | #@markdown Information.
62 |
63 | Name = 'Peter' #@param {type: "string"}
64 | Age = 25 #@param {type: "slider", min: 1, max: 100}
65 | zip = 1234 #@param {type: "number"}
66 | Date = '2020-01-26' #@param {type: "date"}
67 | Gender = "Male" #@param ['Male', 'Female', 'Other']
68 | #@markdown ---
69 | print("Submitting the form")
70 | print(Name, Age, zip, Date, Gender)
71 | print("Submitted")
72 |
73 | """# Plotting"""
74 |
75 | # Commented out IPython magic to ensure Python compatibility.
76 | # %matplotlib inline
77 | import numpy as np
78 | from matplotlib import pyplot
79 |
80 | random_data = np.random.rand(500).astype(np.float32)
81 | noise_data = np.random.normal(scale=0.5, size=len(random_data))
82 | y = np.sin(random_data * 7) + noise_data
83 | pyplot.scatter(random_data, y)
84 |
85 | """**Plot HeatMap**"""
86 |
87 | import matplotlib.pyplot as plt
88 | import numpy as np
89 | import seaborn as sns
90 |
91 | length = 10
92 | data = 5 + np.random.randn(length, length)
93 | data += np.arange(length)
94 | data += np.reshape(np.arange(length), (length, 1))
95 | sns.heatmap(data)
96 | plt.show()
--------------------------------------------------------------------------------
/google_colab_tutorial/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/google_colab_tutorial/kaggle_data_download.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """kaggle_data_download.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/1-bSWzsTCU18243Z75sY2zvs5q6uFbw6v
8 |
9 | # Directly Upload Data from Kaggle
10 |
11 | **Install Kaggle Package**
12 | """
13 |
14 | !pip install -q kaggle
15 |
16 | from google.colab import files
17 |
18 | files.upload()
19 |
20 | !mkdir ~/.kaggle
21 |
22 | !cp kaggle.json ~/.kaggle/
23 |
24 | !chmod 600 ~/.kaggle/kaggle.json
25 |
26 | !kaggle datasets list
27 |
28 | !kaggle competitions download -c competitive-data-science-predict-future-sales
--------------------------------------------------------------------------------
/gradient_descent_tutorial/data.txt:
--------------------------------------------------------------------------------
1 | 6.1101,17.592
2 | 5.5277,9.1302
3 | 8.5186,13.662
4 | 7.0032,11.854
5 | 5.8598,6.8233
6 | 8.3829,11.886
7 | 7.4764,4.3483
8 | 8.5781,12
9 | 6.4862,6.5987
10 | 5.0546,3.8166
11 | 5.7107,3.2522
12 | 14.164,15.505
13 | 5.734,3.1551
14 | 8.4084,7.2258
15 | 5.6407,0.71618
16 | 5.3794,3.5129
17 | 6.3654,5.3048
18 | 5.1301,0.56077
19 | 6.4296,3.6518
20 | 7.0708,5.3893
21 | 6.1891,3.1386
22 | 20.27,21.767
23 | 5.4901,4.263
24 | 6.3261,5.1875
25 | 5.5649,3.0825
26 | 18.945,22.638
27 | 12.828,13.501
28 | 10.957,7.0467
29 | 13.176,14.692
30 | 22.203,24.147
31 | 5.2524,-1.22
32 | 6.5894,5.9966
33 | 9.2482,12.134
34 | 5.8918,1.8495
35 | 8.2111,6.5426
36 | 7.9334,4.5623
37 | 8.0959,4.1164
38 | 5.6063,3.3928
39 | 12.836,10.117
40 | 6.3534,5.4974
41 | 5.4069,0.55657
42 | 6.8825,3.9115
43 | 11.708,5.3854
44 | 5.7737,2.4406
45 | 7.8247,6.7318
46 | 7.0931,1.0463
47 | 5.0702,5.1337
48 | 5.8014,1.844
49 | 11.7,8.0043
50 | 5.5416,1.0179
51 | 7.5402,6.7504
52 | 5.3077,1.8396
53 | 7.4239,4.2885
54 | 7.6031,4.9981
55 | 6.3328,1.4233
56 | 6.3589,-1.4211
57 | 6.2742,2.4756
58 | 5.6397,4.6042
59 | 9.3102,3.9624
60 | 9.4536,5.4141
61 | 8.8254,5.1694
62 | 5.1793,-0.74279
63 | 21.279,17.929
64 | 14.908,12.054
65 | 18.959,17.054
66 | 7.2182,4.8852
67 | 8.2951,5.7442
68 | 10.236,7.7754
69 | 5.4994,1.0173
70 | 20.341,20.992
71 | 10.136,6.6799
72 | 7.3345,4.0259
73 | 6.0062,1.2784
74 | 7.2259,3.3411
75 | 5.0269,-2.6807
76 | 6.5479,0.29678
77 | 7.5386,3.8845
78 | 5.0365,5.7014
79 | 10.274,6.7526
80 | 5.1077,2.0576
81 | 5.7292,0.47953
82 | 5.1884,0.20421
83 | 6.3557,0.67861
84 | 9.7687,7.5435
85 | 6.5159,5.3436
86 | 8.5172,4.2415
87 | 9.1802,6.7981
88 | 6.002,0.92695
89 | 5.5204,0.152
90 | 5.0594,2.8214
91 | 5.7077,1.8451
92 | 7.6366,4.2959
93 | 5.8707,7.2029
94 | 5.3054,1.9869
95 | 8.2934,0.14454
96 | 13.394,9.0551
97 | 5.4369,0.61705
98 |
--------------------------------------------------------------------------------
/gradient_descent_tutorial/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/k-nearest-neighbors/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/k-nearest-neighbors/k_nearest_neighbor_knn_tutorial.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | #K-Nearest Neighbors (KNN) Algorithm Tutorial - Machine Learning Basics
4 | * Tutorial: https://news.towardsai.net/knn
5 | * Github: https://github.com/towardsai/tutorials/tree/master/k-nearest-neighbors
6 | """
7 |
8 | import numpy as np
9 | import pandas as pd
10 | import matplotlib.pyplot as plt
11 | import seaborn as sns
12 |
13 | from sklearn.model_selection import train_test_split
14 | from sklearn.neighbors import KNeighborsClassifier
15 | from sklearn import metrics
16 |
17 | # Import the iris dataset as provided by the sklearn Python module
18 | from sklearn.datasets import load_iris
19 | iris = load_iris()
20 |
21 | type(iris)
22 |
23 | # Converting sklearn data into Pandas dataframe
24 | # target variables imply
25 | # 0.0 - Setosa
26 | # 1.0 - Versicolor
27 | # 2.0 - Virginica
28 | iris = pd.DataFrame(data= np.c_[iris['data'], iris['target']],
29 | columns= iris['feature_names'] + ['target'])
30 | iris.head()
31 |
32 | """## Checking for outliers and imbalanced data"""
33 |
34 | # data is perfectly balanced
35 | sns.countplot(x='target', data=iris)
36 |
37 | # not many outliers to be handled
38 | for feature in ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']:
39 | sns.boxplot(x='target', y=feature, data=iris)
40 | plt.show()
41 |
42 | """## Plotting a 2-D graph"""
43 |
44 | sns.scatterplot(x='sepal length (cm)', y='sepal width (cm)', data=iris, hue='target', palette="deep")
45 |
46 | """## Separating features and target"""
47 |
48 | # X variable contains flower features
49 | # Y variable contains target values
50 | X = iris.drop(['target'], axis=1)
51 | y = iris['target']
52 |
53 | """## Split the dataset into train and test sets"""
54 |
55 | # 80% of the data will be randomly selected as training data
56 | # remaining 20% as testing data
57 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
58 |
59 | # checking accuracy score for k-values ranging from 1 to 25
60 | k_range = list(range(1,26))
61 | scores = []
62 | 
63 | # model fitting and calculating accuracy score
64 | # for each k-value in the range 1-25
65 | for k in k_range:
66 | knn = KNeighborsClassifier(n_neighbors=k)
67 | knn.fit(X_train, y_train)
68 | y_pred = knn.predict(X_test)
69 | scores.append(metrics.accuracy_score(y_test, y_pred))
70 |
71 | plt.plot(k_range, scores)
72 | plt.xlabel('Value of k')
73 | plt.ylabel('Accuracy Score')
74 | plt.title('Accuracy Scores for different values of k')
75 | plt.show()
76 |
77 | # 60% of the data will be randomly selected as training data
78 | # remaining 40% as testing data
79 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
80 |
81 | """## Initial model"""
82 |
83 | # Initial model with nearest neighbor as 1(k-value)
84 | # further, k will be replaced with optimal value
85 | knn = KNeighborsClassifier(n_neighbors=1)
86 |
87 | knn.fit(X_train, y_train)
88 | print(knn.score(X_test, y_test))
89 |
90 | """## Finding the right k-value"""
91 |
92 | # checking accuracy score for k-values ranging from 1 to 25
93 | k_range = list(range(1,26))
94 | scores = []
95 | 
96 | # model fitting and calculating accuracy score
97 | # for each k-value in the range 1-25
98 | for k in k_range:
99 | knn = KNeighborsClassifier(n_neighbors=k)
100 | knn.fit(X_train, y_train)
101 | y_pred = knn.predict(X_test)
102 | scores.append(metrics.accuracy_score(y_test, y_pred))
103 |
104 | plt.plot(k_range, scores)
105 | plt.xlabel('Value of k')
106 | plt.ylabel('Accuracy Score')
107 | plt.title('Accuracy Scores for different values of k')
108 | plt.show()
109 |
110 | """## Accuracy for optimal k-value"""
111 |
112 | # 11 is the optimal k-value for this dataset
113 | knn = KNeighborsClassifier(n_neighbors=11)
114 | knn.fit(X_train, y_train)
115 | print(knn.score(X_test, y_test))
116 |
117 | """## Predicting class of new data"""
118 |
119 | knn = KNeighborsClassifier(n_neighbors=11)
120 |
121 | # fitting the entire data without splitting
122 | # into train and test
123 | knn.fit(iris.drop(['target'], axis=1), iris['target'])
124 |
125 | # new data to be classified
126 | X_new = np.array([[1, 2.9, 10, 0.2]])
127 | prediction = knn.predict(X_new)
128 | print(prediction)
129 |
130 | if prediction[0] == 0.0:
131 | print('Setosa')
132 | elif prediction[0] == 1.0:
133 | print('Versicolor')
134 | else:
135 | print('Virginica')
136 |
--------------------------------------------------------------------------------
/linear-algebra-for-ml-and-deep-learning/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/linear-algebra-for-ml-and-deep-learning/house_price.csv:
--------------------------------------------------------------------------------
1 | square_feet,price
2 | 150,6450
3 | 200,7450
4 | 250,8450
5 | 300,9450
6 | 350,11450
7 | 400,15450
8 | 600,18450
9 |
--------------------------------------------------------------------------------
/linear-algebra-for-ml-and-deep-learning/linear_regression.py:
--------------------------------------------------------------------------------
1 | # import important libraries
2 |
3 | import pandas as pd
4 | import numpy as np
5 |
6 | df = pd.read_csv('house_price.csv')
7 |
8 | print(df.head())
9 |
10 | def get_mean(value):
11 | total = sum(value)
12 | length = len(value)
13 | mean = total/length
14 | return mean
15 |
16 | def get_variance(value):
17 | mean = get_mean(value)
18 | mean_difference_square = [pow((item - mean), 2) for item in value]
19 | variance = sum(mean_difference_square)/float(len(value)-1)
20 | return variance
21 |
22 | def get_covariance(value1, value2):
23 | value1_mean = get_mean(value1)
24 | value2_mean = get_mean(value2)
25 | values_size = len(value1)
26 | covariance = 0.0
27 | for i in range(0, values_size):
28 | covariance += (value1[i] - value1_mean) * (value2[i] - value2_mean)
29 |
30 | return covariance / float(values_size - 1)
31 |
32 | def linear_regression(df):
33 |
34 | X = df['square_feet']
35 | Y = df['price']
36 | m = len(X)
37 |
38 | square_feet_mean = get_mean(X)
39 | price_mean = get_mean(Y)
40 |
41 | #variance of X
42 | square_feet_variance = get_variance(X)
43 | price_variance = get_variance(Y)
44 |
45 | covariance_of_price_and_square_feet = get_covariance(X, Y)
46 | w1 = covariance_of_price_and_square_feet / float(square_feet_variance)
47 | w0 = price_mean - w1 * square_feet_mean
48 |
49 | # prediction --> Linear Equation
50 | prediction = w0 + w1 * X
51 |
52 | df['price (prediction)'] = prediction
53 | return df['price (prediction)']
54 |
55 |
56 | predicted = linear_regression(df)
57 |
58 | print(predicted)
59 |
60 |
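61 | # Added note: the coefficients above are the closed-form ordinary least squares solution
62 | # for simple linear regression: w1 = cov(X, Y) / var(X) and w0 = mean(Y) - w1 * mean(X),
63 | # so each predicted price is w0 + w1 * square_feet.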
--------------------------------------------------------------------------------
/linear-algebra-for-ml-and-deep-learning/pca_with_python.py:
--------------------------------------------------------------------------------
1 | # Import important libraries
2 | import numpy as np
3 | import pylab as plt
4 | import pandas as pd
5 | from sklearn import datasets
6 | import matplotlib.pyplot as plt
7 | from sklearn.preprocessing import StandardScaler
8 |
9 | load_iris = datasets.load_iris()
10 | iris_df = pd.DataFrame(load_iris.data, columns=[load_iris.feature_names])
11 |
12 | print(iris_df.head())
13 |
14 | print(load_iris.data.shape)
15 |
16 | standardized_x = StandardScaler().fit_transform(load_iris.data)
17 | print(standardized_x[:2])
18 |
19 | print(standardized_x.T)
20 |
21 | covariance_matrix_x = np.cov(standardized_x.T)
22 | print(covariance_matrix_x)
23 |
24 | eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix_x)
25 |
26 | print(eigenvalues)
27 |
28 | print(eigenvectors)
29 |
30 | total_of_eigenvalues = sum(eigenvalues)
31 | variance_explained = [(i / total_of_eigenvalues)*100 for i in sorted(eigenvalues, reverse=True)]
32 | 
33 | print(variance_explained)
34 |
35 | eigenpairs = [(np.abs(eigenvalues[i]), eigenvectors[:,i]) for i in range(len(eigenvalues))]
36 |
37 | # Sorting from Higher values to lower value
38 | eigenpairs.sort(key=lambda x: x[0], reverse=True)
39 | print(eigenpairs)
40 |
41 | matrix_weighing = np.hstack((eigenpairs[0][1].reshape(4,1),
42 | eigenpairs[1][1].reshape(4,1)))
43 | print(matrix_weighing)
44 |
45 | Y = standardized_x.dot(matrix_weighing)
46 | print(Y)
47 |
48 | plt.figure()
49 | target_names = load_iris.target_names
50 | y = load_iris.target
51 | for c, i, target_name in zip("rgb", [0, 1, 2], target_names):
52 | plt.scatter(Y[y==i,0], Y[y==i,1], c=c, label=target_name)
53 |
54 | plt.xlabel('PCA 1')
55 | plt.ylabel('PCA 2')
56 | plt.legend()
57 | plt.title('PCA')
58 | plt.show()
59 |
60 |
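  61 | # Optional cross-check (illustrative addition, not part of the original script):
  62 | # sklearn's PCA on the same standardized data should report the same percentage
  63 | # of explained variance per component as the eigenvalue ratios printed above
  64 | # (signs of the projected coordinates may flip, which is expected for PCA).
  65 | from sklearn.decomposition import PCA
  66 | pca_check = PCA(n_components=2)
  67 | Y_check = pca_check.fit_transform(standardized_x)
  68 | print(pca_check.explained_variance_ratio_ * 100)
  69 |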
--------------------------------------------------------------------------------
/logic/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/machine_learning_algorithms_for_beginners/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/machine_learning_algorithms_for_beginners/exponential_regression.py:
--------------------------------------------------------------------------------
1 | # Import required libraries:
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from scipy.optimize import curve_fit
5 |
6 | # Dataset values :
7 | day = np.arange(0,8)
8 | weight = np.array([251,209,157,129,103,81,66,49])
9 |
10 | # Exponential Function :
11 | def expo_func(x, a, b):
12 | return a * b ** x
13 |
14 | #popt :Optimal values for the parameters
15 | #pcov :The estimated covariance of popt
16 |
17 | popt, pcov = curve_fit(expo_func, day, weight)
18 | weight_pred = expo_func(day,popt[0],popt[1])
19 |
20 | # Plotting the data
21 | plt.plot(day, weight_pred, 'r-')
22 | plt.scatter(day,weight,label='Day vs Weight')
23 | plt.title("Day vs Weight a*b^x")
24 | plt.xlabel('Day')
25 | plt.ylabel('Weight')
26 | plt.legend()
27 | plt.show()
28 |
29 | # Equation
30 | a=popt[0].round(4)
31 | b=popt[1].round(4)
32 | print(f'The equation of regression line is y={a}*{b}^x')
33 |
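  34 | # Optional cross-check (illustrative addition, not part of the original script):
  35 | # since y = a*b^x implies ln(y) = ln(a) + x*ln(b), a straight-line fit on the
  36 | # log of the weights should recover roughly the same parameters as curve_fit.
  37 | log_slope, log_intercept = np.polyfit(day, np.log(weight), 1)
  38 | print(f'log-space estimate: a={np.exp(log_intercept).round(4)}, b={np.exp(log_slope).round(4)}')
  39 |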
--------------------------------------------------------------------------------
/machine_learning_algorithms_for_beginners/linear_regression_example.py:
--------------------------------------------------------------------------------
1 | # Import required libraries:
2 | import pandas as pd
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | from sklearn import linear_model
6 |
7 | # Read the CSV file :
8 | data = pd.read_csv("https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/ML0101ENv3/labs/FuelConsumptionCo2.csv")
9 | data.head()
10 |
11 | # Let's select some features to explore more :
12 | data = data[["ENGINESIZE","CO2EMISSIONS"]]
13 |
14 | # ENGINESIZE vs CO2EMISSIONS:
15 | plt.scatter(data["ENGINESIZE"] , data["CO2EMISSIONS"] , color="blue")
16 | plt.xlabel("ENGINESIZE")
17 | plt.ylabel("CO2EMISSIONS")
18 | plt.show()
19 |
20 | # Generating training and testing data from our data:
21 | # We are using 80% data for training.
22 | train = data[:(int((len(data)*0.8)))]
23 | test = data[(int((len(data)*0.8))):]
24 |
25 | # Modeling:
26 |
27 | # Using sklearn package to model data :
28 | regr = linear_model.LinearRegression()
29 | train_x = np.array(train[["ENGINESIZE"]])
30 | train_y = np.array(train[["CO2EMISSIONS"]])
31 | regr.fit(train_x,train_y)
32 |
33 | # The coefficients:
34 | print ("coefficients : ",regr.coef_) #Slope
35 | print ("Intercept : ",regr.intercept_) #Intercept
36 |
37 | # Plotting the regression line:
38 | plt.scatter(train["ENGINESIZE"], train["CO2EMISSIONS"], color='blue')
39 | plt.plot(train_x, regr.coef_*train_x + regr.intercept_, '-r')
40 | plt.xlabel("Engine size")
41 | plt.ylabel("Emission")
42 |
43 | # Predicting values:
44 |
45 | # Function for predicting future values :
46 | def get_regression_predictions(input_features,intercept,slope):
47 | predicted_values = input_features*slope + intercept
48 | return predicted_values
49 |
50 | # Predicting emission for future car:
51 | my_engine_size = 3.5
  52 | estimated_emission = get_regression_predictions(my_engine_size,regr.intercept_[0],regr.coef_[0][0])
  53 | print ("Estimated Emission :",estimated_emission)
54 |
55 | # Checking various accuracy:
56 | from sklearn.metrics import r2_score
57 | test_x = np.array(test[['ENGINESIZE']])
58 | test_y = np.array(test[['CO2EMISSIONS']])
59 | test_y_ = regr.predict(test_x)
60 | print("Mean absolute error: %.2f" % np.mean(np.absolute(test_y_ - test_y)))
61 | print("Mean sum of squares (MSE): %.2f" % np.mean((test_y_ - test_y) ** 2))
62 | print("R2-score: %.2f" % r2_score(test_y_ , test_y) )
63 |
--------------------------------------------------------------------------------
/machine_learning_algorithms_for_beginners/logarithmic_regression.py:
--------------------------------------------------------------------------------
1 | # Import required libraries:
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from sklearn.metrics import r2_score
5 |
6 | # Dataset:
7 | # Y = a + b*ln(X)
8 |
9 | X = np.arange(1,50,0.5)
10 | Y = 10 + 2*np.log(X)
11 |
12 | #Adding some noise to calculate error!
13 | Y_noise = np.random.rand(len(Y))
14 | Y = Y +Y_noise
15 | plt.scatter(X,Y)
16 |
17 | # 1st column of our X matrix should be 1:
18 | n = len(X)
19 | x_bias = np.ones((n,1))
20 | print (X.shape)
21 | print (x_bias.shape)
22 |
23 | # Reshaping X :
24 | X = np.reshape(X,(n,1))
25 | print (X.shape)
26 |
27 | # Going with the formula:
28 | # Y = a + b*ln(X)
29 | X_log = np.log(X)
30 |
31 | # Append the X_log to X_bias:
32 | x_new = np.append(x_bias,X_log,axis=1)
33 |
34 | # Transpose of a matrix:
35 | x_new_transpose = np.transpose(x_new)
36 |
37 | # Matrix multiplication:
38 | x_new_transpose_dot_x_new = x_new_transpose.dot(x_new)
39 |
40 | # Find inverse:
41 | temp_1 = np.linalg.inv(x_new_transpose_dot_x_new)
42 |
43 | # Matrix Multiplication:
44 | temp_2 = x_new_transpose.dot(Y)
45 |
46 | # Find the coefficient values:
47 | theta = temp_1.dot(temp_2)
48 |
49 | # Plot the data:
50 | a = theta[0]
51 | b = theta[1]
52 | Y_plot = a + b*np.log(X)
53 | plt.scatter(X,Y)
54 | plt.plot(X,Y_plot,c="r")
55 |
56 | # Check the accuracy:
57 | Accuracy = r2_score(Y,Y_plot)
58 | print (Accuracy)
59 |
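  60 | # Optional cross-check (illustrative addition, not part of the original script):
  61 | # the normal-equation solution theta = (X^T X)^-1 X^T Y computed above should
  62 | # match numpy's built-in least-squares solver on the same design matrix.
  63 | theta_check, _, _, _ = np.linalg.lstsq(x_new, Y, rcond=None)
  64 | print (theta_check)
  65 |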
--------------------------------------------------------------------------------
/machine_learning_algorithms_for_beginners/ml_algorithms_1.py:
--------------------------------------------------------------------------------
1 | # Import required libraries:
2 | import pandas as pd
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | from sklearn import linear_model
6 |
7 | # Read the CSV file :
8 | data = pd.read_csv("https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/ML0101ENv3/labs/FuelConsumptionCo2.csv")
9 | data.head()
10 |
11 | # Let's select some features to explore more :
12 | data = data[["ENGINESIZE","CO2EMISSIONS"]]
13 |
14 | # ENGINESIZE vs CO2EMISSIONS:
15 | plt.scatter(data["ENGINESIZE"] , data["CO2EMISSIONS"] , color="blue")
16 | plt.xlabel("ENGINESIZE")
17 | plt.ylabel("CO2EMISSIONS")
18 | plt.show()
19 |
20 | # Generating training and testing data from our data:
21 | # We are using 80% data for training.
22 | train = data[:(int((len(data)*0.8)))]
23 | test = data[(int((len(data)*0.8))):]
24 |
25 | # Modeling:
26 |
27 | # Using sklearn package to model data :
28 | regr = linear_model.LinearRegression()
29 | train_x = np.array(train[["ENGINESIZE"]])
30 | train_y = np.array(train[["CO2EMISSIONS"]])
31 | regr.fit(train_x,train_y)
32 |
33 | # The coefficients:
34 | print ("coefficients : ",regr.coef_) #Slope
35 | print ("Intercept : ",regr.intercept_) #Intercept
36 |
37 | # Plotting the regression line:
38 | plt.scatter(train["ENGINESIZE"], train["CO2EMISSIONS"], color='blue')
39 | plt.plot(train_x, regr.coef_*train_x + regr.intercept_, '-r')
40 | plt.xlabel("Engine size")
41 | plt.ylabel("Emission")
42 |
43 | # Predicting values:
44 |
45 | # Function for predicting future values :
46 | def get_regression_predictions(input_features,intercept,slope):
47 | predicted_values = input_features*slope + intercept
48 | return predicted_values
49 |
50 | # Predicting emission for future car:
51 | my_engine_size = 3.5
  52 | estimated_emission = get_regression_predictions(my_engine_size,regr.intercept_[0],regr.coef_[0][0])
  53 | print ("Estimated Emission :",estimated_emission)
54 |
55 | # Checking various accuracy:
56 | from sklearn.metrics import r2_score
57 | test_x = np.array(test[['ENGINESIZE']])
58 | test_y = np.array(test[['CO2EMISSIONS']])
59 | test_y_ = regr.predict(test_x)
60 | print("Mean absolute error: %.2f" % np.mean(np.absolute(test_y_ - test_y)))
61 | print("Mean sum of squares (MSE): %.2f" % np.mean((test_y_ - test_y) ** 2))
62 | print("R2-score: %.2f" % r2_score(test_y_ , test_y) )
63 |
--------------------------------------------------------------------------------
/machine_learning_algorithms_for_beginners/multivariable_linear_regression.py:
--------------------------------------------------------------------------------
1 | # Import the required libraries:
2 | import pandas as pd
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | from sklearn import linear_model
6 |
7 | # Read the CSV file:
8 | data = pd.read_csv("https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/ML0101ENv3/labs/FuelConsumptionCo2.csv")
9 | data.head()
10 |
11 | # Consider features we want to work on:
12 | X = data[[ 'ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_CITY','FUELCONSUMPTION_HWY',
13 | 'FUELCONSUMPTION_COMB','FUELCONSUMPTION_COMB_MPG']]
14 | Y = data["CO2EMISSIONS"]
15 |
16 | # Generating training and testing data from our data:
17 | # We are using 80% data for training.
18 | train = data[:(int((len(data)*0.8)))]
19 | test = data[(int((len(data)*0.8))):]
20 |
21 | #Modeling:
22 |
23 | #Using sklearn package to model data :
24 |
25 | regr = linear_model.LinearRegression()
26 | train_x = np.array(train[[ 'ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_CITY',
27 | 'FUELCONSUMPTION_HWY', 'FUELCONSUMPTION_COMB','FUELCONSUMPTION_COMB_MPG']])
28 | train_y = np.array(train["CO2EMISSIONS"])
29 | regr.fit(train_x,train_y)
30 | test_x = np.array(test[[ 'ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_CITY',
31 | 'FUELCONSUMPTION_HWY', 'FUELCONSUMPTION_COMB','FUELCONSUMPTION_COMB_MPG']])
32 | test_y = np.array(test["CO2EMISSIONS"])
33 |
34 | # print the coefficient values:
35 | coeff_data = pd.DataFrame(regr.coef_ , X.columns , columns=["Coefficients"])
36 | coeff_data
37 |
38 | #Now let's do prediction of data:
39 | Y_pred = regr.predict(test_x)
40 |
41 | # Check accuracy:
42 | from sklearn.metrics import r2_score
43 | R = r2_score(test_y , Y_pred)
44 | print ("R² :",R)
45 |
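  46 | # Optional variation (illustrative addition, not part of the original script):
  47 | # the slice above keeps the rows in file order; a shuffled split such as
  48 | # sklearn's train_test_split usually gives a less order-dependent R² estimate
  49 | # (random_state is an arbitrary seed chosen here for reproducibility).
  50 | from sklearn.model_selection import train_test_split
  51 | X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
  52 | regr_check = linear_model.LinearRegression().fit(X_train, y_train)
  53 | print ("R² with a shuffled split:", r2_score(y_test, regr_check.predict(X_test)))
  54 |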
--------------------------------------------------------------------------------
/machine_learning_algorithms_for_beginners/polynomial_regression.py:
--------------------------------------------------------------------------------
1 | # Import required libraries:
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 |
5 | # Generate datapoints:
6 | x = np.arange(-5,5,0.1)
7 | y_noise = 20 * np.random.normal(size = len(x))
8 | y = 1*(x**3) + 1*(x**2) + 1*x + 3+y_noise
9 | plt.scatter(x,y)
10 |
11 | # Make polynomial data:
12 | x1 = x
13 | x2 = np.power(x1,2)
14 | x3 = np.power(x1,3)
15 | n = len(x1)
16 |
17 | # Reshaping data:
18 | x1_new = np.reshape(x1,(n,1))
19 | x2_new = np.reshape(x2,(n,1))
20 | x3_new = np.reshape(x3,(n,1))
21 |
22 | # First column of matrix X:
23 | x_bias = np.ones((n,1))
24 |
25 | # Form the complete x matrix:
26 | x_new = np.append(x_bias,x1_new,axis=1)
27 | x_new = np.append(x_new,x2_new,axis=1)
28 | x_new = np.append(x_new,x3_new,axis=1)
29 |
30 | # Finding transpose:
31 | x_new_transpose = np.transpose(x_new)
32 |
33 | # Finding dot product of original and transposed matrix :
34 | x_new_transpose_dot_x_new = x_new_transpose.dot(x_new)
35 |
36 | # Finding Inverse:
  37 | temp_1 = np.linalg.inv(x_new_transpose_dot_x_new)
  38 | temp_2 = x_new_transpose.dot(y)  # Finding the dot product of transposed x and y
39 |
40 | # Finding coefficients:
41 | theta = temp_1.dot(temp_2)
42 | theta
43 |
44 | # Store coefficient values in different variables:
45 | beta_0 = theta[0]
46 | beta_1 = theta[1]
47 | beta_2 = theta[2]
48 | beta_3 = theta[3]
49 |
50 | # Plot the polynomial curve:
51 | plt.scatter(x,y)
52 | plt.plot(x,beta_0 + beta_1*x1 + beta_2*x2 + beta_3*x3,c="red")
53 |
54 | # Prediction function:
55 | def prediction(x1,x2,x3,beta_0,beta_1,beta_2,beta_3):
56 | y_pred = beta_0 + beta_1*x1 + beta_2*x2 + beta_3*x3
57 | return y_pred
58 |
59 | # Making predictions:
60 | pred = prediction(x1,x2,x3,beta_0,beta_1,beta_2,beta_3)
61 |
62 | # Calculate accuracy of model:
63 | def err(y_pred,y):
64 | var = (y - y_pred)
65 | var = var*var
66 | n = len(var)
67 | MSE = var.sum()
68 | MSE = MSE/n
69 |
70 | return MSE
71 |
72 | # Calculating the error:
73 | error = err(pred,y)
74 | error
75 |
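  76 | # Optional cross-check (illustrative addition, not part of the original script):
  77 | # np.polyfit with degree 3 solves the same least-squares problem, so its
  78 | # coefficients (highest power first) should be close to theta in reverse order.
  79 | coeffs_check = np.polyfit(x, y, 3)
  80 | print(coeffs_check)
  81 |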
--------------------------------------------------------------------------------
/machine_learning_algorithms_for_beginners/sinusoidal_regression.py:
--------------------------------------------------------------------------------
1 | # Import required libraries:
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from scipy.optimize import curve_fit
5 | from sklearn.metrics import r2_score
6 |
7 | # Generating dataset:
8 | # Y = A*sin(B(X + C)) + D
9 | # A = Amplitude
10 | # Period = 2*pi/B
11 | # Period = Length of One Cycle
12 | # C = Phase Shift (In Radian)
13 | # D = Vertical Shift
14 |
15 | X = np.linspace(0,1,100) #(Start,End,Points)
16 |
17 | # Here…
18 | # A = 1
19 | # B= 2*pi
20 | # B = 2*pi/Period
21 | # Period = 1
22 | # C = 0
23 | # D = 0
24 |
25 | Y = 1*np.sin(2*np.pi*X)
26 |
27 |
28 | # Adding some Noise :
29 | Noise = 0.4*np.random.normal(size=100)
30 | Y_data = Y + Noise
31 | plt.scatter(X,Y_data,c="r")
32 |
33 | # Calculate the value:
34 | def calc_sine(x,a,b,c,d):
35 | return a * np.sin(b* ( x + np.radians(c))) + d
36 |
37 | # Finding optimal parameters :
38 | popt,pcov = curve_fit(calc_sine,X,Y_data)
39 |
40 | # Plot the main data :
  41 | plt.scatter(X,Y_data)
  42 | plt.plot(X,calc_sine(X,*popt),c="r")  # Plot the best fit curve
43 | plt.show()
44 |
45 | # Check the accuracy :
46 | Accuracy =r2_score(Y_data,calc_sine(X,*popt))
47 | print (Accuracy)
48 |
49 | # Function to calculate the value :
50 | def calc_line(X,m,b):
51 | return b + X*m
52 |
  53 | # curve_fit returns optimized parameters for our function :
  54 | # popt stores the optimal parameters
  55 | # pcov stores the covariance between the parameters.
56 | popt,pcov = curve_fit(calc_line,X,Y_data)
57 |
58 | # Plot the main data :
59 | plt.scatter(X,Y_data)
60 |
61 | # Plot the best fit line :
62 | plt.plot(X,calc_line(X,*popt),c="r")
63 | plt.show()
64 |
65 | # Check the accuracy of model :
66 | Accuracy =r2_score(Y_data,calc_line(X,*popt))
67 | print ("Accuracy of Linear Model : ",Accuracy)
68 |
--------------------------------------------------------------------------------
/moment_generating_function/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/moment_generating_function/moment_generating_function.py:
--------------------------------------------------------------------------------
1 | #1-Dimensional Data:
2 |
3 | #Import required libraries:
4 | from scipy import stats
5 |
6 | #Dataset:
7 | d = [1,2,3,4,5]
8 |
9 | #Finding 0th moment:
10 | print("0th Moment = ",stats.moment(d,moment=0))
11 |
12 | #Finding 1st moment:
13 | print("1st Moment = ",stats.moment(d,moment=1))
14 |
15 | #Finding 2nd moment:
16 | print("2nd Moment = ",stats.moment(d,moment=2))
17 |
  18 | #Finding 3rd moment:
  19 | print("3rd Moment = ",stats.moment(d,moment=3))
20 |
21 | #Finding 4th moment:
22 | print("4th Moment = ",stats.moment(d,moment=4))
23 |
24 |
25 | #============================================================
26 |
27 |
28 | #2-Dimensional Data:
29 |
30 | #Import required libraries:
31 | from scipy import stats
32 |
33 | #Dataset:
34 | d = [[5,6,9,11,3],[21,4,8,15,2]]
35 |
36 | #Finding 0th moment:
37 | print("0th Moment = ",stats.moment(d,moment=0))
38 |
39 | #Finding 1st moment:
40 | print("1st Moment = ",stats.moment(d,moment=1))
41 |
42 | #Finding 2nd moment:
43 | print("2nd Moment = ",stats.moment(d,moment=2))
44 |
  45 | #Finding 3rd moment:
  46 | print("3rd Moment = ",stats.moment(d,moment=3))
47 |
48 | #Finding 4th moment:
49 | print("4th Moment = ",stats.moment(d,moment=4))
50 |
51 |
52 | #============================================================
53 |
54 |
55 | #2-Dimensional Data:
  56 | #Set axis=1 (Horizontal):
57 |
58 | #Import required libraries:
59 | from scipy import stats
60 |
61 | #Dataset:
62 | d = [[5,6,9,11,3],[21,4,8,15,2]]
63 |
64 | #Finding 0th moment:
65 | print("0th Moment = ",stats.moment(d,moment=0,axis=1))
66 |
67 | #Finding 1st moment:
68 | print("1st Moment = ",stats.moment(d,moment=1,axis=1))
69 |
70 | #Finding 2nd moment:
71 | print("2nd Moment = ",stats.moment(d,moment=2,axis=1))
72 |
  73 | #Finding 3rd moment:
  74 | print("3rd Moment = ",stats.moment(d,moment=3,axis=1))
75 |
76 | #Finding 4th moment:
77 | print("4th Moment = ",stats.moment(d,moment=4,axis=1))
78 |
79 |
80 | #============================================================
81 |
82 |
83 | #Multi-Dimensional Data:
84 |
85 | #Import required libraries:
86 | from scipy import stats
87 |
88 | #Dataset:
89 | d = [[5,6,9,11,3],
90 | [21,4,8,15,2],
91 | [15,23,42,1,36]]
92 |
93 | #Finding 0th moment:
94 | print("0th Moment = ",stats.moment(d,moment=0))
95 |
96 | #Finding 1st moment:
97 | print("1st Moment = ",stats.moment(d,moment=1))
98 |
99 | #Finding 2nd moment:
100 | print("2nd Moment = ",stats.moment(d,moment=2))
101 |
  102 | #Finding 3rd moment:
  103 | print("3rd Moment = ",stats.moment(d,moment=3))
104 |
105 | #Finding 4th moment:
106 | print("4th Moment = ",stats.moment(d,moment=4))
107 |
108 |
109 | #============================================================
110 |
111 |
112 | #2-Dimensional Data:
  113 | #Set axis=1 (Horizontal):
114 | #Higher Order Moments:
115 |
116 | #Import required libraries:
117 | from scipy import stats
118 |
119 | #Dataset:
120 | d = [[5,6,9,11,3],[21,4,8,15,2]]
121 |
122 | #Finding 10th moment:
123 | print("10th Moment = ",stats.moment(d,moment=10,axis=1))
124 |
125 | #Finding 12th moment:
126 | print("12th Moment = ",stats.moment(d,moment=12,axis=1))
127 |
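  128 | #Optional cross-check (illustrative addition, not part of the original script):
  129 | #scipy.stats.moment returns central moments, i.e. mean((d - mean(d))**k),
  130 | #so the 2nd moment of [1,2,3,4,5] can be reproduced by hand with numpy.
  131 | import numpy as np
  132 | d1 = np.array([1,2,3,4,5])
  133 | print("Manual 2nd central moment = ", np.mean((d1 - d1.mean())**2))
  134 |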
--------------------------------------------------------------------------------
/monte_carlo_simulation/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/monte_carlo_simulation/monte_carlo_buffon's_needle_problem.py:
--------------------------------------------------------------------------------
1 | #Import required libraries :
2 | import random
3 | import math
4 | import matplotlib.pyplot as plt
5 |
6 | #Main function to estimate PI value :
7 | def monte_carlo(runs,needles,n_length,b_width):
8 | #Empty list to store pi values :
9 | pi_values = []
10 |
11 | #Horizontal line for actual value of PI :
12 | plt.axhline(y=math.pi, color='r', linestyle='-')
13 |
14 | #For all runs :
15 | for i in range(runs):
16 | #Initialize number of hits as 0.
17 | nhits = 0
18 |
19 | #For all needles :
20 | for j in range(needles):
21 | #We will find the distance from the nearest vertical line :
22 | #Min = 0 Max = b_width/2
23 | x = random.uniform(0,b_width/2.0)
24 |
25 | #The theta value will be from 0 to pi/2 :
26 | theta = random.uniform(0,math.pi/2)
27 |
28 | #Checking if the needle crosses the line or not :
29 | xtip = x - (n_length/2.0)*math.cos(theta)
30 | if xtip < 0 :
31 | nhits += 1
32 |
33 | #Going with the formula :
34 | numerator = 2.0 * n_length * needles
35 | denominator = b_width * nhits
36 |
37 | #Append the final value of pi :
38 | pi_values.append((numerator/denominator))
39 |
40 | #Final pi value after all iterations :
41 | print(pi_values[-1])
42 |
43 | #Plotting the graph :
44 | plt.plot(pi_values)
45 |
46 | #Total number of runs :
47 | runs = 100
48 |
49 | #Total number of needles :
50 | needles = 100000
51 |
52 | #Length of needle :
53 | n_length = 2
54 |
  55 | #space between 2 vertical lines :
  56 | b_width = 2
57 |
58 | #Calling the main function :
59 | monte_carlo(runs,needles,n_length,b_width)
60 |
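  61 | #Optional note (illustrative addition, not part of the original script):
  62 | #For a needle no longer than the line spacing, the crossing probability is
  63 | #P = (2*n_length)/(pi*b_width), so with n_length = b_width = 2 the expected
  64 | #hit fraction is 2/pi; the estimator above inverts this relation to get pi.
  65 | print("Expected hit fraction 2/pi =", 2/math.pi)
  66 |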
--------------------------------------------------------------------------------
/monte_carlo_simulation/monte_carlo_casino_example.py:
--------------------------------------------------------------------------------
1 | #Import required libraries :
2 |
3 | import random
4 | import matplotlib.pyplot as plt
5 |
6 | """RULES :
7 |
8 | 1) There are chits containing numbers ranging from 1-100 in a bag.
9 | 2) Users can bet on even or odd.
10 | 3) In this game 10 and 11 are special numbers. 10 will be counted as an odd number and 11 will be counted as an even number.
  11 | 4) If you bet on even and you draw 10, you lose.
  12 | 5) If you bet on odd and you draw 11, you lose.
13 | """
14 |
15 | #Place your bet:
16 |
17 | #User can choose even or odd number :
18 | choice = input("Do you want to bet on Even number or odd number \n")
19 |
20 | #For even :
21 | if choice=="Even":
22 | def pickNote():
23 | #Get random number between 1-100.
24 | note = random.randint(1,100)
25 |
26 | #Check for our game conditions.
27 |
  28 | #Notice that 10 isn't counted as an even number.
29 | if note%2!=0 or note==10:
30 | return False
31 | elif note%2==0:
32 | return True
33 |
34 | #For odd :
35 | elif choice=="Odd":
36 | def pickNote():
37 | #Get random number between 1-100.
38 | note = random.randint(1,100)
39 |
40 | #Check for our game conditions.
41 |
  42 | #Notice that 11 isn't counted as an odd number.
43 | if note%2==0 or note==11:
44 | return False
45 | elif note%2==1:
46 | return True
47 |
48 | #Main function :
49 | def play(total_money, bet_money, total_plays):
50 |
51 | num_of_plays = []
52 | money = []
53 |
54 | #Start with play number 1
55 | play = 1
56 |
57 | for play in range(total_plays):
58 | #Win :
59 | if pickNote():
60 | #Add the money to our funds
61 | total_money = total_money + bet_money
62 | #Append the play number
63 | num_of_plays.append(play)
64 | #Append the new fund amount
65 | money.append(total_money)
66 |
67 | #Lose :
68 | else:
69 | #Add the money to our funds
70 | total_money = total_money - bet_money
71 | #Append the play number
72 | num_of_plays.append(play)
73 | #Append the new fund amount
74 | money.append(total_money)
75 |
76 | #Plot the data :
77 | plt.ylabel('Player Money in $')
78 | plt.xlabel('Number of bets')
79 | plt.plot(num_of_plays,money)
80 |
81 | #Final value after all the iterations :
82 | final_funds.append(money[-1])
83 | return(final_funds)
84 |
85 | #Create a list for calculating final funds
86 | final_funds= []
87 |
88 | #Run 10 iterations :
89 | for i in range(10):
90 | ending_fund = play(10000,100,50)
91 |
92 | print(ending_fund)
93 | print(sum(ending_fund))
94 |
95 | #Print the money the player ends with
96 | print("The player started with $10,000")
97 | print("The player left with $",str(sum(ending_fund)/len(ending_fund)))
98 |
99 | #Create a list for calculating final funds
100 | final_funds= []
101 |
102 | #Run 1000 iterations :
103 | for i in range(1000):
104 | ending_fund = play(10000,100,50)
105 |
106 | #Print the money the player ends with
107 | print("The player started with $10,000")
108 | print("The player left with $",str(sum(ending_fund)/len(ending_fund)))
109 |
110 |
111 | #Create a list for calculating final funds
112 | final_funds= []
113 |
114 | #Run 10 iterations :
115 | for i in range(10):
116 | ending_fund = play(10000,100,5)
117 |
118 | #Print the money the player ends with
119 | print("Number of bets = 5")
120 | print("The player started with $10,000")
121 | print("The player left with $",str(sum(ending_fund)/len(ending_fund)))
122 |
123 |
124 | #Create a list for calculating final funds
125 | final_funds= []
126 |
127 | #Run 10 iterations :
128 | for i in range(10):
129 | ending_fund = play(10000,100,10)
130 |
131 | #Print the money the player ends with
132 | print("Number of bets = 10")
133 | print("The player started with $10,000")
134 | print("The player left with $",str(sum(ending_fund)/len(ending_fund)))
135 |
136 |
137 | #Create a list for calculating final funds
138 | final_funds= []
139 |
140 | #Run 10 iterations :
141 | for i in range(10):
142 | ending_fund = play(10000,100,100)
143 |
144 | #Print the money the player ends with
145 | print("Number of bets = 100")
146 | print("The player started with $10,000")
147 | print("The player left with $",str(sum(ending_fund)/len(ending_fund)))
148 |
149 |
150 | #Create a list for calculating final funds
151 | final_funds= []
152 |
153 | #Run 10 iterations :
154 | for i in range(10):
155 | ending_fund = play(10000,100,1000)
156 |
157 | #Print the money the player ends with
158 | print("Number of bets = 1000")
159 | print("The player started with $10,000")
160 | print("The player left with $",str(sum(ending_fund)/len(ending_fund)))
161 |
162 |
163 | #Create a list for calculating final funds
164 | final_funds= []
165 |
166 | #Run 10 iterations :
167 | for i in range(10):
168 | ending_fund = play(10000,100,5000)
169 |
170 | #Print the money the player ends with
171 | print("Number of bets = 5000")
172 | print("The player started with $10,000")
173 | print("The player left with $",str(sum(ending_fund)/len(ending_fund)))
174 |
175 |
176 | #Create a list for calculating final funds
177 | final_funds= []
178 |
179 | #Run 10 iterations :
180 | for i in range(10):
181 | ending_fund = play(10000,100,10000)
182 |
183 | #Print the money the player ends with
184 | print("Number of bets = 10000")
185 | print("The player started with $10,000")
186 | print("The player left with $",str(sum(ending_fund)/len(ending_fund)))
187 |
188 |
189 |
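 190 | #Optional note (illustrative addition, not part of the original script):
 191 | #Betting on Even wins on the 49 even numbers other than 10 and loses on the
 192 | #other 51 outcomes, so each $100 bet is worth (49 - 51)/100 * $100 = -$2 on
 193 | #average; that 2% house edge is why the averages above drift below $10,000.
 194 | print("Expected value per $100 bet :", (49 - 51)/100 * 100)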
--------------------------------------------------------------------------------
/monte_carlo_simulation/monte_carlo_coin_flip.py:
--------------------------------------------------------------------------------
1 | #Import required libraries :
2 |
3 | import random
4 | import numpy as np
5 | import matplotlib.pyplot as plt
6 |
7 | #Coin flip function :
8 |
9 | #0 --> Heads
10 | #1 --> Tails
11 |
12 | def coin_flip():
13 | return random.randint(0,1)
14 |
15 | #Check the return value of coin_flip() :
16 |
17 | coin_flip()
18 |
19 | #Monte Carlo Simulation :
20 |
21 | #Empty list to store the probability values.
22 | list1 = []
23 |
24 | def monte_carlo(n):
25 | results = 0
26 | for i in range(n):
27 | flip_result = coin_flip()
28 | results = results + flip_result
29 |
30 | #Calculating probability value :
31 | prob_value = results/(i+1)
32 |
33 | #Append the probability values to the list :
34 | list1.append(prob_value)
35 |
36 | #Plot the results :
37 | plt.axhline(y=0.5, color='r', linestyle='-')
38 | plt.xlabel("Iterations")
39 | plt.ylabel("Probability")
40 | plt.plot(list1)
41 |
42 | return results/n
43 |
44 | #Calling the function :
45 |
46 | answer = monte_carlo(5000)
47 | print("Final value :",answer)
48 |
--------------------------------------------------------------------------------
/monte_carlo_simulation/monte_carlo_estimating_pi_using_circle_and_square.py:
--------------------------------------------------------------------------------
1 | #Import required libraries :
2 | import turtle
3 | import random
4 | import matplotlib.pyplot as plt
5 | import math
6 |
7 | #To visualize the random points :
8 | myPen = turtle.Turtle()
9 | myPen.hideturtle()
10 | myPen.speed(0)
11 |
12 | #Drawing a square :
13 | myPen.up()
14 | myPen.setposition(-100,-100)
15 | myPen.down()
16 | myPen.fd(200)
17 | myPen.left(90)
18 | myPen.fd(200)
19 |
20 | myPen.left(90)
21 | myPen.fd(200)
22 | myPen.left(90)
23 | myPen.fd(200)
24 | myPen.left(90)
25 |
26 | #Drawing a circle :
27 | myPen.up()
28 | myPen.setposition(0,-100)
29 | myPen.down()
30 | myPen.circle(100)
31 |
32 | #To count the points inside and outside the circle :
33 | in_circle = 0
34 | out_circle = 0
35 |
36 | #To store the values of PI :
37 | pi_values = []
38 |
39 | #Running for 5 times :
40 | for i in range(5):
41 | for j in range(1000):
42 |
43 | #Generate random numbers :
44 | x=random.randrange(-100,100)
45 | y=random.randrange(-100,100)
46 |
47 | #Check if the number lies outside the circle :
48 | if (x**2+y**2>100**2):
49 | myPen.color("black")
50 | myPen.up()
51 | myPen.goto(x,y)
52 | myPen.down()
53 | myPen.dot()
54 | out_circle = out_circle+1
55 |
56 | else:
57 | myPen.color("red")
58 | myPen.up()
59 | myPen.goto(x,y)
60 | myPen.down()
61 | myPen.dot()
62 | in_circle = in_circle+1
63 |
64 | #Calculating the value of PI :
65 | pi = 4.0 * in_circle / (in_circle + out_circle)
66 |
67 | #Append the values of PI in list :
68 | pi_values.append(pi)
69 |
70 | #Calculating the errors :
71 | avg_pi_errors = [abs(math.pi - pi) for pi in pi_values]
72 |
  73 |  #Print the final value of PI for each iteration :
74 | print (pi_values[-1])
75 |
76 | #Plot the PI values :
77 | plt.axhline(y=math.pi, color='g', linestyle='-')
78 | plt.plot(pi_values)
79 | plt.xlabel("Iterations")
80 | plt.ylabel("Value of PI")
81 | plt.show()
82 |
83 | #Plot the error in calculation :
84 | plt.axhline(y=0.0, color='g', linestyle='-')
85 | plt.plot(avg_pi_errors)
86 | plt.xlabel("Iterations")
87 | plt.ylabel("Error")
88 | plt.show()
89 |
90 |
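  91 | #Optional cross-check (illustrative addition, not part of the original script):
  92 | #The same pi estimate without the turtle graphics, using vectorized numpy sampling.
  93 | import numpy as np
  94 | xs = np.random.uniform(-100, 100, 100000)
  95 | ys = np.random.uniform(-100, 100, 100000)
  96 | inside = np.sum(xs**2 + ys**2 <= 100**2)
  97 | print ("Vectorized estimate of PI :", 4.0 * inside / 100000)
  98 |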
--------------------------------------------------------------------------------
/monte_carlo_simulation/monte_carlo_monty_hall_problem.py:
--------------------------------------------------------------------------------
1 | #Import required libraries :
2 | import random
3 | import matplotlib.pyplot as plt
4 |
5 | #We are going with 3 doors :
6 | #1 - Car
7 | #2 - Goats
8 | doors = ["goat","goat","car"]
9 |
10 | #Empty lists to store probability values :
11 | switch_win_probability = []
12 | stick_win_probability = []
13 |
14 | plt.axhline(y=0.66666, color='r', linestyle='-')
15 | plt.axhline(y=0.33333, color='g', linestyle='-')
16 |
17 | #Monte_Carlo Simulation :
18 | def monte_carlo(n):
19 |
20 | #Calculating switch and stick wins :
21 | switch_wins = 0
22 | stick_wins = 0
23 |
24 | for i in range(n):
25 |
26 | #Randomly placing the car and goats behind the three doors :
27 | random.shuffle(doors)
28 |
29 | #Contestant's choice :
30 | k = random.randrange(2)
31 |
32 | #If the contestant doesn't get car :
33 | if doors[k] != 'car':
34 | switch_wins += 1
35 |
36 | #If the contestant got car :
37 | else:
38 | stick_wins += 1
39 |
40 | #Updating the list values :
41 | switch_win_probability.append(switch_wins/(i+1))
42 | stick_win_probability.append(stick_wins/(i+1))
43 |
44 | #Plotting the data :
45 | plt.plot(switch_win_probability)
46 | plt.plot(stick_win_probability)
47 |
48 | #Print the probability values :
49 | print('Winning probability if you always switch:',switch_win_probability[-1])
50 | print('Winning probability if you always stick to your original choice:', stick_win_probability[-1])
51 |
52 |
53 | #Calling the function :
54 | monte_carlo(1000)
55 |
--------------------------------------------------------------------------------
/natural_language_processing/Natural_Language_Processing_Text.txt:
--------------------------------------------------------------------------------
1 | Once upon a time there was an old mother pig who had three little pigs and not enough food to feed them. So when they were old enough, she sent them out into the world to seek their fortunes.
2 |
3 | The first little pig was very lazy. He didn't want to work at all and he built his house out of straw. The second little pig worked a little bit harder but he was somewhat lazy too and he built his house out of sticks. Then, they sang and danced and played together the rest of the day.
4 |
5 | The third little pig worked hard all day and built his house with bricks. It was a sturdy house complete with a fine fireplace and chimney. It looked like it could withstand the strongest winds.
--------------------------------------------------------------------------------
/natural_language_processing/circle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/towardsai/tutorials/cc12fe183d50ce6095f044d7346f30d5d0522584/natural_language_processing/circle.png
--------------------------------------------------------------------------------
/natural_language_processing/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/natural_language_processing/natural_language_processing_code.py:
--------------------------------------------------------------------------------
1 | #Open the text file :
2 | text_file = open("Natural_Language_Processing_Text.txt")
3 |
4 | #Read the data :
5 | text = text_file.read()
6 |
7 | #Datatype of the data read :
8 | print (type(text))
9 | print("\n")
10 |
11 | #Print the text :
12 | print(text)
13 | print("\n")
14 | #Length of the text :
15 | print (len(text))
16 |
17 | #Import required libraries :
18 | import nltk
19 | from nltk import sent_tokenize
20 | from nltk import word_tokenize
21 |
22 | #Tokenize the text by sentences :
23 | sentences = sent_tokenize(text)
24 |
25 | #How many sentences are there? :
26 | print (len(sentences))
27 |
28 | #Print the sentences :
29 | #print(sentences)
30 | print(sentences)
31 |
32 | #Tokenize the text with words :
33 | words = word_tokenize(text)
34 |
35 | #How many words are there? :
36 | print (len(words))
37 | print("\n")
38 |
39 | #Print words :
40 | print (words)
41 |
42 | #Import required libraries :
43 | from nltk.probability import FreqDist
44 |
45 | #Find the frequency :
46 | fdist = FreqDist(words)
47 |
48 | #Print 10 most common words :
49 | fdist.most_common(10)
50 |
51 | #Plot the graph for fdist :
52 | import matplotlib.pyplot as plt
53 |
54 | fdist.plot(10)
55 |
56 | #Empty list to store words:
57 | words_no_punc = []
58 |
59 | #Removing punctuation marks :
60 | for w in words:
61 | if w.isalpha():
62 | words_no_punc.append(w.lower())
63 |
  64 | #Print the words without punctuation marks :
65 | print (words_no_punc)
66 |
67 | print ("\n")
68 |
69 | #Length :
70 | print (len(words_no_punc))
71 |
72 | #Frequency distribution :
73 | fdist = FreqDist(words_no_punc)
74 |
75 | fdist.most_common(10)
76 |
77 |
  78 | #Plot the most common words on graph:
79 |
80 | fdist.plot(10)
81 |
82 | from nltk.corpus import stopwords
83 |
84 | #List of stopwords
85 | stopwords = stopwords.words("english")
86 | print(stopwords)
87 |
88 | #Empty list to store clean words :
89 | clean_words = []
90 |
91 | for w in words_no_punc:
92 | if w not in stopwords:
93 | clean_words.append(w)
94 |
95 | print(clean_words)
96 | print("\n")
97 | print(len(clean_words))
98 |
99 | #Frequency distribution :
100 | fdist = FreqDist(clean_words)
101 |
102 | fdist.most_common(10)
103 |
104 |
 105 | #Plot the most common words on graph:
106 |
107 | fdist.plot(10)
108 |
109 | #Library to form wordcloud :
110 | from wordcloud import WordCloud
111 |
112 | #Library to plot the wordcloud :
113 | import matplotlib.pyplot as plt
114 |
115 | #Generating the wordcloud :
116 | wordcloud = WordCloud().generate(text)
117 |
118 | #Plot the wordcloud :
119 | plt.figure(figsize = (12, 12))
120 | plt.imshow(wordcloud)
121 |
122 | #To remove the axis value :
123 | plt.axis("off")
124 | plt.show()
125 |
126 | #Import required libraries :
127 | import numpy as np
128 | from PIL import Image
129 | from wordcloud import WordCloud
130 |
131 | #Here we are going to use a circle image as mask :
132 | char_mask = np.array(Image.open("circle.png"))
133 |
134 | #Generating wordcloud :
135 | wordcloud = WordCloud(background_color="black",mask=char_mask).generate(text)
136 |
137 | #Plot the wordcloud :
138 | plt.figure(figsize = (8,8))
139 | plt.imshow(wordcloud)
140 |
141 | #To remove the axis value :
142 | plt.axis("off")
143 | plt.show()
144 |
145 | #Stemming Example :
146 |
147 | #Import stemming library :
148 | from nltk.stem import PorterStemmer
149 |
150 | porter = PorterStemmer()
151 |
152 | #Word-list for stemming :
153 | word_list = ["Study","Studying","Studies","Studied"]
154 |
155 | for w in word_list:
156 | print(porter.stem(w))
157 |
158 | #Stemming Example :
159 |
160 | #Import stemming library :
161 | from nltk.stem import SnowballStemmer
162 |
163 | snowball = SnowballStemmer("english")
164 |
165 | #Word-list for stemming :
166 | word_list = ["Study","Studying","Studies","Studied"]
167 |
168 | for w in word_list:
169 | print(snowball.stem(w))
170 |
171 | #Stemming Example :
172 |
173 | #Import stemming library :
174 | from nltk.stem import SnowballStemmer
175 |
176 | #Print languages supported :
177 | print(SnowballStemmer.languages)
178 |
179 | from nltk import WordNetLemmatizer
180 |
181 | lemma = WordNetLemmatizer()
182 | word_list = ["Study","Studying","Studies","Studied"]
183 |
184 | for w in word_list:
185 | print(lemma.lemmatize(w ,pos="v"))
186 |
187 | from nltk import WordNetLemmatizer
188 |
189 | lemma = WordNetLemmatizer()
190 | word_list = ["am","is","are","was","were"]
191 |
192 | for w in word_list:
193 | print(lemma.lemmatize(w ,pos="v"))
194 |
195 | from nltk.stem import PorterStemmer
196 |
197 | stemmer = PorterStemmer()
198 |
199 | print(stemmer.stem('studies'))
200 |
201 | from nltk.stem import WordNetLemmatizer
202 |
203 | lemmatizer = WordNetLemmatizer()
204 |
205 | print(lemmatizer.lemmatize('studies'))
206 |
207 |
208 | from nltk.stem import WordNetLemmatizer
209 |
210 | lemmatizer = WordNetLemmatizer()
211 | print(lemmatizer.lemmatize('studying', pos="v"))
212 | print(lemmatizer.lemmatize('studying', pos="n"))
213 | print(lemmatizer.lemmatize('studying', pos="a"))
214 | print(lemmatizer.lemmatize('studying', pos="r"))
215 |
216 | from nltk import WordNetLemmatizer
217 |
218 | lemma = WordNetLemmatizer()
219 | word_list = ["studies","leaves","decreases","plays"]
220 |
221 | for w in word_list:
222 | print(lemma.lemmatize(w))
223 |
224 | #PoS tagging :
225 | tag = nltk.pos_tag(["Studying","Study"])
226 | print (tag)
227 |
228 | #PoS tagging example :
229 |
230 | sentence = "A very beautiful young lady is walking on the beach"
231 |
232 | #Tokenizing words :
233 | tokenized_words = word_tokenize(sentence)
234 |
235 | for words in tokenized_words:
236 | tagged_words = nltk.pos_tag(tokenized_words)
237 |
238 | tagged_words
239 |
240 | #Extracting Noun Phrase from text :
241 |
242 | # ? - optional character
 243 | # * - 0 or more repetitions
244 | grammar = "NP : {
?*} "
245 | import matplotlib.pyplot as plt
246 | #Creating a parser :
247 | parser = nltk.RegexpParser(grammar)
248 |
249 | #Parsing text :
250 | output = parser.parse(tagged_words)
251 | print (output)
252 |
253 | #To visualize :
254 | #output.draw()
255 |
256 |
257 | #Chinking example :
 258 | # * - 0 or more repetitions
 259 | # + - 1 or more repetitions
260 |
261 | #Here we are taking the whole string and then
262 | #excluding adjectives from that chunk.
263 |
264 | grammar = r""" NP: {<.*>+}
 265 | }<JJ>+{"""
266 |
267 | #Creating parser :
268 | parser = nltk.RegexpParser(grammar)
269 |
270 | #parsing string :
271 | output = parser.parse(tagged_words)
272 | print(output)
273 |
274 | #To visualize :
275 | #output.draw()
276 |
277 |
278 | #Sentence for NER :
279 | sentence = "Mr. Smith made a deal on a beach of Switzerland near WHO."
280 |
281 | #Tokenizing words :
282 | tokenized_words = word_tokenize(sentence)
283 |
284 | #PoS tagging :
285 | for w in tokenized_words:
286 | tagged_words = nltk.pos_tag(tokenized_words)
287 |
288 | #print (tagged_words)
289 |
290 | #Named Entity Recognition :
291 | N_E_R = nltk.ne_chunk(tagged_words,binary=False)
292 | print(N_E_R)
293 |
294 | #To visualize :
295 | #N_E_R.draw()
296 |
297 |
298 | #Sentence for NER :
299 | sentence = "Mr. Smith made a deal on a beach of Switzerland near WHO."
300 |
301 | #Tokenizing words :
302 | tokenized_words = word_tokenize(sentence)
303 |
304 | #PoS tagging :
305 | for w in tokenized_words:
306 | tagged_words = nltk.pos_tag(tokenized_words)
307 |
308 | #print (tagged_words)
309 |
310 | #Named Entity Recognition :
311 | N_E_R = nltk.ne_chunk(tagged_words,binary=True)
312 |
313 | print(N_E_R)
314 |
315 | #To visualize :
316 | #N_E_R.draw()
317 |
318 | #Import wordnet :
319 | from nltk.corpus import wordnet
320 |
321 | for words in wordnet.synsets("Fun"):
322 | print(words)
323 |
324 | #Word meaning with definitions :
325 | for words in wordnet.synsets("Fun"):
326 | print(words.name())
327 | print(words.definition())
328 | print(words.examples())
329 |
330 | for lemma in words.lemmas():
331 | print(lemma)
332 | print("\n")
333 |
334 |
 335 | #How many different meanings :
336 | for words in wordnet.synsets("Fun"):
337 | for lemma in words.lemmas():
338 | print(lemma)
339 | print("\n")
340 |
341 |
342 | word = wordnet.synsets("Play")[0]
343 |
344 | #Checking name :
345 | print(word.name())
346 |
347 | #Checking definition :
348 | print(word.definition())
349 |
350 | #Checking examples:
351 | print(word.examples())
352 |
353 | word = wordnet.synsets("Play")[0]
354 |
355 | #Find more abstract term :
356 | print(word.hypernyms())
357 |
358 | word = wordnet.synsets("Play")[0]
359 |
360 | #Find more specific term :
361 | word.hyponyms()
362 |
363 | word = wordnet.synsets("Play")[0]
364 |
365 | #Get only name :
366 | print(word.lemmas()[0].name())
367 |
368 | #Finding synonyms :
369 |
370 | #Empty list to store synonyms :
371 | synonyms = []
372 |
373 | for words in wordnet.synsets('Fun'):
374 | for lemma in words.lemmas():
375 | synonyms.append(lemma.name())
376 |
377 | print(synonyms)
378 |
379 | #Finding antonyms :
380 |
381 | #Empty list to store antonyms :
382 | antonyms = []
383 |
384 | for words in wordnet.synsets('Natural'):
385 | for lemma in words.lemmas():
386 | if lemma.antonyms():
387 | antonyms.append(lemma.antonyms()[0].name())
388 |
389 | #Print antonyms :
390 | print(antonyms)
391 |
392 |
393 | #Finding synonyms and antonyms :
394 |
395 | #Empty lists to store synonyms/antonynms :
396 | synonyms = []
397 | antonyms = []
398 |
399 | for words in wordnet.synsets('New'):
400 | for lemma in words.lemmas():
401 | synonyms.append(lemma.name())
402 | if lemma.antonyms():
403 | antonyms.append(lemma.antonyms()[0].name())
404 |
405 | #Print lists :
406 | print(synonyms)
407 | print("\n")
408 | print(antonyms)
409 |
410 |
411 | #Similarity in words :
412 | word1 = wordnet.synsets("ship","n")[0]
413 |
414 | word2 = wordnet.synsets("boat","n")[0]
415 |
416 | #Check similarity :
417 | print(word1.wup_similarity(word2))
418 |
419 | #Similarity in words :
420 | word1 = wordnet.synsets("ship","n")[0]
421 |
422 | word2 = wordnet.synsets("bike","n")[0]
423 |
424 | #Check similarity :
425 | print(word1.wup_similarity(word2))
426 |
427 |
428 | #Import required libraries :
429 | from sklearn.feature_extraction.text import CountVectorizer
430 |
431 | #Text for analysis :
432 | sentences = ["Jim and Pam travelled by the bus:",
433 | "The train was late",
434 | "The flight was full.Travelling by flight is expensive"]
435 |
436 | #Create an object :
437 | cv = CountVectorizer()
438 |
439 | #Generating output for Bag of Words :
440 | B_O_W = cv.fit_transform(sentences).toarray()
441 |
442 | #Total words with their index in model :
443 | print(cv.vocabulary_)
444 | print("\n")
445 |
446 | #Features :
447 | print(cv.get_feature_names())
448 | print("\n")
449 |
450 | #Show the output :
451 | print(B_O_W)
452 |
453 |
454 | #Import required libraries :
455 | from sklearn.feature_extraction.text import TfidfVectorizer
456 |
457 | #Sentences for analysis :
458 | sentences = ['This is the first document','This document is the second document']
459 |
460 | #Create an object :
461 | vectorizer = TfidfVectorizer(norm = None)
462 |
463 | #Generating output for TF_IDF :
464 | X = vectorizer.fit_transform(sentences).toarray()
465 |
466 | #Total words with their index in model :
467 | print(vectorizer.vocabulary_)
468 | print("\n")
469 |
470 | #Features :
471 | print(vectorizer.get_feature_names())
472 | print("\n")
473 |
474 | #Show the output :
475 | print(X)
476 |
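 477 | #Optional check (illustrative addition, not part of the original script):
 478 | #With the default smooth_idf=True, sklearn computes idf as ln((1+n)/(1+df)) + 1,
 479 | #where n is the number of documents and df is the document frequency of a term;
 480 | #the fitted vectorizer exposes these values directly.
 481 | print(vectorizer.idf_)
 482 |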
--------------------------------------------------------------------------------
/natural_language_processing/semantic-analysis.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 |
4 | #Semantic Analysis Using Python - NLP
5 |
6 | * Tutorial: http://news.towardsai.net/nls
7 | * Github: https://github.com/towardsai/tutorials/tree/master/natural_language_processing/semantic-analysis.py
8 | """
9 |
10 | import numpy as np
11 | import pandas as pd
12 | import matplotlib.pyplot as plt
13 | import seaborn as sns
14 |
15 | from sklearn.datasets import fetch_20newsgroups
16 |
17 | dataset = fetch_20newsgroups(shuffle=True, random_state=5, remove=('headers', 'footers', 'quotes'))
18 | df = dataset.data
19 | df
20 |
21 | new_df = pd.DataFrame({'document':df})
22 |
23 | # removing everything except alphabets
  24 | new_df['clean_doc'] = new_df['document'].str.replace("[^a-zA-Z#]", " ", regex=True)
25 |
26 | # removing short words
27 | new_df['clean_doc'] = new_df['clean_doc'].apply(lambda x: ' '.join([w for w in x.split() if len(w)>4]))
28 |
29 | # make all text lowercase
30 | new_df['clean_doc'] = new_df['clean_doc'].apply(lambda x: x.lower())
31 |
32 | from nltk.corpus import stopwords
33 | swords = stopwords.words('english')
34 |
35 | # tokenization
36 | tokenized_doc = new_df['clean_doc'].apply(lambda x: x.split())
37 |
38 | # remove stop-words
39 | tokenized_doc = tokenized_doc.apply(lambda x: [item for item in x if item not in swords])
40 |
41 | # de-tokenization
42 | detokenized_doc = []
  43 | for i in range(len(new_df)):
44 | t = ' '.join(tokenized_doc[i])
45 | detokenized_doc.append(t)
46 |
47 | new_df['clean_doc'] = detokenized_doc
48 |
49 | from sklearn.feature_extraction.text import TfidfVectorizer
50 |
51 | vectorizer = TfidfVectorizer(stop_words='english', max_features= 300, max_df = 0.5, smooth_idf=True)
52 |
  53 | X = vectorizer.fit_transform(new_df['clean_doc'])
54 |
55 | X.shape
56 |
57 | from sklearn.decomposition import TruncatedSVD
58 |
59 | svd_model = TruncatedSVD(n_components=20, algorithm='randomized', n_iter=120, random_state=100)
60 |
61 | svd_model.fit(X)
62 | len(svd_model.components_)
63 |
64 | terms = vectorizer.get_feature_names()
65 |
66 | for i, comp in enumerate(svd_model.components_):
67 | terms_comp = zip(terms, comp)
68 | sorted_terms = sorted(terms_comp, key= lambda x:x[1], reverse=True)[:7]
69 | print("Topic "+str(i)+": ")
70 | for t in sorted_terms:
71 | print(t[0])
72 | print(" ")
73 |
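  74 | # Optional check (illustrative addition, not part of the original script):
  75 | # the share of variance retained by the 20 SVD components gives a rough sense
  76 | # of how much of the TF-IDF matrix the low-rank LSA model preserves.
  77 | print("Total explained variance:", svd_model.explained_variance_ratio_.sum())
  78 |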
--------------------------------------------------------------------------------
/neural_networks_tutorial_part_1/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/neural_networks_tutorial_part_1/neural_network_part1_1.py:
--------------------------------------------------------------------------------
1 | #Import required libraries:
2 | import numpy as np
3 |
4 | #Define input features:
5 | input_features = np.array([[0,0],[0,1],[1,0],[1,1]])
6 | print (input_features.shape)
7 | print (input_features)
8 |
9 | #Define target output:
10 | target_output = np.array([[0,1,1,1]])
11 |
12 | #Reshaping our target output into vector:
13 | target_output = target_output.reshape(4,1)
14 | print(target_output.shape)
15 | print (target_output)
16 |
17 | #Define weights:
18 | weights = np.array([[0.1],[0.2]])
19 | print(weights.shape)
20 | print (weights)
21 |
22 | #Bias weight:
23 | bias = 0.3
24 |
25 | #Learning Rate:
26 | lr = 0.05
27 |
28 | #Sigmoid function:
29 | def sigmoid(x):
30 | return 1/(1+np.exp(-x))
31 |
32 | #Derivative of sigmoid function:
33 | def sigmoid_der(x):
34 | return sigmoid(x)*(1-sigmoid(x))
35 |
36 | #Main logic for neural network:
37 |
38 | # Running our code 10000 times:
39 | for epoch in range(10000):
40 | inputs = input_features
41 |
42 | #Feedforward input:
43 | in_o = np.dot(inputs, weights) + bias
44 |
45 | #Feedforward output:
46 | out_o = sigmoid(in_o)
47 |
48 | #Backpropogation
49 |
50 | #Calculating error
51 | error = out_o - target_output
52 |
53 | #Going with the formula:
54 | x = error.sum()
55 | print(x)
56 |
57 | #Calculating derivative:
58 | derror_douto = error
59 | douto_dino = sigmoid_der(out_o)
60 |
61 | #Multiplying individual derivatives:
62 |
63 | deriv = derror_douto * douto_dino
64 |
65 | #Multiplying with the 3rd individual derivative:
66 | #Finding the transpose of input_features:
67 | inputs = input_features.T
68 | deriv_final = np.dot(inputs,deriv)
69 |
70 | #Updating the weights values:
71 | weights -= lr * deriv_final
72 |
73 | #Updating the bias weight value:
74 | for i in deriv:
  75 |         bias -= lr * i
76 |
  77 | print (weights)  #Check the final values for weight and bias
78 | print (bias)
79 |
80 | #Taking inputs:
81 | single_point = np.array([1,0])
82 |
83 | #1st step:
84 | result1 = np.dot(single_point, weights) + bias
85 |
86 | #2nd step:
87 | result2 = sigmoid(result1)
88 |
89 | #Print final result
90 | print(result2)
91 |
92 | #Taking inputs:
93 | single_point = np.array([1,1])
94 |
95 | #1st step:
96 | result1 = np.dot(single_point, weights) + bias
97 |
98 | #2nd step:
99 | result2 = sigmoid(result1) #Print final result
100 | print(result2)
101 |
102 | #Taking inputs:
103 | single_point = np.array([0,0])
104 |
105 | #1st step:
106 | result1 = np.dot(single_point, weights) + bias
107 |
108 | #2nd step:
109 | result2 = sigmoid(result1)
110 |
111 | #Print final result
112 | print(result2)
113 |
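 114 | #Optional check (illustrative addition, not part of the original script):
 115 | #The network was trained on the OR truth table, so evaluating all four input
 116 | #combinations at once should give outputs close to the targets [0,1,1,1].
 117 | print(sigmoid(np.dot(input_features, weights) + bias))
 118 |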
--------------------------------------------------------------------------------
/neural_networks_tutorial_part_1/neural_network_part1_2.py:
--------------------------------------------------------------------------------
1 | # Import required libraries:
2 | import numpy as np
3 |
4 | # Define input features:
5 | input_features = np.array([[0,0],[0,1],[1,0],[1,1]])
6 | print (input_features.shape)
7 | print (input_features)
8 |
9 | # Define target output:
10 | target_output = np.array([[0,1,1,1]])
11 |
12 | # Reshaping our target output into vector:
13 | target_output = target_output.reshape(4,1)
14 | print(target_output.shape)
15 | print (target_output)
16 |
17 | # Define weights:
18 | weights = np.array([[0.1],[0.2]])
19 | print(weights.shape)
20 | print (weights)
21 |
22 | # Define learning rate:
23 | lr = 0.05
24 |
25 | # Sigmoid function:
26 | def sigmoid(x):
27 | return 1/(1+np.exp(-x))
28 |
29 | # Derivative of sigmoid function:
30 | def sigmoid_der(x):
31 | return sigmoid(x)*(1-sigmoid(x))
32 |
33 | # Main logic for neural network:
34 | # Running our code 10000 times:
35 |
36 | for epoch in range(10000):
37 | inputs = input_features
38 |
39 | #Feedforward input:
40 | pred_in = np.dot(inputs, weights)
41 |
42 | #Feedforward output:
43 | pred_out = sigmoid(pred_in)
44 |
45 | #Backpropogation
46 | #Calculating error
47 | error = pred_out - target_output
48 | x = error.sum()
49 |
50 | #Going with the formula:
51 | print(x)
52 |
53 | #Calculating derivative:
54 | dcost_dpred = error
55 | dpred_dz = sigmoid_der(pred_out)
56 |
57 | #Multiplying individual derivatives:
  58 |     z_delta = dcost_dpred * dpred_dz  #Multiplying with the 3rd individual derivative:
59 | inputs = input_features.T
60 | weights -= lr * np.dot(inputs, z_delta)
61 |
62 |
63 | #Taking inputs:
64 | single_point = np.array([1,0])
65 |
66 | #1st step:
67 | result1 = np.dot(single_point, weights)
68 |
69 | #2nd step:
70 | result2 = sigmoid(result1)
71 |
72 | #Print final result
73 | print(result2)
74 |
75 | #Taking inputs:
76 | single_point = np.array([0,0])
77 |
78 | #1st step:
79 | result1 = np.dot(single_point, weights)
80 |
81 | #2nd step:
82 | result2 = sigmoid(result1)
83 |
84 | #Print final result
85 | print(result2)
86 |
87 | #Taking inputs:
88 | single_point = np.array([1,1])
89 |
90 | #1st step:
91 | result1 = np.dot(single_point, weights)
92 |
93 | #2nd step:
94 | result2 = sigmoid(result1)
95 |
96 | #Print final result
97 | print(result2)
98 |
--------------------------------------------------------------------------------
/neural_networks_tutorial_part_1/neural_network_part1_3.py:
--------------------------------------------------------------------------------
1 | # Import required libraries:
2 | import numpy as np
3 |
4 | # Define input features:
5 | input_features = np.array([[1,0,0,1],[1,0,0,0],[0,0,1,1],
6 | [0,1,0,0],[1,1,0,0],[0,0,1,1],
7 | [0,0,0,1],[0,0,1,0]])
8 | print (input_features.shape)
9 | print (input_features)
10 |
11 | # Define target output:
12 | target_output = np.array([[1,1,0,0,1,1,0,0]])
13 |
14 | # Reshaping our target output into vector:
15 | target_output = target_output.reshape(8,1)
16 | print(target_output.shape)
17 | print (target_output)
18 |
19 | # Define weights:
20 | weights = np.array([[0.1],[0.2],[0.3],[0.4]])
21 | print(weights.shape)
22 | print (weights)
23 |
24 | # Bias weight:
25 | bias = 0.3
26 |
27 | # Learning Rate:
28 | lr = 0.05
29 |
30 | # Sigmoid function:
31 | def sigmoid(x):
32 | return 1/(1+np.exp(-x))
33 |
34 | # Derivative of sigmoid function:
35 | def sigmoid_der(x):
36 | return sigmoid(x)*(1-sigmoid(x))
37 |
38 | # Main logic for neural network:
39 | # Running our code 10000 times:
40 | for epoch in range(10000):
41 | inputs = input_features
42 |
43 | #Feedforward input:
44 | pred_in = np.dot(inputs, weights) + bias
45 |
46 | #Feedforward output:
47 | pred_out = sigmoid(pred_in)
48 |
49 | #Backpropogation
50 | #Calculating error
51 | error = pred_out - target_output
52 |
53 | #Going with the formula:
54 | x = error.sum()
55 | print(x)
56 |
57 | #Calculating derivative:
58 | dcost_dpred = error
59 | dpred_dz = sigmoid_der(pred_out)
60 |
61 | #Multiplying individual derivatives:
62 | z_delta = dcost_dpred * dpred_dz
63 |
64 | #Multiplying with the 3rd individual derivative:
65 | inputs = input_features.T
  66 |     weights -= lr * np.dot(inputs, z_delta)  #Updating the bias weight value:
67 | for i in z_delta:
68 | bias -= lr * i
69 |
70 | #Printing final weights:
71 |
72 | print (weights)
73 | print ("\n\n")
74 | print (bias)
75 |
76 | #Taking inputs:
77 | single_point = np.array([1,0,0,1])
78 |
79 | #1st step:
80 | result1 = np.dot(single_point, weights) + bias
81 |
82 | #2nd step:
83 | result2 = sigmoid(result1)
84 |
85 | #Print final result
86 | print(result2)
87 |
88 | #Taking inputs:
89 | single_point = np.array([0,0,1,0])
90 |
91 | #1st step:
92 | result1 = np.dot(single_point, weights) + bias
93 |
94 | #2nd step:
95 | result2 = sigmoid(result1)
96 |
97 | #Print final result
98 | print(result2)
99 |
100 | #Taking inputs:
101 | single_point = np.array([1,0,1,0])
102 |
103 | #1st step:
104 | result1 = np.dot(single_point, weights) + bias
105 |
106 | #2nd step:
107 | result2 = sigmoid(result1)
108 |
109 | #Print final result
110 | print(result2)
111 |
--------------------------------------------------------------------------------
/neural_networks_tutorial_part_1/neural_networks_tutorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "neural-networks-tutorial.ipynb",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyMigAWqeY7WvQBiWBJFFzXD",
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {
20 | "id": "view-in-github",
21 | "colab_type": "text"
22 | },
23 | "source": [
24 |         ""
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "ScvnRKjsV0zS"
31 | },
32 | "source": [
33 | "# Neural Networks from Scratch with Python Code and Math in Detail\n",
34 | "\n",
35 | "* Tutorial: https://towardsai.net/p/machine-learning/building-neural-networks-from-scratch-with-python-code-and-math-in-detail-i-536fae5d7bbf \n",
36 | "\n",
37 | "* Github: https://github.com/towardsai/tutorials/tree/master/neural_networks_tutorial_part_1 \n",
38 | "\n"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "metadata": {
44 | "id": "TfbSw80eVNCS",
45 | "colab": {
46 | "base_uri": "https://localhost:8080/",
47 | "height": 312
48 | },
49 | "outputId": "ebfaf520-2318-4a80-ddec-8240ca14bafc"
50 | },
51 | "source": [
52 | "# Import required libraries:\n",
53 | "import numpy as np# Define input features:\n",
54 | "input_features = np.array([[0,0],[0,1],[1,0],[1,1]])\n",
55 | "print (input_features.shape)\n",
56 | "print (input_features)# Define target output:\n",
57 | "target_output = np.array([[0,1,1,1]])# Reshaping our target output into vector:\n",
58 | "target_output = target_output.reshape(4,1)\n",
59 | "print(target_output.shape)\n",
60 | "print (target_output)# Define weights:\n",
61 | "weights = np.array([[0.1],[0.2]])\n",
62 | "print(weights.shape)\n",
63 | "print (weights)# Bias weight:\n",
64 | "bias = 0.3# Learning Rate:\n",
65 | "lr = 0.05# Sigmoid function:\n",
66 | "def sigmoid(x):\n",
67 | " return 1/(1+np.exp(-x))# Derivative of sigmoid function:\n",
68 | "def sigmoid_der(x):\n",
69 | " return sigmoid(x)*(1-sigmoid(x))# Main logic for neural network:\n",
70 |     "# Running our code 10000 times:\nfor epoch in range(10000):\n",
71 | " inputs = input_features#Feedforward input:\n",
72 | " in_o = np.dot(inputs, weights) + bias #Feedforward output:\n",
73 |     "    out_o = sigmoid(in_o)    #Backpropagation\n",
74 | " #Calculating error\n",
75 | " error = out_o - target_output\n",
76 | " \n",
77 | " #Going with the formula:\n",
78 | " x = error.sum()\n",
79 | " print(x)\n",
80 | " \n",
81 | " #Calculating derivative:\n",
82 | " derror_douto = error\n",
83 | " douto_dino = sigmoid_der(out_o)\n",
84 | " \n",
85 | " #Multiplying individual derivatives:\n",
86 | " deriv = derror_douto * douto_dino #Multiplying with the 3rd individual derivative:\n",
87 | " #Finding the transpose of input_features:\n",
88 | " inputs = input_features.T\n",
89 | " deriv_final = np.dot(inputs,deriv)\n",
90 | " \n",
91 | " #Updating the weights values:\n",
92 | " weights -= lr * deriv_final #Updating the bias weight value:\n",
93 | " for i in deriv:\n",
94 |     "        bias -= lr * i\n\n#Check the final values for weight and bias:\nprint (weights)\n",
95 | " \n",
96 | "print (bias) #Taking inputs:\n",
97 | "single_point = np.array([1,0]) #1st step:\n",
98 | "result1 = np.dot(single_point, weights) + bias #2nd step:\n",
99 | "result2 = sigmoid(result1) #Print final result\n",
100 | "print(result2) #Taking inputs:\n",
101 | "single_point = np.array([1,1]) #1st step:\n",
102 | "result1 = np.dot(single_point, weights) + bias #2nd step:\n",
103 | "result2 = sigmoid(result1) #Print final result\n",
104 | "print(result2) #Taking inputs:\n",
105 | "single_point = np.array([0,0]) #1st step:\n",
106 | "result1 = np.dot(single_point, weights) + bias #2nd step:\n",
107 | "result2 = sigmoid(result1) #Print final result\n",
108 | "print(result2)"
109 | ],
110 | "execution_count": null,
111 | "outputs": [
112 | {
113 | "output_type": "stream",
114 | "text": [
115 | "(4, 2)\n",
116 | "[[0 0]\n",
117 | " [0 1]\n",
118 | " [1 0]\n",
119 | " [1 1]]\n",
120 | "(4, 1)\n",
121 | "[[0]\n",
122 | " [1]\n",
123 | " [1]\n",
124 | " [1]]\n",
125 | "(2, 1)\n",
126 | "[[0.1]\n",
127 | " [0.2]]\n",
128 | "0.3\n",
129 | "[0.59868766]\n",
130 | "[0.64565631]\n",
131 | "[0.57444252]\n"
132 | ],
133 | "name": "stdout"
134 | }
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "metadata": {
140 | "id": "YrRCooGFcb0f",
141 | "colab": {
142 | "base_uri": "https://localhost:8080/",
143 | "height": 295
144 | },
145 | "outputId": "275a1ab6-cd9b-475f-9775-f198e161a9e3"
146 | },
147 | "source": [
148 | "# Import required libraries:\n",
149 | "import numpy as np# Define input features:\n",
150 | "input_features = np.array([[0,0],[0,1],[1,0],[1,1]])\n",
151 | "print (input_features.shape)\n",
152 | "print (input_features)# Define target output:\n",
153 | "target_output = np.array([[0,1,1,1]])# Reshaping our target output into vector:\n",
154 | "target_output = target_output.reshape(4,1)\n",
155 | "print(target_output.shape)\n",
156 | "print (target_output)# Define weights:\n",
157 | "weights = np.array([[0.1],[0.2]])\n",
158 | "print(weights.shape)\n",
159 | "print (weights)# Define learning rate:\n",
160 | "lr = 0.05# Sigmoid function:\n",
161 | "def sigmoid(x):\n",
162 | " return 1/(1+np.exp(-x))# Derivative of sigmoid function:\n",
163 | "def sigmoid_der(x):\n",
164 | " return sigmoid(x)*(1-sigmoid(x))# Main logic for neural network:\n",
165 |     "# Running our code 10000 times:\nfor epoch in range(10000):\n",
166 | " inputs = input_features#Feedforward input:\n",
167 | " pred_in = np.dot(inputs, weights)#Feedforward output:\n",
168 |     "    pred_out = sigmoid(pred_in)    #Backpropagation\n",
169 | " #Calculating error\n",
170 | " error = pred_out - target_output\n",
171 | " x = error.sum()\n",
172 | " \n",
173 | " #Going with the formula:\n",
174 | " print(x)\n",
175 | " \n",
176 | " #Calculating derivative:\n",
177 | " dcost_dpred = error\n",
178 | " dpred_dz = sigmoid_der(pred_out)\n",
179 | " \n",
180 | " #Multiplying individual derivatives:\n",
181 | " z_delta = dcost_dpred * dpred_dz#Multiplying with the 3rd individual derivative:\n",
182 | " inputs = input_features.T\n",
183 | " weights -= lr * np.dot(inputs, z_delta)\n",
184 | " \n",
185 | " \n",
186 | "#Taking inputs:\n",
187 | "single_point = np.array([1,0])#1st step:\n",
188 | "result1 = np.dot(single_point, weights)#2nd step:\n",
189 | "result2 = sigmoid(result1)#Print final result\n",
190 | "print(result2)#Taking inputs:\n",
191 | "single_point = np.array([0,0])#1st step:\n",
192 | "result1 = np.dot(single_point, weights)#2nd step:\n",
193 | "result2 = sigmoid(result1)#Print final result\n",
194 | "print(result2)#Taking inputs:\n",
195 | "single_point = np.array([1,1])#1st step:\n",
196 | "result1 = np.dot(single_point, weights)#2nd step:\n",
197 | "result2 = sigmoid(result1)#Print final result\n",
198 | "print(result2)"
199 | ],
200 | "execution_count": null,
201 | "outputs": [
202 | {
203 | "output_type": "stream",
204 | "text": [
205 | "(4, 2)\n",
206 | "[[0 0]\n",
207 | " [0 1]\n",
208 | " [1 0]\n",
209 | " [1 1]]\n",
210 | "(4, 1)\n",
211 | "[[0]\n",
212 | " [1]\n",
213 | " [1]\n",
214 | " [1]]\n",
215 | "(2, 1)\n",
216 | "[[0.1]\n",
217 | " [0.2]]\n",
218 | "[0.52497919]\n",
219 | "[0.5]\n",
220 | "[0.57444252]\n"
221 | ],
222 | "name": "stdout"
223 | }
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "metadata": {
229 | "id": "ES5UHf2ufWXc",
230 | "colab": {
231 | "base_uri": "https://localhost:8080/",
232 | "height": 607
233 | },
234 | "outputId": "508da63f-9aaa-4bb0-8c56-294ab7fc0ce6"
235 | },
236 | "source": [
237 | "# Import required libraries:\n",
238 | "import numpy as np# Define input features:\n",
239 | "input_features = np.array([[1,0,0,1],[1,0,0,0],[0,0,1,1],\n",
240 | " [0,1,0,0],[1,1,0,0],[0,0,1,1],\n",
241 | " [0,0,0,1],[0,0,1,0]])\n",
242 | "print (input_features.shape)\n",
243 | "print (input_features)# Define target output:\n",
244 | "target_output = np.array([[1,1,0,0,1,1,0,0]])# Reshaping our target output into vector:\n",
245 | "target_output = target_output.reshape(8,1)\n",
246 | "print(target_output.shape)\n",
247 | "print (target_output)# Define weights:\n",
248 | "weights = np.array([[0.1],[0.2],[0.3],[0.4]])\n",
249 | "print(weights.shape)\n",
250 | "print (weights)# Bias weight:\n",
251 | "bias = 0.3# Learning Rate:\n",
252 | "lr = 0.05# Sigmoid function:\n",
253 | "def sigmoid(x):\n",
254 | " return 1/(1+np.exp(-x))# Derivative of sigmoid function:\n",
255 | "def sigmoid_der(x):\n",
256 | " return sigmoid(x)*(1-sigmoid(x))# Main logic for neural network:\n",
257 |     "# Running our code 10000 times:\nfor epoch in range(10000):\n",
258 | " inputs = input_features#Feedforward input:\n",
259 | " pred_in = np.dot(inputs, weights) + bias#Feedforward output:\n",
260 |     "    pred_out = sigmoid(pred_in)    #Backpropagation\n",
261 | " #Calculating error\n",
262 | " error = pred_out - target_output\n",
263 | " \n",
264 | " #Going with the formula:\n",
265 | " x = error.sum()\n",
266 | " print(x)\n",
267 | " \n",
268 | " #Calculating derivative:\n",
269 | " dcost_dpred = error\n",
270 | " dpred_dz = sigmoid_der(pred_out)\n",
271 | " \n",
272 | " #Multiplying individual derivatives:\n",
273 | " z_delta = dcost_dpred * dpred_dz#Multiplying with the 3rd individual derivative:\n",
274 | " inputs = input_features.T\n",
275 | " weights -= lr * np.dot(inputs, z_delta)#Updating the bias weight value:\n",
276 | " for i in z_delta:\n",
277 | " bias -= lr * i#Printing final weights: \n",
278 | "\n",
279 | "print (weights)\n",
280 | "print (\"\\n\\n\")\n",
281 | "print (bias)#Taking inputs:\n",
282 | "single_point = np.array([1,0,0,1])#1st step:\n",
283 | "result1 = np.dot(single_point, weights) + bias#2nd step:\n",
284 | "result2 = sigmoid(result1)#Print final result\n",
285 | "print(result2)#Taking inputs:\n",
286 | "single_point = np.array([0,0,1,0])#1st step:\n",
287 | "result1 = np.dot(single_point, weights) + bias#2nd step:\n",
288 | "result2 = sigmoid(result1)#Print final result\n",
289 | "print(result2)#Taking inputs:\n",
290 | "single_point = np.array([1,0,1,0])#1st step:\n",
291 | "result1 = np.dot(single_point, weights) + bias#2nd step:\n",
292 | "result2 = sigmoid(result1)#Print final result\n",
293 | "print(result2)"
294 | ],
295 | "execution_count": null,
296 | "outputs": [
297 | {
298 | "output_type": "stream",
299 | "text": [
300 | "(8, 4)\n",
301 | "[[1 0 0 1]\n",
302 | " [1 0 0 0]\n",
303 | " [0 0 1 1]\n",
304 | " [0 1 0 0]\n",
305 | " [1 1 0 0]\n",
306 | " [0 0 1 1]\n",
307 | " [0 0 0 1]\n",
308 | " [0 0 1 0]]\n",
309 | "(8, 1)\n",
310 | "[[1]\n",
311 | " [1]\n",
312 | " [0]\n",
313 | " [0]\n",
314 | " [1]\n",
315 | " [1]\n",
316 | " [0]\n",
317 | " [0]]\n",
318 | "(4, 1)\n",
319 | "[[0.1]\n",
320 | " [0.2]\n",
321 | " [0.3]\n",
322 | " [0.4]]\n",
323 | "[[0.1]\n",
324 | " [0.2]\n",
325 | " [0.3]\n",
326 | " [0.4]]\n",
327 | "\n",
328 | "\n",
329 | "\n",
330 | "0.3\n",
331 | "[0.68997448]\n",
332 | "[0.64565631]\n",
333 | "[0.66818777]\n"
334 | ],
335 | "name": "stdout"
336 | }
337 | ]
338 | }
339 | ]
340 | }
--------------------------------------------------------------------------------
/neural_networks_tutorial_part_2/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/neural_networks_tutorial_part_2/neural_networks_part2_1.py:
--------------------------------------------------------------------------------
1 | # Import required libraries :
2 | import numpy as np
3 |
4 | # Define input features :
5 | input_features = np.array([[0,0],[0,1],[1,0],[1,1]])
6 | print (input_features.shape)
7 | print (input_features)
8 |
9 | # Define target output :
10 | target_output = np.array([[0,1,1,1]])
11 |
12 | # Reshaping our target output into vector :
13 | target_output = target_output.reshape(4,1)
14 | print(target_output.shape)
15 | print (target_output)
16 |
17 | # Define weights :
18 | # 6 for hidden layer
19 | # 3 for output layer
20 | # 9 total
21 | weight_hidden = np.array([[0.1,0.2,0.3],
22 | [0.4,0.5,0.6]])
23 | weight_output = np.array([[0.7],[0.8],[0.9]])
24 |
25 | # Learning Rate :
26 | lr = 0.05
27 |
28 | # Sigmoid function :
29 | def sigmoid(x):
30 | return 1/(1+np.exp(-x))
31 |
32 | # Derivative of sigmoid function :
33 | def sigmoid_der(x):
34 | return sigmoid(x)*(1-sigmoid(x))
35 |
36 | for epoch in range(200000):
37 | # Input for hidden layer :
38 | input_hidden = np.dot(input_features, weight_hidden)
39 |
40 | # Output from hidden layer :
41 | output_hidden = sigmoid(input_hidden)
42 |
43 | # Input for output layer :
44 | input_op = np.dot(output_hidden, weight_output)
45 |
46 | # Output from output layer :
47 | output_op = sigmoid(input_op)
48 |
49 | #========================================================
50 |
51 | # Phase1
52 |
53 | # Calculating Mean Squared Error :
54 | error_out = ((1 / 2) * (np.power((output_op - target_output), 2)))
55 | print(error_out.sum())
56 |
57 | # Derivatives for phase 1 :
58 | derror_douto = output_op - target_output
59 | douto_dino = sigmoid_der(input_op)
60 | dino_dwo = output_hidden
61 | derror_dwo = np.dot(dino_dwo.T, derror_douto * douto_dino)
62 |
63 | #===========================================================
64 |
65 | # Phase 2
66 | # derror_w1 = derror_douth * douth_dinh * dinh_dw1
67 | # derror_douth = derror_dino * dino_douth
68 |
69 | # Derivatives for phase 2 :
70 | derror_dino = derror_douto * douto_dino
71 | dino_douth = weight_output
72 | derror_douth = np.dot(derror_dino , dino_douth.T)
73 | douth_dinh = sigmoid_der(input_hidden)
74 | dinh_dwh = input_features
75 | derror_wh = np.dot(dinh_dwh.T, douth_dinh * derror_douth)
76 |
77 | # Update Weights
78 | weight_hidden -= lr * derror_wh
79 | weight_output -= lr * derror_dwo
80 |
81 | # Final hidden layer weight values :
82 | print (weight_hidden)
83 |
84 | # Final output layer weight values :
85 | print (weight_output)
86 |
87 | # Predictions :
88 | #Taking inputs :
89 | single_point = np.array([1,1])
90 |
91 | #1st step :
92 | result1 = np.dot(single_point, weight_hidden)
93 |
94 | #2nd step :
95 | result2 = sigmoid(result1)
96 |
97 | #3rd step :
98 | result3 = np.dot(result2,weight_output)
99 |
100 | #4th step :
101 | result4 = sigmoid(result3)
102 | print(result4)
103 |
104 | #=================================================
105 |
106 | #Taking inputs :
107 | single_point = np.array([0,0])
108 |
109 | #1st step :
110 | result1 = np.dot(single_point, weight_hidden)
111 |
112 | #2nd step :
113 | result2 = sigmoid(result1)
114 |
115 | #3rd step :
116 | result3 = np.dot(result2,weight_output)
117 |
118 | #4th step :
119 | result4 = sigmoid(result3)
120 | print(result4)
121 |
122 | #=====================================================
123 |
124 | #Taking inputs :
125 | single_point = np.array([1,0])
126 |
127 | #1st step :
128 | result1 = np.dot(single_point, weight_hidden)
129 |
130 | #2nd step :
131 | result2 = sigmoid(result1)
132 |
133 | #3rd step :
134 | result3 = np.dot(result2,weight_output)
135 |
136 | #4th step :
137 | result4 = sigmoid(result3)
138 | print(result4)
139 |
--------------------------------------------------------------------------------
/neural_networks_tutorial_part_2/neural_networks_part2_2.py:
--------------------------------------------------------------------------------
1 | # Import required libraries :
2 | import numpy as np
3 |
4 | # Define input features :
5 | input_features = np.array([[0,0],[0,1],[1,0],[1,1]])
6 | print (input_features.shape)
7 | print (input_features)
8 | # Define target output :
9 | target_output = np.array([[0,1,1,1]])
10 | # Reshaping our target output into vector :
11 | target_output = target_output.reshape(4,1)
12 | print(target_output.shape)
13 | print (target_output)
14 |
15 | # Define weights :
16 | weights = np.array([[0.1],[0.2]])
17 | print(weights.shape)
18 | print (weights)
19 |
20 | # Define learning rate :
21 | lr = 0.05
22 |
23 | # Sigmoid function :
24 | def sigmoid(x):
25 | return 1/(1+np.exp(-x))
26 |
27 | # Derivative of sigmoid function :
28 | def sigmoid_der(x):
29 | return sigmoid(x)*(1-sigmoid(x))
30 |
31 | # Main logic for neural network :
32 | # Running our code 10000 times :
33 | for epoch in range(10000):
34 | inputs = input_features
35 |
36 | #Feedforward input :
37 | pred_in = np.dot(inputs, weights)
38 |
39 | #Feedforward output :
40 | pred_out = sigmoid(pred_in)
41 |
42 |     #Backpropagation
43 | #Calculating error
44 | error = pred_out - target_output
45 | x = error.sum()
46 |
47 | #Going with the formula :
48 | print(x)
49 |
50 | #Calculating derivative :
51 | dcost_dpred = error
52 | dpred_dz = sigmoid_der(pred_out)
53 |
54 | #Multiplying individual derivatives :
55 | z_delta = dcost_dpred * dpred_dz
56 |
57 | #Multiplying with the 3rd individual derivative :
58 | inputs = input_features.T
59 | weights -= lr * np.dot(inputs, z_delta)
60 |
61 | #Predictions :
62 |
63 | #Taking inputs :
64 | single_point = np.array([1,0])
65 |
66 | #1st step :
67 | result1 = np.dot(single_point, weights)
68 |
69 | #2nd step :
70 | result2 = sigmoid(result1)
71 |
72 | #Print final result
73 | print(result2)
74 |
75 | #====================================
76 |
77 | #Taking inputs :
78 | single_point = np.array([0,0])
79 |
80 | #1st step :
81 | result1 = np.dot(single_point, weights)
82 |
83 | #2nd step :
84 | result2 = sigmoid(result1)
85 |
86 | #Print final result
87 | print(result2)
88 |
89 | #===================================
90 | #Taking inputs :
91 | single_point = np.array([1,1])
92 |
93 | #1st step :
94 | result1 = np.dot(single_point, weights)
95 |
96 | #2nd step :
97 | result2 = sigmoid(result1)
98 |
99 | #Print final result
100 | print(result2)
101 |
--------------------------------------------------------------------------------
/neural_networks_tutorial_part_2/neural_networks_part2_3.py:
--------------------------------------------------------------------------------
1 | # Import required libraries :
2 | import numpy as np
3 |
4 | # Define input features :
5 | input_features = np.array([[0,0],[0,1],[1,0],[1,1]])
6 | print (input_features.shape)
7 | print (input_features)
8 |
9 | # Define target output :
10 | target_output = np.array([[0,1,1,0]])
11 |
12 | # Reshaping our target output into vector :
13 | target_output = target_output.reshape(4,1)
14 | print(target_output.shape)
15 | print (target_output)
16 |
17 | # Define weights :
18 | # 8 for hidden layer
19 | # 4 for output layer
20 | # 12 total
21 | weight_hidden = np.random.rand(2,4)
22 | weight_output = np.random.rand(4,1)
23 |
24 | # Learning Rate :
25 | lr = 0.05
26 |
27 | # Sigmoid function :
28 | def sigmoid(x):
29 | return 1/(1+np.exp(-x))
30 |
31 | # Derivative of sigmoid function :
32 | def sigmoid_der(x):
33 | return sigmoid(x)*(1-sigmoid(x))
34 |
35 | # Main logic :
36 | for epoch in range(200000):
37 |
38 | # Input for hidden layer :
39 | input_hidden = np.dot(input_features, weight_hidden)
40 |
41 | # Output from hidden layer :
42 | output_hidden = sigmoid(input_hidden)
43 |
44 | # Input for output layer :
45 | input_op = np.dot(output_hidden, weight_output)
46 |
47 | # Output from output layer :
48 | output_op = sigmoid(input_op)
49 |
50 | #========================================================================
51 | # Phase1
52 |
53 | # Calculating Mean Squared Error :
54 | error_out = ((1 / 2) * (np.power((output_op - target_output), 2)))
55 | print(error_out.sum())
56 |
57 | # Derivatives for phase 1 :
58 | derror_douto = output_op - target_output
59 | douto_dino = sigmoid_der(input_op)
60 | dino_dwo = output_hidden
61 | derror_dwo = np.dot(dino_dwo.T, derror_douto * douto_dino)
62 |
63 | # ========================================================================
64 | # Phase 2
65 | # derror_w1 = derror_douth * douth_dinh * dinh_dw1
66 | # derror_douth = derror_dino * dino_douth
67 |
68 | # Derivatives for phase 2 :
69 | derror_dino = derror_douto * douto_dino
70 | dino_douth = weight_output
71 | derror_douth = np.dot(derror_dino , dino_douth.T)
72 | douth_dinh = sigmoid_der(input_hidden)
73 | dinh_dwh = input_features
74 | derror_dwh = np.dot(dinh_dwh.T, douth_dinh * derror_douth)
75 |
76 | # Update Weights
77 | weight_hidden -= lr * derror_dwh
78 | weight_output -= lr * derror_dwo
79 |
80 | # Final values of weight in hidden layer :
81 | print (weight_hidden)
82 |
83 | # Final values of weight in output layer :
84 | print (weight_output)
85 |
86 | #Taking inputs :
87 | single_point = np.array([0,-1])
88 |
89 | #1st step :
90 | result1 = np.dot(single_point, weight_hidden)
91 |
92 | #2nd step :
93 | result2 = sigmoid(result1)
94 |
95 | #3rd step :
96 | result3 = np.dot(result2,weight_output)
97 |
98 | #4th step :
99 | result4 = sigmoid(result3)
100 | print(result4)
101 |
102 | #Taking inputs :
103 | single_point = np.array([0,5])
104 |
105 | #1st step :
106 | result1 = np.dot(single_point, weight_hidden)
107 |
108 | #2nd step :
109 | result2 = sigmoid(result1)
110 |
111 | #3rd step :
112 | result3 = np.dot(result2,weight_output)
113 |
114 | #4th step :
115 | result4 = sigmoid(result3)
116 | print(result4)
117 |
118 | #Taking inputs :
119 | single_point = np.array([1,1.2])
120 |
121 | #1st step :
122 | result1 = np.dot(single_point, weight_hidden)
123 |
124 | #2nd step :
125 | result2 = sigmoid(result1)
126 |
127 | #3rd step :
128 | result3 = np.dot(result2,weight_output)
129 |
130 | #4th step :
131 | result4 = sigmoid(result3)
132 | print(result4)
133 |
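134 | # --- Illustrative addition (not part of the original tutorial) ---
135 | # Checking the trained two-layer network against the full XOR truth table,
136 | # reusing `weight_hidden`, `weight_output` and `sigmoid` defined above.
137 | for point, target in zip(input_features, target_output):
138 |     hidden_out = sigmoid(np.dot(point, weight_hidden))
139 |     prediction = sigmoid(np.dot(hidden_out, weight_output))
140 |     print(point, "target:", target, "prediction:", prediction)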
--------------------------------------------------------------------------------
/pandas/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/pandas/pd-melt.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | #Understanding Pandas Melt - pd.melt()
4 |
5 | * Tutorial: https://news.towardsai.net/pdm
6 | * Github: https://github.com/towardsai/tutorials/tree/master/pandas/pd-melt.py
7 | """
8 |
9 | #Import Required Libraries:
10 | import pandas as pd
11 |
12 | #Raw data in form of dictionary:
13 | data = {"Person":["Alan","Berta","Charlie","Danielle"], #Name of Person
14 | "House":["A","B","A","C"], #Name of houses they live in
15 | "Age":[32,46,35,28], #Age of Person
16 | "Books":[100,30,20,40], #Number of books owned
17 |         "Movies":[10,20,80,60] #Number of movies watched
18 | }
19 |
20 | #Converting the raw data into pandas DataFrame:
21 | data_wide = pd.DataFrame(data)
22 |
23 | #Printing the pandas DataFrame:
24 | data_wide
25 |
26 | #Melting the DataFrame from wide to long format:
27 | #Without specifying any parameters:
28 |
29 | data_wide.melt()
30 |
31 | #Melting the DataFrame from wide to long format:
32 | #id_vars
33 |
34 | data_wide.melt(id_vars=["Person","House"]) #Identifier columns
35 |
36 | #Melting the DataFrame from wide to long format:
37 | #id_vars
38 | #value_vars
39 |
40 | data_wide.melt(id_vars=["Person","House"], #Identifier columns
41 | value_vars=["Age","Books","Movies"]) #Columns to be melted
42 |
43 | #Melting the DataFrame from wide to long format:
44 | #id_vars
45 | #value_vars
46 |
47 | data_wide.melt(id_vars=["Person"], #Identifier columns
48 | value_vars=["Books","Movies"]) #Columns to be melted
49 |
50 | #Melting the DataFrame from wide to long format:
51 | #id_vars
52 | #value_vars
53 | #var_name
54 | #value_name
55 |
56 | data_wide.melt(id_vars=["Person","House"], #Identifier columns
57 | value_vars=["Age","Books","Movies"], #Columns to be melted
58 | var_name="Info", #Renaming the variable column name
59 | value_name="Numerical") #Renaming the value column name
60 |
61 | #Melting the DataFrame from wide to long format:
62 | #id_vars
63 | #value_vars
64 | #var_name
65 | #value_name
66 |
67 | data_wide.melt(id_vars=["Person"], #Identifier columns
68 | value_vars=["Books","Movies"], #Columns to be melted
69 | var_name="Info", #Renaming the variable column name
70 | value_name="Numerical") #Renaming the value column name
71 |
72 | #Melting the DataFrame from wide to long format:
73 | #id_vars
74 | #value_vars
75 | #var_name
76 | #value_name
77 | #ignore_index
78 |
79 | data_wide.melt(id_vars=["Person","House"], #Identifier columns
80 | value_vars=["Age","Books","Movies"], #Columns to be melted
81 | var_name="Info", #Renaming the variable column name
82 | value_name="Numerical", #Renaming the value column name
83 | ignore_index=False) #Using the original index
84 |
85 | #Creating multiple indexes for columns:
86 | data_wide.columns = [["Person","House","Age","Books","Movies"],
87 | ["Name","Flat","Old","Text","Video"]]
88 |
89 | #Printing the DataFrame:
90 | data_wide
91 |
92 | #Melting the DataFrame from wide to long format:
93 | #id_vars
94 | #value_vars
95 | #var_name
96 | #value_name
97 | #col_level
98 |
99 | data_wide.melt(id_vars=["Person","House"], #Identifier columns
100 | value_vars=["Age","Books","Movies"], #Columns to be melted
101 | var_name="Info", #Renaming the variable column name
102 | value_name="Numerical", #Renaming the value column name
103 | col_level=0) #Using the 0th column level index
104 |
105 | #Melting the DataFrame from wide to long format:
106 | #id_vars
107 | #value_vars
108 | #var_name
109 | #value_name
110 | #col_level
111 |
112 | data_wide.melt(id_vars=["Name","Flat"], #Identifier columns
113 | value_vars=["Old","Text","Video"], #Columns to be melted
114 | var_name="Info", #Renaming the variable column name
115 | value_name="Numerical", #Renaming the value column name
116 | col_level=1) #Using the 1st column level index
117 |
118 | #Melting the DataFrame from wide to long format:
119 | #id_vars
120 | #value_vars
121 | #var_name
122 | #value_name
123 | #col_level
124 | #ignore_index
125 |
126 | data_wide.melt(id_vars=["Name","Flat"], #Identifier columns
127 | value_vars=["Old","Text","Video"], #Columns to be melted
128 | var_name="Info", #Renaming the variable column name
129 | value_name="Numerical", #Renaming the value column name
130 | ignore_index=False, #Using the original index
131 | col_level=1) #Using the 1st column level index
132 |
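133 | #Illustrative addition (not part of the original tutorial):
134 | #A melted (long) DataFrame can usually be reshaped back to wide format
135 | #with pivot(); a minimal sketch using the original data dictionary:
136 | 
137 | data_long = pd.DataFrame(data).melt(id_vars=["Person"])
138 | data_long.pivot(index="Person", columns="variable", values="value")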
--------------------------------------------------------------------------------
/pandas/pd_dropna().py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | #Handling Missing Values in Pandas
4 |
5 | * Tutorial: https://news.towardsai.net/hmv
6 | * Github: https://github.com/towardsai/tutorials/tree/master/pandas
7 | """
8 |
9 | #Import Required Libraries:
10 | import pandas as pd
11 |
12 | #Raw data in form of dictionary:
13 | info = {"Person":["Alan","Berta","Charlie","Danielle","Euler",pd.NA], #Name of Person.
14 | "Age":[32,45,35,28,30,pd.NA], #Age of Person.
15 | "Degree":["CS","Biology","Physics",pd.NA,"Physics","CS"], #Major.
16 | "Country":["USA","Mexico","USA","Canada","USA","Canada"], #Country of study.
17 | "Books":[10,pd.NA,30,40,50,60], #Books owned.
18 | "Batch Size":[200,100,50,200,50,pd.NA] #Batch Size.
19 | }
20 |
21 | #Converting the raw data into DataFrame:
22 | data = pd.DataFrame(info)
23 |
24 | #Printing the DataFrame:
25 | data
26 |
27 | #Dropping the rows where at least one element is missing.
28 |
29 | data.dropna()
30 |
31 | #Drop the rows where at least one element is missing.
32 |
33 | data.dropna(axis=0)
34 |
35 | #Drop the rows where at least one element is missing.
36 |
37 | data.dropna(axis="rows")
38 |
39 | #Drop the columns where at least one element is missing.
40 |
41 | data.dropna(axis=1)
42 |
43 | #Drop the columns where at least one element is missing.
44 |
45 | data.dropna(axis="columns")
46 |
47 | #Drop the rows where at least one element is missing.
48 |
49 | data.dropna(how="any")
50 |
51 | #Import Required Libraries:
52 | import pandas as pd
53 |
54 | #Raw data in form of dictionary:
55 | info = {"Person":["Alan","Berta",pd.NA,"Charlie","Danielle","Euler"], #Name of Person.
56 | "Age":[32,45,pd.NA,35,28,30], #Age of Person.
57 | "Degree":["CS","Biology",pd.NA,"Physics",pd.NA,"Physics"], #Major.
58 | "Country":["USA","Mexico",pd.NA,"USA","Canada","USA"], #Country of study.
59 | "Books":[10,pd.NA,pd.NA,30,40,50], #Books owned.
60 | "Batch Size":[200,100,pd.NA,50,200,50] #Batch Size.
61 | }
62 |
63 | #Converting the raw data into DataFrame:
64 | data = pd.DataFrame(info)
65 |
66 | #Printing the DataFrame:
67 | data
68 |
69 | #Drop the rows if all elements are missing.
70 |
71 | data.dropna(how="all")
72 |
73 | #Keep the rows with at least 5 non missing elements.
74 |
75 | data.dropna(thresh=5)
76 |
77 | #Import Required Libraries:
78 | import pandas as pd
79 |
80 | #Raw data in form of dictionary:
81 | info = {"Person":["Alan","Berta",pd.NA,"Charlie","Danielle","Euler"], #Name of Person.
82 | "Age":[32,pd.NA,pd.NA,35,pd.NA,30], #Age of Person.
83 | "Degree":["CS","Biology",pd.NA,"Physics",pd.NA,"Physics"], #Major.
84 | "Country":["USA",pd.NA,pd.NA,"USA","Canada","USA"], #Country of study.
85 | "Books":[10,pd.NA,pd.NA,30,40,50], #Books owned.
86 | "Batch Size":[200,100,pd.NA,50,200,50] #Batch Size.
87 | }
88 |
89 | #Converting the raw data into DataFrame:
90 | data = pd.DataFrame(info)
91 |
92 | #Printing the DataFrame:
93 | data
94 |
95 | #Define in which columns to look for missing elements.
96 |
97 | data.dropna(subset=["Person","Degree","Country"])
98 |
99 | #Import Required Libraries:
100 | import pandas as pd
101 |
102 | #Raw data in form of dictionary:
103 | info = {"Person":["Alan","Berta","Charlie","Danielle","Euler",pd.NA], #Name of Person.
104 | "Age":[32,45,35,28,30,pd.NA], #Age of Person.
105 | "Degree":["CS","Biology","Physics",pd.NA,"Physics","CS"], #Major.
106 | "Country":["USA","Mexico","USA","Canada","USA","Canada"], #Country of study.
107 | "Books":[10,pd.NA,30,40,50,60], #Books owned.
108 | "Batch Size":[200,100,50,200,50,pd.NA] #Batch Size.
109 | }
110 |
111 | #Converting the raw data into DataFrame:
112 | data = pd.DataFrame(info)
113 |
114 | #Printing the DataFrame:
115 | data
116 |
117 | #inplace=True makes the changes in the original DataFrame itself.
118 | #It returns None.
119 |
120 | data.dropna(inplace=True)
121 | data
122 |
--------------------------------------------------------------------------------
/pandas/pd_fillna().py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | #Handling Missing Values in Pandas
4 |
5 | * Tutorial: https://news.towardsai.net/hmv
6 | * Github: https://github.com/towardsai/tutorials/tree/master/pandas
7 | """
8 |
9 | #Import Required Libraries:
10 | import pandas as pd
11 |
12 | #Raw data in form of dictionary:
13 | info = {"Person":["Alan","Berta","Charlie","Danielle","Euler",pd.NA], #Name of Person.
14 | "Age":[32,45,35,28,30,pd.NA], #Age of Person.
15 | "Degree":["CS",pd.NA,pd.NA,pd.NA,"Physics",pd.NA], #Major.
16 | "Country":["USA","Mexico","USA",pd.NA,"USA",pd.NA], #Country of study.
17 | "Books":[10,pd.NA,30,pd.NA,50,60], #Books owned.
18 | "Batch Size":[200,100,50,200,50,pd.NA] #Batch Size.
19 | }
20 |
21 | #Converting the raw data into DataFrame:
22 | data = pd.DataFrame(info)
23 |
24 | #Printing the DataFrame:
25 | data
26 |
27 | #Replacing all missing values with 0:
28 |
29 | data.fillna(value=0)
30 |
31 | #Replacing values:
32 |
33 | #Values to be used for specific column:
34 | values = {"Person":"---", "Age":0, "Degree":"---","Books":0,"Batch Size":0}
35 |
36 | #Replacing the values:
37 | data.fillna(value=values)
38 |
39 | #Using method="ffill":
40 |
41 | data.fillna(method="ffill")
42 |
43 | #Using method="pad":
44 | #Same as method="ffill"
45 |
46 | data.fillna(method="pad")
47 |
48 | #Using method="ffill":
49 | #Using axis=0 (Default)
50 |
51 | data.fillna(method="ffill", axis=0)
52 |
53 | #Using method="ffill":
54 | #Using axis=1
55 |
56 | data.fillna(method="ffill", axis=1)
57 |
58 | #Using method="bfill":
59 |
60 | data.fillna(method="bfill")
61 |
62 | #Using method="backfill":
63 | #Same as method="bfill"
64 |
65 | data.fillna(method="backfill")
66 |
67 | #Using method="bfill":
68 | #Using axis=0 (Default)
69 |
70 | data.fillna(method="bfill",axis=0)
71 |
72 | #Using method="bfill":
73 | #Using axis=1:
74 |
75 | data.fillna(method="bfill",axis=1)
76 |
77 | #Using method="ffill":
78 | #Using axis=0:
79 | #Using limit=1:
80 |
81 | data.fillna(method="ffill",axis=0, limit=1)
82 |
83 | #Using method="ffill":
84 | #Using axis=1:
85 | #Using limit=1:
86 |
87 | data.fillna(method="ffill",axis=1, limit=1)
88 |
89 | #Using method="bfill":
90 | #Using axis=0:
91 | #Using limit=1:
92 |
93 | data.fillna(method="bfill",axis=0, limit=1)
94 |
95 | #Using method="bfill":
96 | #Using axis=1:
97 | #Using limit=1:
98 |
99 | data.fillna(method="bfill",axis=1, limit=1)
100 |
101 | #Import Required Libraries:
102 | import pandas as pd
103 |
104 | #Raw data in form of dictionary:
105 | info = {"Age":[32.0,45.0,35.0,28.0,30.0,40.0],
106 | "Books":[10.0,pd.NA,30.0,40.0,50.0,60.0],
107 | "Batch Size":[200,100,50,200,50,300]
108 | }
109 |
110 | #Converting the raw data into DataFrame:
111 | data = pd.DataFrame(info)
112 |
113 | #Printing the DataFrame:
114 | data
115 |
116 | data.dtypes
117 |
118 | a = data.fillna(0,downcast="infer")
119 | a
120 |
121 | a.dtypes
122 |
123 | #inplace=True makes the changes in the original DataFrame itself.
124 | #It returns None.
125 |
126 | data.fillna(value=0,inplace=True)
127 | data
128 |
129 | #Import Required Libraries:
130 | import pandas as pd
131 |
132 | #Raw data in form of dictionary:
133 | info = {"Person":["Alan","Berta","Charlie","Danielle","Euler",pd.NA], #Name of Person.
134 | "Age":[32,45,35,28,30,pd.NA], #Age of Person.
135 | "Degree":["CS",pd.NA,pd.NA,pd.NA,"Physics",pd.NA], #Major.
136 | "Country":["USA","Mexico","USA",pd.NA,"USA",pd.NA], #Country of study.
137 | "Books":[10,pd.NA,30,pd.NA,50,60], #Books owned.
138 | "Batch Size":[200,100,50,200,50,pd.NA] #Batch Size.
139 | }
140 |
141 | #Converting the raw data into DataFrame:
142 | data = pd.DataFrame(info)
143 |
144 | #Printing the DataFrame:
145 | data
146 |
147 | #Using pd.DataFrame.bfill():
148 |
149 | data.bfill()
150 |
151 | #Using pd.DataFrame.backfill():
152 |
153 | data.backfill()
154 |
155 | #Using pd.DataFrame.ffill():
156 |
157 | data.ffill()
158 |
159 | #Using pd.DataFrame.pad():
160 |
161 | data.pad()
162 |
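163 | #Illustrative addition (not part of the original tutorial):
164 | #A common alternative to constant fills is replacing missing values
165 | #with each column's mean, shown here on a small numeric DataFrame.
166 | 
167 | import numpy as np
168 | 
169 | num = pd.DataFrame({"Age":[32,45,np.nan,28],
170 |                     "Books":[10,np.nan,30,40]})
171 | num.fillna(num.mean())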
--------------------------------------------------------------------------------
/pandas/pd_isna().py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | #Handling Missing Values in Pandas
4 |
5 | * Tutorial: https://news.towardsai.net/hmv
6 | * Github
7 | """
8 |
9 | #Import Required Libraries:
10 |
11 | import numpy as np
12 | import pandas as pd
13 |
14 | #Scalar arguments:
15 | #Numerical value
16 |
17 | pd.isna(28)
18 |
19 | #Scalar arguments:
20 | #String value
21 |
22 | pd.isna("Pratik")
23 |
24 | #Scalar arguments:
25 | #Empty strings are not considered as NA values
26 |
27 | pd.isna("")
28 |
29 | #Scalar arguments:
30 | #Infinite values are not considered as NA values
31 |
32 | pd.isna(np.inf)
33 |
34 | #Scalar arguments:
35 | #NaN: Not a Number
36 |
37 | pd.isna(np.NaN)
38 |
39 | #Scalar arguments:
40 | #None
41 |
42 | pd.isna(None)
43 |
44 | #Scalar arguments:
45 | #NA: Not Available
46 |
47 | pd.isna(pd.NA)
48 |
49 | #Scalar arguments:
50 | #NaT: Not a Timestamp
51 |
52 | pd.isna(pd.NaT)
53 |
54 | #nd-arrays:
55 |
56 | arr = np.array([1,2,"Blue"])
57 | print(arr)
58 | print("\n")
59 | pd.isna(arr)
60 |
61 | #nd-arrays:
62 | #Empty strings are not considered as NA values
63 |
64 | arr = np.array([[1,2,None],
65 | [3,4,pd.NA],
66 | [5,np.NaN,6],
67 | ["",7,8],
68 | ["Blue",pd.NaT,"Red"]])
69 |
70 | print(arr)
71 | print("\n")
72 | pd.isna(arr)
73 |
74 | #For index values:
75 |
76 | id = pd.Index([1,2,np.NaN,"Blue"])
77 | print(id)
78 | print("\n")
79 | pd.isna(id)
80 |
81 | #For index values:
82 |
83 | id = pd.DatetimeIndex([pd.Timestamp("2020-10-28"),
84 | pd.Timestamp(""),
85 | None,
86 | np.NaN,
87 | pd.NA,
88 | pd.NaT])
89 |
90 | print(id)
91 | print("\n")
92 | pd.isna(id)
93 |
94 | #Series:
95 |
96 | s = pd.Series([1,2,3,None,4,np.NaN,pd.NA,pd.NaT,"Blue"])
97 | print(s)
98 | print("\n")
99 | pd.isna(s)
100 |
101 | #DataFrame:
102 |
103 | df = pd.DataFrame({"Name":["Alan","Berta","Charlie",None],
104 | "Age":[32,45,np.NaN,28],
105 | "Birthday":[pd.NaT,None,pd.Timestamp("1975-10-28"),np.NaN],
106 | "Country":["USA","","USA","Canada"]
107 | })
108 |
109 | print(df)
110 | print("\n")
111 | pd.isna(df)
112 |
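113 | #Illustrative addition (not part of the original tutorial):
114 | #pd.isna() is often combined with sum() to count missing values per column:
115 | 
116 | pd.isna(df).sum()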
--------------------------------------------------------------------------------
/pandas/pd_isnull().py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | #Handling Missing Values in Pandas
4 |
5 | * Tutorial: https://news.towardsai.net/hmv
6 | * Github
7 | """
8 |
9 | #Import Required Libraries:
10 |
11 | import numpy as np
12 | import pandas as pd
13 |
14 | #Scalar arguments:
15 | #Numerical value
16 |
17 | pd.isnull(28)
18 |
19 | #Scalar arguments:
20 | #String value
21 |
22 | pd.isnull("Pratik")
23 |
24 | #Scalar arguments:
25 | #Empty strings are not considered as null values
26 |
27 | pd.isnull("")
28 |
29 | #Scalar arguments:
30 | #Infinite values are not considered as null values
31 |
32 | pd.isnull(np.inf)
33 |
34 | #Scalar arguments:
35 | #NaN: Not a Number
36 |
37 | pd.isnull(np.NaN)
38 |
39 | #Scalar arguments:
40 | #None
41 |
42 | pd.isnull(None)
43 |
44 | #Scalar arguments:
45 | #NA: Not Available
46 |
47 | pd.isnull(pd.NA)
48 |
49 | #Scalar arguments:
50 | #NaT: Not a Timestamp
51 |
52 | pd.isnull(pd.NaT)
53 |
54 | #nd-arrays:
55 |
56 | arr = np.array([1,2,"Blue"])
57 | print(arr)
58 | print("\n")
59 | pd.isnull(arr)
60 |
61 | #nd-arrays:
62 | #Empty strings are not considered as NA values
63 |
64 | arr = np.array([[1,2,None],
65 | [3,4,pd.NA],
66 | [5,np.NaN,6],
67 | ["",7,8],
68 | ["Blue",pd.NaT,"Red"]])
69 |
70 | print(arr)
71 | print("\n")
72 | pd.isnull(arr)
73 |
74 | #For index values:
75 |
76 | id = pd.Index([1,2,np.NaN,"Blue"])
77 | print(id)
78 | print("\n")
79 | pd.isnull(id)
80 |
81 | #For index values:
82 |
83 | id = pd.DatetimeIndex([pd.Timestamp("2020-10-28"),
84 | pd.Timestamp(""),
85 | None,
86 | np.NaN,
87 | pd.NA,
88 | pd.NaT])
89 |
90 | print(id)
91 | print("\n")
92 | pd.isnull(id)
93 |
94 | #Series:
95 |
96 | s = pd.Series([1,2,3,None,4,np.NaN,pd.NA,pd.NaT,"Blue"])
97 | print(s)
98 | print("\n")
99 | pd.isnull(s)
100 |
101 | #DataFrame:
102 |
103 | df = pd.DataFrame({"Name":["Alan","Berta","Charlie",None],
104 | "Age":[32,45,np.NaN,28],
105 | "Birthday":[pd.NaT,None,pd.Timestamp("1975-10-28"),np.NaN],
106 | "Country":["USA","","USA","Canada"]
107 | })
108 |
109 | print(df)
110 | print("\n")
111 | pd.isnull(df)
112 |
--------------------------------------------------------------------------------
/pandas/pd_join().py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | #Diving Into Pandas Join - pd.join()
4 |
5 | * Tutorial: https://news.towardsai.net/xfr
6 | * Github: https://github.com/towardsai/tutorials/tree/master/pandas
7 | """
8 |
9 | #Import Required Libraries:
10 |
11 | import pandas as pd
12 | import numpy as np
13 |
14 | #Creating first DataFrame:
15 |
16 | data1 = {"Name":["Alan","Berta","Charlie","Danielle","Euler"],
17 | "Age":[32,45,35,28,30]}
18 |
19 | df1 = pd.DataFrame(data1)
20 | df1
21 |
22 | #Creating second DataFrame:
23 |
24 | data2 = {"Name":["Berta","Charlie","Danielle","Euler","Frank"],
25 | "Money":[5000,20000,15000,10000,5000]}
26 |
27 | df2 = pd.DataFrame(data2)
28 | df2
29 |
30 | #Creating Third DataFrame:
31 |
32 | data3 = {"Name":["Berta","Charlie","Danielle"],
33 | "Books":[10,20,30]}
34 |
35 | df3 = pd.DataFrame(data3)
36 | df3
37 |
38 | #Creating fourth DataFrame:
39 |
40 | data4 = {"Name":["Alan","Berta","Charlie","Danielle","Euler"],
41 | "Age":[32,45,35,28,30]}
42 |
43 | df4 = pd.DataFrame(data4)
44 | df4
45 |
46 | #Creating fifth DataFrame:
47 |
48 | data5 = {"Money":[5000,20000,15000,10000,5000],
49 | "Books":[10,20,30,40,50]}
50 |
51 | df5 = pd.DataFrame(data5)
52 | df5
53 |
54 | #Creating sixth DataFrame:
55 |
56 | data6 = {"Name":["Euler","Danielle","Charlie","Berta","Alan"],
57 | "Age":[30,28,35,45,32]}
58 |
59 | df6 = pd.DataFrame(data6)
60 | df6
61 |
62 | #Creating seventh DataFrame:
63 |
64 | data7 = {"Name":["Rose","Patrick","Euler","Danielle"],
65 | "Money":[10,20,30,40]}
66 |
67 | df7 = pd.DataFrame(data7)
68 | df7
69 |
70 | #Using the join() function:
71 |
72 | df1.join(df2)
73 |
74 | #Using the join() function:
75 |
76 | df1.join(df2,lsuffix="_Left",rsuffix="_Right")
77 |
78 | #Using the join() function:
79 |
80 | df1.join(df2,lsuffix="_Left")
81 |
82 | #Using the join() function:
83 |
84 | df1.join(df2,rsuffix="_Right")
85 |
86 | #Using join() function:
87 |
88 | df4.join(df5)
89 |
90 | #Using the join() Function:
91 |
92 | df1.set_index("Name").join(df2.set_index("Name"))
93 |
94 | #Using the join() function:
95 |
96 | df1.join(df2.set_index("Name"),on="Name")
97 |
98 | #Using the join() function:
99 |
100 | df1.set_index("Name").join([df2.set_index("Name"),df3.set_index("Name")])
101 |
102 | #Creating a Series:
103 |
104 | s = pd.Series(["A","B","C","D"],name="Initial")
105 | s
106 |
107 | #Using the join() Function:
108 |
109 | df1.join(s)
110 |
111 | #Using the join() Function:
112 |
113 | df1.set_index("Name").join(df2.set_index("Name"),how="left")
114 |
115 | #Using the join() Function:
116 |
117 | df1.set_index("Name").join(df2.set_index("Name"),how="right")
118 |
119 | #Using join() function:
120 |
121 | df1.set_index("Name").join(df2.set_index("Name"), how="outer")
122 |
123 | #Using join() function:
124 |
125 | df1.set_index("Name").join(df2.set_index("Name"), how="inner")
126 |
127 | #Default how = left:
128 | #how=left
129 | #sort=False
130 |
131 | df6.set_index("Name").join(other=df7.set_index("Name"),how="left",sort=False)
132 |
133 | #Default how = left:
134 | #how=left
135 | #sort=True
136 |
137 | df6.set_index("Name").join(other=df7.set_index("Name"),how="left",sort=True)
138 |
139 | #how=right
140 | #sort=False
141 |
142 | df6.set_index("Name").join(other=df7.set_index("Name"),how="right",sort=False)
143 |
144 | #how=right
145 | #sort=True
146 |
147 | df6.set_index("Name").join(other=df7.set_index("Name"),how="right",sort=True)
148 |
149 | #how=outer
150 | #sort=False
151 |
152 | df6.set_index("Name").join(other=df7.set_index("Name"),how="outer",sort=False)
153 |
154 | #how=outer
155 | #sort=True
156 |
157 | df6.set_index("Name").join(other=df7.set_index("Name"),how="outer",sort=True)
158 |
159 | #how=inner
160 | #sort=False
161 |
162 | df6.set_index("Name").join(other=df7.set_index("Name"),how="inner",sort=False)
163 |
164 | #how=inner
165 | #sort=True
166 |
167 | df6.set_index("Name").join(other=df7.set_index("Name"),how="inner",sort=True)
168 |
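169 | #Illustrative addition (not part of the original tutorial):
170 | #join() works on indexes; the closely related pd.merge() joins on columns
171 | #directly, so the inner join above can also be written as:
172 | 
173 | pd.merge(df6, df7, on="Name", how="inner")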
--------------------------------------------------------------------------------
/pandas/pd_notna().py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | #Handling Missing Values in Pandas
4 |
5 | * Tutorial: https://news.towardsai.net/hmv
6 | * Github
7 | """
8 |
9 | #Import Required Libraries:
10 |
11 | import numpy as np
12 | import pandas as pd
13 |
14 | #Scalar arguments:
15 | #Numerical value
16 |
17 | pd.notna(28)
18 |
19 | #Scalar arguments:
20 | #String value
21 |
22 | pd.notna("Pratik")
23 |
24 | #Scalar arguments:
25 | #Empty strings are not considered as NA values
26 |
27 | pd.notna("")
28 |
29 | #Scalar arguments:
30 | #Infinite values are not considered as NA values
31 |
32 | pd.notna(np.inf)
33 |
34 | #Scalar arguments:
35 | #NaN: Not a Number
36 |
37 | pd.notna(np.NaN)
38 |
39 | #Scalar arguments:
40 | #None
41 |
42 | pd.notna(None)
43 |
44 | #Scalar arguments:
45 | #NA: Not Available
46 |
47 | pd.notna(pd.NA)
48 |
49 | #Scalar arguments:
50 | #NaT: Not a Timestamp
51 |
52 | pd.notna(pd.NaT)
53 |
54 | #nd-arrays:
55 |
56 | arr = np.array([1,2,"Blue"])
57 | print(arr)
58 | print("\n")
59 | pd.notna(arr)
60 |
61 | #nd-arrays:
62 | #Empty strings are not considered as NA values
63 |
64 | arr = np.array([[1,2,None],
65 | [3,4,pd.NA],
66 | [5,np.NaN,6],
67 | ["",7,8],
68 | ["Blue",pd.NaT,"Red"]])
69 |
70 | print(arr)
71 | print("\n")
72 | pd.notna(arr)
73 |
74 | #For index values:
75 |
76 | id = pd.Index([1,2,np.NaN,"Blue"])
77 | print(id)
78 | print("\n")
79 | pd.notna(id)
80 |
81 | #For index values:
82 |
83 | id = pd.DatetimeIndex([pd.Timestamp("2020-10-28"),
84 | pd.Timestamp(""),
85 | None,
86 | np.NaN,
87 | pd.NA,
88 | pd.NaT])
89 |
90 | print(id)
91 | print("\n")
92 | pd.notna(id)
93 |
94 | #Series:
95 |
96 | s = pd.Series([1,2,3,None,4,np.NaN,pd.NA,pd.NaT,"Blue"])
97 | print(s)
98 | print("\n")
99 | pd.notna(s)
100 |
101 | #DataFrame:
102 |
103 | df = pd.DataFrame({"Name":["Alan","Berta","Charlie",None],
104 | "Age":[32,45,np.NaN,28],
105 | "Birthday":[pd.NaT,None,pd.Timestamp("1975-10-28"),np.NaN],
106 | "Country":["USA","","USA","Canada"]
107 | })
108 |
109 | print(df)
110 | print("\n")
111 | pd.notna(df)
112 |
113 | pd.isnull
114 |
115 | pd.notnull
116 |
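117 | #Illustrative addition (not part of the original tutorial):
118 | #isnull/notnull are aliases of isna/notna, which the checks below confirm:
119 | 
120 | print(pd.isnull is pd.isna)
121 | print(pd.notnull is pd.notna)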
--------------------------------------------------------------------------------
/pandas/pd_notnull().py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | #Handling Missing Values in Pandas
4 |
5 | * Tutorial: https://news.towardsai.net/hmv
6 | * Github
7 | """
8 |
9 | #Import Required Libraries:
10 |
11 | import numpy as np
12 | import pandas as pd
13 |
14 | #Scalar arguments:
15 | #Numerical value
16 |
17 | pd.notnull(28)
18 |
19 | #Scalar arguments:
20 | #String value
21 |
22 | pd.notnull("Pratik")
23 |
24 | #Scalar arguments:
25 | #Empty strings are not considered as null values
26 |
27 | pd.notnull("")
28 |
29 | #Scalar arguments:
30 | #Infinite values are not considered as null values
31 |
32 | pd.notnull(np.inf)
33 |
34 | #Scalar arguments:
35 | #NaN: Not a Number
36 |
37 | pd.notnull(np.NaN)
38 |
39 | #Scalar arguments:
40 | #None
41 |
42 | pd.notnull(None)
43 |
44 | #Scalar arguments:
45 | #NA: Not Available
46 |
47 | pd.notnull(pd.NA)
48 |
49 | #Scalar arguments:
50 | #NaT: Not a Timestamp
51 |
52 | pd.notnull(pd.NaT)
53 |
54 | #nd-arrays:
55 |
56 | arr = np.array([1,2,"Blue"])
57 | print(arr)
58 | print("\n")
59 | pd.notnull(arr)
60 |
61 | #nd-arrays:
62 | #Empty strings are not considered as NA values
63 |
64 | arr = np.array([[1,2,None],
65 | [3,4,pd.NA],
66 | [5,np.NaN,6],
67 | ["",7,8],
68 | ["Blue",pd.NaT,"Red"]])
69 |
70 | print(arr)
71 | print("\n")
72 | pd.notnull(arr)
73 |
74 | #For index values:
75 |
76 | id = pd.Index([1,2,np.NaN,"Blue"])
77 | print(id)
78 | print("\n")
79 | pd.notnull(id)
80 |
81 | #For index values:
82 |
83 | id = pd.DatetimeIndex([pd.Timestamp("2020-10-28"),
84 | pd.Timestamp(""),
85 | None,
86 | np.NaN,
87 | pd.NA,
88 | pd.NaT])
89 |
90 | print(id)
91 | print("\n")
92 | pd.notnull(id)
93 |
94 | #Series:
95 |
96 | s = pd.Series([1,2,3,None,4,np.NaN,pd.NA,pd.NaT,"Blue"])
97 | print(s)
98 | print("\n")
99 | pd.notnull(s)
100 |
101 | #DataFrame:
102 |
103 | df = pd.DataFrame({"Name":["Alan","Berta","Charlie",None],
104 | "Age":[32,45,np.NaN,28],
105 | "Birthday":[pd.NaT,None,pd.Timestamp("1975-10-28"),np.NaN],
106 | "Country":["USA","","USA","Canada"]
107 | })
108 |
109 | print(df)
110 | print("\n")
111 | pd.notnull(df)
112 |
--------------------------------------------------------------------------------
/poisson-distribution-process/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/poisson-distribution-process/poisson.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """poisson-distribution-and-poisson-process-tutorial.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/1u-t6oSxbMd2FrzaIMimXnPMR5dfV6Gel
8 |
9 | # Diving Into the Poisson Distribution and Poisson Process
10 |
11 | * Tutorial: https://news.towardsai.net/pd
12 | * Github: https://github.com/towardsai/tutorials/tree/master/poisson-distribution-process
13 |
14 | A TV assembly unit performs a defect analysis task to understand the number of defects that could occur in a given defective TV. From past quality and audit data, it is noted that 12 defects are marked on average for a defective TV. Calculate the following:
15 | 
16 | * The probability that a defective TV has exactly 5 defects.
17 | * The probability that a defective TV has fewer than 5 defects.
18 | """
19 |
20 | import numpy as np
21 | import scipy.stats as stats
22 | import matplotlib.pyplot as plt
23 |
24 | n = np.arange(0,30)
25 | n
26 |
27 | rate = 12
28 | poisson = stats.poisson.pmf(n,rate)
29 |
30 | poisson
31 |
32 | poisson[5]
33 |
34 | poisson[0] + poisson[1] + poisson[2] + poisson[3] + poisson[4]
35 |
36 | plt.plot(n,poisson, 'o-')
37 | plt.show()
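38 | 
39 | # --- Illustrative addition (not part of the original tutorial) ---
40 | # Cross-checking the two answers with the closed-form Poisson PMF,
41 | # P(X = k) = exp(-rate) * rate**k / k!, and the cumulative distribution.
42 | from math import exp, factorial
43 | 
44 | print(exp(-rate) * rate**5 / factorial(5))   # should match poisson[5]
45 | print(stats.poisson.cdf(4, rate))            # P(X < 5) = P(X <= 4)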
--------------------------------------------------------------------------------
/principal_component_analysis/correlation_matrix_covariance_matrix.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 |
4 | # Implementation of the Correlation Matrix and Covariance Matrix
5 | 
6 | * These matrices are used with the eigenvalues and eigenvectors in PCA
7 |
8 | **Import Packages**
9 | """
10 |
11 | import pandas as pd
12 | import numpy as np
13 |
14 | matrix = np.array([[0, 3, 4], [1, 2, 4], [3, 4, 5]])
15 | print(matrix)
16 |
17 | """**Convert to covariance**"""
18 |
19 | np.cov(matrix)
20 |
21 | """**Convert to Correlation Matrix**"""
22 |
23 | matrix_a = np.array([[0.1, .32, .2, 0.4, 0.8],
24 | [.23, .18, .56, .61, .12],
25 | [.9, .3, .6, .5, .3],
26 | [.34, .75, .91, .19, .21]])
27 |
28 | pd.DataFrame(matrix_a).corr()
29 |
30 | np.corrcoef(matrix_a.T)
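31 | 
32 | """**Relation between the two matrices (illustrative addition, not in the original)**"""
33 | 
34 | # Dividing each covariance by the product of the corresponding standard
35 | # deviations reproduces the correlation matrix computed above.
36 | cov = np.cov(matrix_a.T)
37 | std = np.sqrt(np.diag(cov))
38 | cov / np.outer(std, std)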
--------------------------------------------------------------------------------
/principal_component_analysis/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/principal_component_analysis/pca_with_python.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """pca_with_python.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | 
7 | **Import Libraries**
8 | """
9 |
10 | import pandas as pd
11 | import numpy as np
12 | from sklearn.datasets import load_iris
13 | from sklearn.preprocessing import StandardScaler
14 | import matplotlib.pyplot as plt
15 |
16 | """**Load Iris Data**"""
17 |
18 | iris = load_iris()
19 |
20 | df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
21 |
22 | df['class'] = iris.target
23 | print(df)
24 |
25 | """**Get the value of x and y**"""
26 |
27 | x = df.drop(labels='class', axis=1).values
28 | y = df['class'].values
29 |
30 | print(x.shape, y.shape)
31 |
32 | """**Implementation of PCA**"""
33 |
34 | class convers_pca():
35 | def __init__(self, no_of_components):
36 | self.no_of_components = no_of_components
37 | self.eigen_values = None
38 | self.eigen_vectors = None
39 |
40 | def transform(self, x):
41 | return np.dot(x - self.mean, self.projection_matrix.T)
42 |
43 | def inverse_transform(self, x):
44 | return np.dot(x, self.projection_matrix) + self.mean
45 |
46 | def fit(self, x):
47 | self.no_of_components = x.shape[1] if self.no_of_components is None else self.no_of_components
48 | self.mean = np.mean(x, axis=0)
49 |
50 | cov_matrix = np.cov(x - self.mean, rowvar=False)
51 |
52 | self.eigen_values, self.eigen_vectors = np.linalg.eig(cov_matrix)
53 | self.eigen_vectors = self.eigen_vectors.T
54 |
55 | self.sorted_components = np.argsort(self.eigen_values)[::-1]
56 |
57 | self.projection_matrix = self.eigen_vectors[self.sorted_components[:self.no_of_components]]
58 |
59 | self.explained_variance = self.eigen_values[self.sorted_components]
60 | self.explained_variance_ratio = self.explained_variance / self.eigen_values.sum()
61 |
62 | """**Standardization**"""
63 |
64 | std = StandardScaler()
65 | transformed = StandardScaler().fit_transform(x)
66 |
67 | """**PCA with Component = 2**"""
68 |
69 | pca = convers_pca(no_of_components=2)
70 | pca.fit(transformed)
71 |
72 | """**Plotting**"""
73 |
74 | x_std = pca.transform(transformed)
75 |
76 | plt.figure()
77 | plt.scatter(x_std[:, 0], x_std[:, 1], c=y)
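78 | 
79 | """**Sanity check with scikit-learn (illustrative addition, not in the original)**"""
80 | 
81 | # The explained variance ratios of the custom PCA should roughly match
82 | # sklearn's PCA on the same standardized data (component signs may differ).
83 | from sklearn.decomposition import PCA
84 | 
85 | skl_pca = PCA(n_components=2).fit(transformed)
86 | print(skl_pca.explained_variance_ratio_)
87 | print(pca.explained_variance_ratio[:2])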
--------------------------------------------------------------------------------
/programming/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/programming/variable_swap_data_science.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | #5 Ways to Swap Two Variables in Python
4 |
5 | * Tutorial: https://news.towardsai.net/hnk
6 | * Github: https://github.com/towardsai/tutorials/tree/master/programming
7 | """
8 |
9 | #Using Naive Approach to Swap Values in Python:
10 |
11 | def swap_values(x,y):
12 | #Printing Original Values:
13 | print("Original Values")
14 | print("X:",x)
15 | print("Y:",y)
16 | print("---------------")
17 |
18 | #Swapping Values:
19 | temp = x
20 | x = y
21 | y = temp
22 |
23 | #Printing Values after Swapping:
24 | print("Values after Swapping")
25 | print("X:",x)
26 | print("Y:",y)
27 |
28 | #Function Call:
29 |
30 | #Integer Values:
31 | swap_values(10,20)
32 | print("\n")
33 |
34 | #Float Values:
35 | swap_values(10.5,20.5)
36 | print("\n")
37 |
38 | #String Values:
39 | swap_values("Pratik","Shukla")
40 |
41 | #Using comma operator to Swap Values in Python:
42 |
43 | def swap_values(x,y):
44 | #Printing Original Values:
45 | print("Original Values")
46 | print("X:",x)
47 | print("Y:",y)
48 | print("---------------")
49 |
50 | #Swapping Values:
51 | x,y = y,x
52 |
53 | #Printing Values after Swapping:
54 | print("Values after Swapping")
55 | print("X:",x)
56 | print("Y:",y)
57 |
58 | #Function Call:
59 |
60 | #Integer Values:
61 | swap_values(10,20)
62 | print("\n")
63 |
64 | #Float Values:
65 | swap_values(10.5,20.5)
66 | print("\n")
67 |
68 | #String Values:
69 | swap_values("Pratik","Shukla")
70 |
71 | #Using Arithmetic Operators (Addition and Subtraction) to Swap Values in Python:
72 |
73 | def swap_values(x,y):
74 | #Printing Original Values:
75 | print("Original Values")
76 | print("X:",x)
77 | print("Y:",y)
78 | print("---------------")
79 |
80 | #Swapping Values:
81 | x = x+y
82 | y = x-y
83 | x = x-y
84 |
85 | #Printing Values after Swapping:
86 | print("Values after Swapping")
87 | print("X:",x)
88 | print("Y:",y)
89 |
90 | #Function Call:
91 |
92 | #Integer Values:
93 | swap_values(10,20)
94 | print("\n")
95 |
96 | #Float Values:
97 | swap_values(10.5,20.5)
98 | print("\n")
99 |
100 | #String Values:
101 | #It doesn't work with Strings as it uses numerical operators.
102 | #swap_values("Pratik","Shukla")
103 |
104 | #Using Arithmetic Operators (Multiplication and Division) to Swap Values in Python:
105 |
106 | def swap_values(x,y):
107 | #Printing Original Values:
108 | print("Original Values")
109 | print("X:",x)
110 | print("Y:",y)
111 | print("---------------")
112 |
113 | #Swapping Values:
114 | x = x*y
115 | y = x/y
116 | x = x/y
117 |
118 | #Printing Values after Swapping:
119 | print("Values after Swapping")
120 | print("X:",x)
121 | print("Y:",y)
122 |
123 | #Function Call:
124 |
125 | #Integer Values:
126 | swap_values(10,20)
127 | print("\n")
128 |
129 | #Float Values:
130 | swap_values(10.5,20.5)
131 | print("\n")
132 |
133 | #String Values:
134 | #It doesn't work with Strings as it uses numerical operators.
135 | #swap_values("Pratik","Shukla")
136 |
137 | #Using the Bitwise XOR Operator to Swap Values in Python:
138 |
139 | def swap_values(x,y):
140 | #Printing Original Values:
141 | print("Original Values")
142 | print("X:",x)
143 | print("Y:",y)
144 | print("---------------")
145 |
146 | #Swapping Values:
147 | x = x^y
148 | y = x^y
149 | x = x^y
150 |
151 | #Printing Values after Swapping:
152 | print("Values after Swapping")
153 | print("X:",x)
154 | print("Y:",y)
155 |
156 | #Function Call:
157 |
158 | #Integer Values:
159 | swap_values(10,20)
160 | print("\n")
161 |
162 | #Float Values:
163 | #It only works with integers.
164 | #swap_values(10.5,20.5)
165 |
166 | #String Values:
167 | #It doesn't work with Strings as it uses numerical operators.
168 | #swap_values("Pratik","Shukla")
169 |
--------------------------------------------------------------------------------
/random-number-generator/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/random-number-generator/random_number_generator_tutorial_with_python.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Random Number Generator Tutorial with Python.ipynb
3 |
4 | Automatically generated by Colaboratory.
5 |
6 | Original file is located at
7 | https://colab.research.google.com/drive/1qKiX3ztczxxXNSlC0IKtSr8ND4UILE-G
8 |
9 | # Random Number Generator Tutorial with Python
10 |
11 | ## Generating pseudorandom numbers with Python standard library
12 |
13 | Python has a built-in module called random for generating a variety of pseudorandom numbers. Although this module is not recommended for security-sensitive purposes such as cryptography, it is perfectly fine for machine learning and data science. It uses a PRNG called the Mersenne Twister.
14 |
15 | ### Importing module: random
16 | """
17 |
18 | import random
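
# Aside (a minimal sketch, not part of the original tutorial): for security-sensitive
# randomness such as tokens or passwords, the standard library provides the secrets
# module instead of random.
import secrets
print(secrets.randbelow(20))   # cryptographically strong integer in [0, 20)
print(secrets.token_hex(16))   # 32-character random hex string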
19 |
20 | """### Random numbers within a range"""
21 |
22 | #initialize the seed to 25
23 | random.seed(25)
24 |
25 | #generating random number between 10 and 20 (10 included, 20 excluded)
26 | print(random.randrange(10, 20))
27 |
28 | #generating random number between 10 and 20 (both included)
29 | print(random.randint(10, 20))
30 |
31 | """### Random element from a sequence"""
32 |
33 | #initialize the seed to 2
34 | random.seed(2)
35 |
36 | #setting up the sequence
37 | myseq = ['Towards', 'AI', 'is', 1]
38 |
39 | #randomly choosing an element from the sequence
40 | random.choice(myseq)
41 |
42 | """### Multiple random selections with different possibilities"""
43 |
44 | #initialize the seed to 25
45 | random.seed(25)
46 |
47 | #setting up the sequence
48 | myseq = ['Towards', 'AI', 'is', 1]
49 |
50 | #random selection of length 15
51 | #relative weight of 10 for selecting 'Towards'
52 | #relative weight of 5 for selecting 'AI'
53 | #relative weight of 2 for selecting 'is'
54 | #relative weight of 2 for selecting 1
55 | random.choices(myseq, weights=[10, 5, 2, 2], k = 15)
56 |
57 | """### Random element from a sequence without replacement"""
58 |
59 | #initialize the seed to 25
60 | random.seed(25)
61 |
62 | #setting up the sequence
63 | myseq = ['Towards', 'AI', 'is', 1]
64 |
65 | #randomly choosing 2 elements from the sequence without replacement
66 | random.sample(myseq, 2)
67 |
68 | #initialize the seed to 25
69 | random.seed(25)
70 |
71 | #setting up the sequence
72 | myseq = ['Towards', 'AI', 'is', 1]
73 |
74 | #randomly choosing elements from the sequence
75 | #trying to choose 5 random elements from a sequence of length 4
76 | #since the selection is without replacement, this is not possible and hence raises an error
77 | random.sample(myseq, 5)
78 |
79 | """### Rearrange the sequence"""
80 |
81 | #initialize the seed to 25
82 | random.seed(25)
83 |
84 | #setting up the sequence
85 | myseq = ['Towards', 'AI', 'is', 1]
86 |
87 | #rearranging the order of elements of the list
88 | random.shuffle(myseq)
89 | myseq
90 |
91 | """### Floating-point random number"""
92 |
93 | #initialize the seed to 25
94 | random.seed(25)
95 |
96 | #random float number between 0 and 1
97 | random.random()
98 |
99 | """### Real-valued distributions"""
100 |
101 | #initialize the seed to 25
102 | random.seed(25)
103 |
104 | #random float number between 10 and 20 (both included)
105 | print(random.uniform(10, 20))
106 |
107 | #random float number from a Gaussian with mean 10 and standard deviation 4
108 | print(random.gauss(10, 4))
109 |
110 | """## Generating pseudorandom numbers with Numpy"""
111 |
112 | #importing numpy; its np.random submodule provides the generators
113 | import numpy as np
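
# Aside (a minimal sketch, assuming NumPy >= 1.17): newer NumPy code usually prefers the
# Generator API over the legacy np.random.* functions shown below; a seeded generator
# gives the same reproducibility.
rng = np.random.default_rng(25)
print(rng.random())                  # uniform float in [0, 1)
print(rng.integers(10, 20, size=5))  # 5 integers in [10, 20)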
114 |
115 | """### Uniform distributed floating values"""
116 |
117 | #initialize the seed to 25
118 | np.random.seed(25)
119 |
120 | #single uniformly distributed random number
121 | np.random.rand()
122 |
123 | #initialize the seed to 25
124 | np.random.seed(25)
125 |
126 | #uniformly distributed random numbers of length 10: 1-D array
127 | np.random.rand(10)
128 |
129 | #initialize the seed to 25
130 | np.random.seed(25)
131 |
132 | #uniformly distributed random numbers of 2 rows and 3 columns: 2-D array
133 | np.random.rand(2, 3)
134 |
135 | """### Normal distributed floating values"""
136 |
137 | #initialize the seed to 25
138 | np.random.seed(25)
139 |
140 | #single normally distributed random number
141 | np.random.randn()
142 |
143 | #initialize the seed to 25
144 | np.random.seed(25)
145 |
146 | #normally distributed random numbers of length 10: 1-D array
147 | np.random.randn(10)
148 |
149 | #initialize the seed to 25
150 | np.random.seed(25)
151 |
152 | #normally distributed random numbers of 2 rows and 3 columns: 2-D array
153 | np.random.randn(2, 3)
154 |
155 | """### Uniformly distributed integers in a given range"""
156 |
157 | #initialize the seed to 25
158 | np.random.seed(25)
159 |
160 | #single uniformly distributed random integer between 10 and 20 (20 excluded)
161 | np.random.randint(10, 20)
162 |
163 | #initialize the seed to 25
164 | np.random.seed(25)
165 |
166 | #uniformly distributed random integers between 0 and 99 of length 10: 1-D array
167 | np.random.randint(100, size=(10))
168 |
169 | #initialize the seed to 25
170 | np.random.seed(25)
171 |
172 | #uniformly distributed random integers between 0 and 99 of 2 rows and 3 columns: 2-D array
173 | np.random.randint(100, size=(2, 3))
174 |
175 | """### Random elements from a defined list"""
176 |
177 | #initialize the seed to 25
178 | np.random.seed(25)
179 |
180 | #setting up the sequence
181 | myseq = ['Towards', 'AI', 'is', 1]
182 |
183 | #randomly choosing an element from the sequence
184 | np.random.choice(myseq)
185 |
186 | #initialize the seed to 25
187 | np.random.seed(25)
188 |
189 | #setting up the sequence
190 | myseq = ['Towards', 'AI', 'is', 1]
191 |
192 | #randomly choosing elements from the sequence: 2-D array
193 | np.random.choice(myseq, size=(2, 3))
194 |
195 | #initialize the seed to 25
196 | np.random.seed(25)
197 |
198 | #setting up the sequence
199 | myseq = ['Towards', 'AI', 'is', 1]
200 |
201 | #randomly choosing elements from the sequence with defined probabilities
202 | #The probability for the value to be 'Towards' is set to be 0.1
203 | #The probability for the value to be 'AI' is set to be 0.6
204 | #The probability for the value to be 'is' is set to be 0.05
205 | #The probability for the value to be 1 is set to be 0.25
206 | #0.1 + 0.6 + 0.05 + 0.25 = 1
207 | np.random.choice(myseq, p=[0.1, 0.6, 0.05, 0.25], size=(2, 3))
208 |
209 | """### Binomial distributed values"""
210 |
211 | #initialize the seed to 25
212 | np.random.seed(25)
213 |
214 | #10 trials with success probability 0.5 each; generate 10 samples
215 | np.random.binomial(n=10, p=0.5, size=10)
216 |
217 | """### Poisson Distribution values"""
218 |
219 | #initialize the seed to 25
220 | np.random.seed(25)
221 |
222 | #Poisson distribution with rate (lam) 2; generate 10 samples
223 | np.random.poisson(lam=2, size=10)
224 |
225 | """### Chi Square distribution"""
226 |
227 | #initialize the seed to 25
228 | np.random.seed(25)
229 |
230 | #degrees of freedom 2 and size (2, 3)
231 | np.random.chisquare(df=2, size=(2, 3))
232 |
233 | """Thank you
"""
--------------------------------------------------------------------------------
/recommendation_system_tutorial/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/recommendation_system_tutorial/movie_titles.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/towardsai/tutorials/cc12fe183d50ce6095f044d7346f30d5d0522584/recommendation_system_tutorial/movie_titles.csv
--------------------------------------------------------------------------------
/recommendation_system_tutorial/new_features.csv:
--------------------------------------------------------------------------------
1 | 212668,16604,3.4864864864864864,4,5,5,4,5,5.0,5.0,5.0,5.0,5.0,5.0,3.782608695652174,5
2 | 242896,16604,3.4864864864864864,4,4,3,4,3,3,3.0,3.0,3.0,3.0,3.0,3.782608695652174,3
3 | 295174,16604,3.4864864864864864,5,3,4,3,4,3,4.0,4.0,4.0,4.0,4.0,3.782608695652174,5
4 | 398239,16604,3.4864864864864864,4,4,5,3,4,2,1,2,2.5,2.5,2.5,3.782608695652174,5
5 | 605969,16604,3.4864864864864864,4,5,5,4,5,5.0,5.0,5.0,5.0,5.0,5.0,3.782608695652174,5
6 | 634715,16604,3.4864864864864864,4,5,5,4,5,3.0,3.0,3.0,3.0,3.0,3.0,3.782608695652174,3
7 | 754728,16604,3.4864864864864864,4,5,5,4,5,4.0,4.0,4.0,4.0,4.0,4.0,3.782608695652174,4
8 | 769103,16604,3.4864864864864864,3,5,5,3,4,3,3.5,3.5,3.5,3.5,3.5,3.782608695652174,4
9 | 1018210,16604,3.4864864864864864,4,4,3,4,3,4,4.0,4.0,4.0,4.0,4.0,3.782608695652174,4
10 | 1049467,16604,3.4864864864864864,5,3,4,3,4,3,4.0,4.0,4.0,4.0,4.0,3.782608695652174,5
11 | 1209983,16604,3.4864864864864864,5,5,4,3,4,2,2.5,2.5,2.5,2.5,2.5,3.782608695652174,3
12 | 1218016,16604,3.4864864864864864,3,3,3,4,4,4,3.0,3.0,3.0,3.0,3.0,3.782608695652174,2
13 | 1453970,16604,3.4864864864864864,4,5,5,4,5,5.0,5.0,5.0,5.0,5.0,5.0,3.782608695652174,5
14 | 1533963,16604,3.4864864864864864,4,5,5,4,5,4.0,4.0,4.0,4.0,4.0,4.0,3.782608695652174,4
15 | 1553211,16604,3.4864864864864864,4,5,5,4,5,4.0,4.0,4.0,4.0,4.0,4.0,3.782608695652174,4
16 | 1862911,16604,3.4864864864864864,4,5,4,5,5,2,2,2.3333333333333335,2.3333333333333335,2.3333333333333335,2.3333333333333335,3.782608695652174,3
17 | 2059366,16604,3.4864864864864864,3,3,4,4,2,4,3.5,3.5,3.5,3.5,3.5,3.782608695652174,3
18 | 2204342,16604,3.4864864864864864,4,5,5,4,5,3.0,3.0,3.0,3.0,3.0,3.0,3.782608695652174,3
19 | 2329676,16604,3.4864864864864864,4,5,5,4,5,2.0,2.0,2.0,2.0,2.0,2.0,3.782608695652174,2
20 | 2382060,16604,3.4864864864864864,3,4,5,5,2,3,3.5,3.5,3.5,3.5,3.5,3.782608695652174,4
21 | 2465999,16604,3.4864864864864864,4,5,5,4,5,4.0,4.0,4.0,4.0,4.0,4.0,3.782608695652174,4
22 | 2557596,16604,3.4864864864864864,2,3,3,4,4,5,4.0,4.0,4.0,4.0,4.0,3.782608695652174,3
23 | 2579003,16604,3.4864864864864864,4,4,3,4,3,4,4.0,4.0,4.0,4.0,4.0,3.782608695652174,4
24 | 2620858,16630,3.4864864864864864,4,3.5,3.5,3.5,3.5,3.5,3.0,3
25 | 782261,16795,3.4864864864864864,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5
26 | 1570292,17135,3.4864864864864864,4,3,2,4,4.0,4.0,4.0,4.0,4.0,3.0,4
27 | 1805778,17135,3.4864864864864864,4,3,2,3,3.0,3.0,3.0,3.0,3.0,3.0,3
28 | 1862911,17135,3.4864864864864864,3,3,4,2,3,2.3333333333333335,2.3333333333333335,2.3333333333333335,2.3333333333333335,3.0,2
29 | 2382060,17135,3.4864864864864864,2,3,4,4,3.5,3.5,3.5,3.5,3.5,3.0,3
30 | 35573,17149,3.4864864864864864,5,2,5,3,4,5.0,5.0,5.0,5.0,5.0,5.0,3.4390243902439024,5
31 | 115726,17149,3.4864864864864864,5,2,5,3,4,5.0,5.0,5.0,5.0,5.0,5.0,3.4390243902439024,5
32 | 193510,17149,3.4864864864864864,5,2,5,3,4,4.0,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,4
33 | 197313,17149,3.4864864864864864,5,2,5,3,4,5.0,5.0,5.0,5.0,5.0,5.0,3.4390243902439024,5
34 | 229421,17149,3.4864864864864864,5,2,5,3,4,2.0,2.0,2.0,2.0,2.0,2.0,3.4390243902439024,2
35 | 242896,17149,3.4864864864864864,4,4,4,3,2,3,3.0,3.0,3.0,3.0,3.0,3.4390243902439024,3
36 | 295174,17149,3.4864864864864864,3,2,3,3,4,5,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,3
37 | 398239,17149,3.4864864864864864,3,3,2,3,4,5,2,2,2.5,2.5,2.5,3.4390243902439024,1
38 | 619230,17149,3.4864864864864864,5,2,5,3,4,3.0,3.0,3.0,3.0,3.0,3.0,3.4390243902439024,3
39 | 732495,17149,3.4864864864864864,5,2,5,3,4,4.0,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,4
40 | 769103,17149,3.4864864864864864,2,3,3,3,4,4,3.5,3.5,3.5,3.5,3.5,3.4390243902439024,3
41 | 772116,17149,3.4864864864864864,5,2,5,3,4,4.0,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,4
42 | 842750,17149,3.4864864864864864,5,2,5,3,4,3.0,3.0,3.0,3.0,3.0,3.0,3.4390243902439024,3
43 | 934315,17149,3.4864864864864864,5,2,5,3,4,4.0,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,4
44 | 980189,17149,3.4864864864864864,5,2,5,3,4,4.0,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,4
45 | 1018210,17149,3.4864864864864864,4,4,4,3,2,4,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,4
46 | 1049467,17149,3.4864864864864864,3,2,3,3,4,5,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,3
47 | 1180569,17149,3.4864864864864864,5,2,5,3,4,3.0,3.0,3.0,3.0,3.0,3.0,3.4390243902439024,3
48 | 1209983,17149,3.4864864864864864,3,3,3,3,4,3,2.5,2.5,2.5,2.5,2.5,3.4390243902439024,2
49 | 1217165,17149,3.4864864864864864,5,2,5,3,4,2.0,2.0,2.0,2.0,2.0,2.0,3.4390243902439024,2
50 | 1218016,17149,3.4864864864864864,5,4,3,4,4,2,3.0,3.0,3.0,3.0,3.0,3.4390243902439024,4
51 | 1235495,17149,3.4864864864864864,5,2,5,3,4,3.0,3.0,3.0,3.0,3.0,3.0,3.4390243902439024,3
52 | 1413107,17149,3.4864864864864864,5,2,5,3,4,2.0,2.0,2.0,2.0,2.0,2.0,3.4390243902439024,2
53 | 1460255,17149,3.4864864864864864,5,2,5,3,4,2.0,2.0,2.0,2.0,2.0,2.0,3.4390243902439024,2
54 | 1469739,17149,3.4864864864864864,5,2,5,3,4,4.0,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,4
55 | 1473708,17149,3.4864864864864864,5,2,5,3,4,4.0,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,4
56 | 1570292,17149,3.4864864864864864,4,4,3,5,3,4,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,4
57 | 1615195,17149,3.4864864864864864,5,2,5,3,4,4.0,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,4
58 | 1683156,17149,3.4864864864864864,5,2,5,3,4,3.0,3.0,3.0,3.0,3.0,3.0,3.4390243902439024,3
59 | 1720816,17149,3.4864864864864864,5,2,5,3,4,4.0,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,4
60 | 1805778,17149,3.4864864864864864,4,4,3,5,3,3,3.0,3.0,3.0,3.0,3.0,3.4390243902439024,3
61 | 1814953,17149,3.4864864864864864,5,2,5,3,4,4.0,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,4
62 | 1895168,17149,3.4864864864864864,5,2,5,3,4,3.0,3.0,3.0,3.0,3.0,3.0,3.4390243902439024,3
63 | 1991264,17149,3.4864864864864864,5,2,5,3,4,1.0,1.0,1.0,1.0,1.0,1.0,3.4390243902439024,1
64 | 2022141,17149,3.4864864864864864,5,2,5,3,4,3.0,3.0,3.0,3.0,3.0,3.0,3.4390243902439024,3
65 | 2024916,17149,3.4864864864864864,5,2,5,3,4,4.0,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,4
66 | 2059366,17149,3.4864864864864864,5,3,4,4,4,3,3.5,3.5,3.5,3.5,3.5,3.4390243902439024,4
67 | 2557596,17149,3.4864864864864864,4,4,3,4,4,3,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,5
68 | 2579003,17149,3.4864864864864864,4,4,4,3,2,4,4.0,4.0,4.0,4.0,4.0,3.4390243902439024,4
69 | 2580515,17149,3.4864864864864864,5,2,5,3,4,5.0,5.0,5.0,5.0,5.0,5.0,3.4390243902439024,5
70 | 2620858,17149,3.4864864864864864,3,5,2,5,3,3,3.5,3.5,3.5,3.5,3.5,3.4390243902439024,4
71 | 2086948,17177,3.4864864864864864,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4
72 | 398239,17280,3.4864864864864864,2,2,5,1,2.5,2.5,2.5,2.0,2
73 | 1862911,17280,3.4864864864864864,2,3,2,2.3333333333333335,2.3333333333333335,2.3333333333333335,2.3333333333333335,2.0,2
74 | 398239,17315,3.4864864864864864,2,5,1,2.5,2.5,2.5,2.0,2
75 |
--------------------------------------------------------------------------------
/recommendation_system_tutorial/recommendation_system_tutorial_netflix.py:
--------------------------------------------------------------------------------
1 | # Recommendation System Tutorial - Netflix
2 | # URL: https://towardsai.net/recommendation-system-tutorial
3 |
4 | # Download datasets
5 | !wget https://datasets.towardsai.net/combined_data_4.txt
6 | !wget https://raw.githubusercontent.com/towardsai/tutorials/master/recommendation_system_tutorial/movie_titles.csv
7 | !wget https://raw.githubusercontent.com/towardsai/tutorials/master/recommendation_system_tutorial/new_features.csv
8 |
9 | !pip install scikit-surprise
10 |
11 | from datetime import datetime
12 | import pandas as pd
13 | import numpy as np
14 | import seaborn as sns
15 | import os
16 | import random
17 | import matplotlib
18 | import matplotlib.pyplot as plt
19 | from scipy import sparse
20 | from sklearn.metrics.pairwise import cosine_similarity
21 | from sklearn.metrics import mean_squared_error
22 |
23 | import xgboost as xgb
24 | from surprise import Reader, Dataset
25 | from surprise import BaselineOnly
26 | from surprise import KNNBaseline
27 | from surprise import SVD
28 | from surprise import SVDpp
29 | from surprise.model_selection import GridSearchCV
30 |
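# Note on the expected input format (an assumption based on how load_data() parses the file,
# matching the Netflix Prize layout): each movie block starts with a line like "4500:" giving
# the movie id, followed by one "customer_id,rating,date" line per rating, e.g. (illustrative
# values only):
#
#   4500:
#   2532865,4,2005-07-26
#   573364,3,2005-06-20
#
# load_data() flattens this into rows of "movie_id,customer_id,rating,date".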
31 | def load_data():
32 | netflix_csv_file = open("netflix_rating.csv", mode = "w")
33 | rating_files = ['combined_data_4.txt']
34 | for file in rating_files:
35 | with open(file) as f:
36 | for line in f:
37 | line = line.strip()
38 | if line.endswith(":"):
39 | movie_id = line.replace(":", "")
40 | else:
41 | row_data = []
42 | row_data = [item for item in line.split(",")]
43 | row_data.insert(0, movie_id)
44 | netflix_csv_file.write(",".join(row_data))
45 | netflix_csv_file.write('\n')
46 |
47 | netflix_csv_file.close()
48 | df = pd.read_csv('netflix_rating.csv', sep=",", names = ["movie_id","customer_id", "rating", "date"])
49 | return df
50 |
51 | netflix_rating_df = load_data()
52 | netflix_rating_df
53 | netflix_rating_df.head()
54 |
55 | netflix_rating_df.duplicated(["movie_id","customer_id", "rating", "date"]).sum()
56 |
57 | split_value = int(len(netflix_rating_df) * 0.80)
58 | train_data = netflix_rating_df[:split_value]
59 | test_data = netflix_rating_df[split_value:]
60 |
61 | plt.figure(figsize = (12, 8))
62 | ax = sns.countplot(x="rating", data=train_data)
63 |
64 | ax.set_yticklabels([num for num in ax.get_yticks()])
65 |
66 | plt.tick_params(labelsize = 15)
67 | plt.title("Count Ratings in train data", fontsize = 20)
68 | plt.xlabel("Ratings", fontsize = 20)
69 | plt.ylabel("Number of Ratings", fontsize = 20)
70 | plt.show()
71 |
72 | def get_user_item_sparse_matrix(df):
73 | sparse_data = sparse.csr_matrix((df.rating, (df.customer_id, df.movie_id)))
74 | return sparse_data
75 |
76 | train_sparse_data = get_user_item_sparse_matrix(train_data)
77 |
78 | test_sparse_data = get_user_item_sparse_matrix(test_data)
79 |
80 | global_average_rating = train_sparse_data.sum()/train_sparse_data.count_nonzero()
81 | print("Global Average Rating: {}".format(global_average_rating))
82 |
83 | def get_average_rating(sparse_matrix, is_user):
84 | ax = 1 if is_user else 0
85 | sum_of_ratings = sparse_matrix.sum(axis = ax).A1
86 | no_of_ratings = (sparse_matrix != 0).sum(axis = ax).A1
87 | rows, cols = sparse_matrix.shape
88 | average_ratings = {i: sum_of_ratings[i]/no_of_ratings[i] for i in range(rows if is_user else cols) if no_of_ratings[i] != 0}
89 | return average_ratings
90 |
91 | average_rating_user = get_average_rating(train_sparse_data, True)
92 |
93 | avg_rating_movie = get_average_rating(train_sparse_data, False)
94 |
95 | total_users = len(np.unique(netflix_rating_df["customer_id"]))
96 | train_users = len(average_rating_user)
97 | uncommonUsers = total_users - train_users
98 |
99 | print("Total no. of Users = {}".format(total_users))
100 | print("No. of Users in train data= {}".format(train_users))
101 | print("No. of Users not present in train data = {}({}%)".format(uncommonUsers, np.round((uncommonUsers/total_users)*100), 2))
102 |
103 | total_movies = len(np.unique(netflix_rating_df["movie_id"]))
104 | train_movies = len(avg_rating_movie)
105 | uncommonMovies = total_movies - train_movies
106 |
107 | print("Total no. of Movies = {}".format(total_movies))
108 | print("No. of Movies in train data= {}".format(train_movies))
109 | print("No. of Movies not present in train data = {}({}%)".format(uncommonMovies, np.round((uncommonMovies/total_movies)*100), 2))
110 |
111 | def compute_user_similarity(sparse_matrix, limit=100):
112 | row_index, col_index = sparse_matrix.nonzero()
113 | rows = np.unique(row_index)
114 | similar_arr = np.zeros(61700).reshape(617,100)  #holds the top-100 similarity scores for each sampled user (617 rows)
115 |
116 | for row in rows[:limit]:
117 | sim = cosine_similarity(sparse_matrix.getrow(row), train_sparse_data).ravel()
118 | similar_indices = sim.argsort()[-limit:]
119 | similar = sim[similar_indices]
120 | similar_arr[row] = similar
121 |
122 | return similar_arr
123 |
124 | similar_user_matrix = compute_user_similarity(train_sparse_data, 100)
125 |
126 | similar_user_matrix[0]
127 |
128 | movie_titles_df = pd.read_csv("movie_titles.csv",sep = ",",
129 | header = None, names=['movie_id', 'year_of_release', 'movie_title'],
130 | index_col = "movie_id", encoding = "iso8859_2")
131 | movie_titles_df.head()
132 |
133 | def compute_movie_similarity_count(sparse_matrix, movie_titles_df, movie_id):
134 | similarity = cosine_similarity(sparse_matrix.T, dense_output = False)
135 | no_of_similar_movies = movie_titles_df.loc[movie_id][1], similarity[movie_id].count_nonzero()  #(movie title, count of movies with non-zero similarity)
136 | return no_of_similar_movies
137 |
138 | similar_movies = compute_movie_similarity_count(train_sparse_data, movie_titles_df, 1775)
139 | print("Similar Movies = {}".format(similar_movies))
140 |
141 | def get_sample_sparse_matrix(sparse_matrix, no_of_users, no_of_movies):
142 | users, movies, ratings = sparse.find(sparse_matrix)
143 | uniq_users = np.unique(users)
144 | uniq_movies = np.unique(movies)
145 | np.random.seed(15)
146 | user = np.random.choice(uniq_users, no_of_users, replace = False)
147 | movie = np.random.choice(uniq_movies, no_of_movies, replace = True)
148 | mask = np.logical_and(np.isin(users, user), np.isin(movies, movie))
149 | sparse_matrix = sparse.csr_matrix((ratings[mask], (users[mask], movies[mask])),
150 | shape = (max(user)+1, max(movie)+1))
151 | return sparse_matrix
152 |
153 | train_sample_sparse_matrix = get_sample_sparse_matrix(train_sparse_data, 400, 40)
154 |
155 | test_sparse_matrix_matrix = get_sample_sparse_matrix(test_sparse_data, 200, 20)
156 |
157 | def create_new_similar_features(sample_sparse_matrix):
158 | global_avg_rating = get_average_rating(sample_sparse_matrix, False)
159 | global_avg_users = get_average_rating(sample_sparse_matrix, True)
160 | global_avg_movies = get_average_rating(sample_sparse_matrix, False)
161 | sample_train_users, sample_train_movies, sample_train_ratings = sparse.find(sample_sparse_matrix)
162 | new_features_csv_file = open("new_features.csv", mode = "w")
163 |
164 | for user, movie, rating in zip(sample_train_users, sample_train_movies, sample_train_ratings):
165 | similar_arr = list()
166 | similar_arr.append(user)
167 | similar_arr.append(movie)
168 | similar_arr.append(sample_sparse_matrix.sum()/sample_sparse_matrix.count_nonzero())
169 |
170 | similar_users = cosine_similarity(sample_sparse_matrix[user], sample_sparse_matrix).ravel()
171 | indices = np.argsort(-similar_users)[1:]
172 | ratings = sample_sparse_matrix[indices, movie].toarray().ravel()
173 | top_similar_user_ratings = list(ratings[ratings != 0][:5])
174 | top_similar_user_ratings.extend([global_avg_rating[movie]] * (5 - len(top_similar_user_ratings)))
175 | similar_arr.extend(top_similar_user_ratings)
176 |
177 | similar_movies = cosine_similarity(sample_sparse_matrix[:,movie].T, sample_sparse_matrix.T).ravel()
178 | similar_movies_indices = np.argsort(-similar_movies)[1:]
179 | similar_movies_ratings = sample_sparse_matrix[user, similar_movies_indices].toarray().ravel()
180 | top_similar_movie_ratings = list(similar_movies_ratings[similar_movies_ratings != 0][:5])
181 | top_similar_movie_ratings.extend([global_avg_users[user]] * (5-len(top_similar_movie_ratings)))
182 | similar_arr.extend(top_similar_movie_ratings)
183 |
184 | similar_arr.append(global_avg_users[user])
185 | similar_arr.append(global_avg_movies[movie])
186 | similar_arr.append(rating)
187 |
188 | new_features_csv_file.write(",".join(map(str, similar_arr)))
189 | new_features_csv_file.write("\n")
190 |
191 | new_features_csv_file.close()
192 | new_features_df = pd.read_csv('new_features.csv', names = ["user_id", "movie_id", "global_average", "similar_user_rating1",
193 | "similar_user_rating2", "similar_user_rating3",
194 | "similar_user_rating4", "similar_user_rating5",
195 | "similar_movie_rating1", "similar_movie_rating2",
196 | "similar_movie_rating3", "similar_movie_rating4",
197 | "similar_movie_rating5", "user_average",
198 | "movie_average", "rating"])
199 | return new_features_df
200 |
201 | train_new_similar_features = create_new_similar_features(train_sample_sparse_matrix)
202 |
203 | train_new_similar_features = train_new_similar_features.fillna(0)
204 | train_new_similar_features.head()
205 |
206 | test_new_similar_features = create_new_similar_features(test_sparse_matrix_matrix)
207 |
208 | test_new_similar_features = test_new_similar_features.fillna(0)
209 | test_new_similar_features.head()
210 |
211 | x_train = train_new_similar_features.drop(["user_id", "movie_id", "rating"], axis = 1)
212 |
213 | x_test = test_new_similar_features.drop(["user_id", "movie_id", "rating"], axis = 1)
214 |
215 | y_train = train_new_similar_features["rating"]
216 |
217 | y_test = test_new_similar_features["rating"]
218 |
219 | def error_metrics(y_true, y_pred):
220 | rmse = np.sqrt(mean_squared_error(y_true, y_pred))
221 | return rmse
222 |
223 | clf = xgb.XGBRegressor(n_estimators = 100, silent = False, n_jobs = 10)
224 | clf.fit(x_train, y_train)
225 |
226 | y_pred_test = clf.predict(x_test)
227 |
228 | rmse_test = error_metrics(y_test, y_pred_test)
229 | print("RMSE = {}".format(rmse_test))
230 |
231 | def plot_importance(model, clf):
232 | fig = plt.figure(figsize = (8, 6))
233 | ax = fig.add_axes([0,0,1,1])
234 | model.plot_importance(clf, ax = ax, height = 0.3)
235 | plt.xlabel("F Score", fontsize = 20)
236 | plt.ylabel("Features", fontsize = 20)
237 | plt.title("Feature Importance", fontsize = 20)
238 | plt.tick_params(labelsize = 15)
239 |
240 | plt.show()
241 |
242 | plot_importance(xgb, clf)
243 |
--------------------------------------------------------------------------------
/sentiment_analysis_tutorial/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/simple_linear_regression_tutorial/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/simple_linear_regression_tutorial/simple_linear_regression_from_scratch.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/support-vector-machine-svm/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/support-vector-machine-svm/svm_machine_learning.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | #Support Vector Machines (SVM) Introduction - Machine Learning
4 |
5 | * Tutorial: https://news.towardsai.net/svm
6 | * Github: https://github.com/towardsai/tutorials/tree/master/support-vector-machine-svm
7 | """
8 |
9 | import pandas as pd
10 | import numpy as np
11 | import matplotlib.pyplot as plt
12 | import seaborn as sns
13 |
14 | #classic datasets from sklearn library
15 | from sklearn import datasets
16 |
17 | from sklearn.model_selection import train_test_split
18 |
19 | #Support Vector Classification - wrapper around SVM
20 | from sklearn.svm import SVC
21 |
22 | #different metrics to score model performance
23 | from sklearn import metrics
24 | from sklearn.metrics import classification_report,confusion_matrix
25 |
26 | """## Load data"""
27 |
28 | #loading WINE dataset
29 | wine_data = datasets.load_wine()
30 |
31 | #converting into DataFrame
32 | df = pd.DataFrame(wine_data.data, columns = wine_data.feature_names)
33 | df['target'] = wine_data.target
34 | df.head()
35 |
36 | """## Exploratory data analysis"""
37 |
38 | #analysing target variable
39 | sns.countplot(df.target)
40 | plt.show()
41 |
42 | #visualizing the separability of the data points
43 | fig, axes = plt.subplots(4, 3, figsize=(22,14))
44 | axes = [ax for axes_rows in axes for ax in axes_rows]
45 | columns = list(df.columns)
46 | columns.remove('target')
47 | columns.remove('alcohol')
48 |
49 | #looping through every columns of data
50 | #and plotting against alcohol
51 | for i, col in enumerate(columns):
52 | sns.scatterplot(data=df, x='alcohol', y=col, hue='target', palette="deep", ax=axes[i])
53 |
54 | """## Splitting data"""
55 |
56 | #splitting data into 80:20 train test ratio
57 | X = df.drop('target', axis=1)
58 | y = df.target
59 |
60 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)
61 |
62 | """## Model training and performance evaluation"""
63 |
64 | #training SVM model with linear kernel
65 | model = SVC(kernel='linear',random_state = 10)
66 | model.fit(X_train, y_train)
67 |
68 | #predicting output for test data
69 | pred = model.predict(X_test)
70 |
71 | #building confusion matrix
72 | cm = confusion_matrix(y_test, pred)
73 |
74 | #defining the size of the canvas
75 | plt.rcParams['figure.figsize'] = [15,8]
76 |
77 | #confusion matrix to DataFrame
78 | conf_matrix = pd.DataFrame(data = cm,columns = ['Predicted:0','Predicted:1', 'Predicted:2'], index = ['Actual:0','Actual:1', 'Actual:2'])
79 |
80 | #plotting the confusion matrix
81 | sns.heatmap(conf_matrix, annot = True, fmt = 'd', cmap = 'Paired', cbar = False,
82 | linewidths = 0.1, annot_kws = {'size':25})
83 | plt.xticks(fontsize = 20)
84 | plt.yticks(fontsize = 20)
85 | plt.show()
86 |
87 | print(classification_report(y_test,pred))
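
# Optional extension (a sketch, not part of the original tutorial): the linear kernel already
# separates the wine classes well, but you could compare kernels and regularization strengths
# with a small grid search. The parameter grid below is an arbitrary illustrative choice.
from sklearn.model_selection import GridSearchCV

param_grid = {'kernel': ['linear', 'rbf'], 'C': [0.1, 1, 10]}
grid = GridSearchCV(SVC(random_state=10), param_grid, cv=5)
grid.fit(X_train, y_train)
print("Best parameters:", grid.best_params_)
print("Test accuracy:", grid.score(X_test, y_test))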
88 |
--------------------------------------------------------------------------------
/survival_analysis_in_python/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/survival_analysis_in_python/lung.csv:
--------------------------------------------------------------------------------
1 | "","inst","time","status","age","sex","ph.ecog","ph.karno","pat.karno","meal.cal","wt.loss"
2 | "1",3,306,2,74,1,1,90,100,1175,NA
3 | "2",3,455,2,68,1,0,90,90,1225,15
4 | "3",3,1010,1,56,1,0,90,90,NA,15
5 | "4",5,210,2,57,1,1,90,60,1150,11
6 | "5",1,883,2,60,1,0,100,90,NA,0
7 | "6",12,1022,1,74,1,1,50,80,513,0
8 | "7",7,310,2,68,2,2,70,60,384,10
9 | "8",11,361,2,71,2,2,60,80,538,1
10 | "9",1,218,2,53,1,1,70,80,825,16
11 | "10",7,166,2,61,1,2,70,70,271,34
12 | "11",6,170,2,57,1,1,80,80,1025,27
13 | "12",16,654,2,68,2,2,70,70,NA,23
14 | "13",11,728,2,68,2,1,90,90,NA,5
15 | "14",21,71,2,60,1,NA,60,70,1225,32
16 | "15",12,567,2,57,1,1,80,70,2600,60
17 | "16",1,144,2,67,1,1,80,90,NA,15
18 | "17",22,613,2,70,1,1,90,100,1150,-5
19 | "18",16,707,2,63,1,2,50,70,1025,22
20 | "19",1,61,2,56,2,2,60,60,238,10
21 | "20",21,88,2,57,1,1,90,80,1175,NA
22 | "21",11,301,2,67,1,1,80,80,1025,17
23 | "22",6,81,2,49,2,0,100,70,1175,-8
24 | "23",11,624,2,50,1,1,70,80,NA,16
25 | "24",15,371,2,58,1,0,90,100,975,13
26 | "25",12,394,2,72,1,0,90,80,NA,0
27 | "26",12,520,2,70,2,1,90,80,825,6
28 | "27",4,574,2,60,1,0,100,100,1025,-13
29 | "28",13,118,2,70,1,3,60,70,1075,20
30 | "29",13,390,2,53,1,1,80,70,875,-7
31 | "30",1,12,2,74,1,2,70,50,305,20
32 | "31",12,473,2,69,2,1,90,90,1025,-1
33 | "32",1,26,2,73,1,2,60,70,388,20
34 | "33",7,533,2,48,1,2,60,80,NA,-11
35 | "34",16,107,2,60,2,2,50,60,925,-15
36 | "35",12,53,2,61,1,2,70,100,1075,10
37 | "36",1,122,2,62,2,2,50,50,1025,NA
38 | "37",22,814,2,65,1,2,70,60,513,28
39 | "38",15,965,1,66,2,1,70,90,875,4
40 | "39",1,93,2,74,1,2,50,40,1225,24
41 | "40",1,731,2,64,2,1,80,100,1175,15
42 | "41",5,460,2,70,1,1,80,60,975,10
43 | "42",11,153,2,73,2,2,60,70,1075,11
44 | "43",10,433,2,59,2,0,90,90,363,27
45 | "44",12,145,2,60,2,2,70,60,NA,NA
46 | "45",7,583,2,68,1,1,60,70,1025,7
47 | "46",7,95,2,76,2,2,60,60,625,-24
48 | "47",1,303,2,74,1,0,90,70,463,30
49 | "48",3,519,2,63,1,1,80,70,1025,10
50 | "49",13,643,2,74,1,0,90,90,1425,2
51 | "50",22,765,2,50,2,1,90,100,1175,4
52 | "51",3,735,2,72,2,1,90,90,NA,9
53 | "52",12,189,2,63,1,0,80,70,NA,0
54 | "53",21,53,2,68,1,0,90,100,1025,0
55 | "54",1,246,2,58,1,0,100,90,1175,7
56 | "55",6,689,2,59,1,1,90,80,1300,15
57 | "56",1,65,2,62,1,0,90,80,725,NA
58 | "57",5,5,2,65,2,0,100,80,338,5
59 | "58",22,132,2,57,1,2,70,60,NA,18
60 | "59",3,687,2,58,2,1,80,80,1225,10
61 | "60",1,345,2,64,2,1,90,80,1075,-3
62 | "61",22,444,2,75,2,2,70,70,438,8
63 | "62",12,223,2,48,1,1,90,80,1300,68
64 | "63",21,175,2,73,1,1,80,100,1025,NA
65 | "64",11,60,2,65,2,1,90,80,1025,0
66 | "65",3,163,2,69,1,1,80,60,1125,0
67 | "66",3,65,2,68,1,2,70,50,825,8
68 | "67",16,208,2,67,2,2,70,NA,538,2
69 | "68",5,821,1,64,2,0,90,70,1025,3
70 | "69",22,428,2,68,1,0,100,80,1039,0
71 | "70",6,230,2,67,1,1,80,100,488,23
72 | "71",13,840,1,63,1,0,90,90,1175,-1
73 | "72",3,305,2,48,2,1,80,90,538,29
74 | "73",5,11,2,74,1,2,70,100,1175,0
75 | "74",2,132,2,40,1,1,80,80,NA,3
76 | "75",21,226,2,53,2,1,90,80,825,3
77 | "76",12,426,2,71,2,1,90,90,1075,19
78 | "77",1,705,2,51,2,0,100,80,1300,0
79 | "78",6,363,2,56,2,1,80,70,1225,-2
80 | "79",3,11,2,81,1,0,90,NA,731,15
81 | "80",1,176,2,73,1,0,90,70,169,30
82 | "81",4,791,2,59,1,0,100,80,768,5
83 | "82",13,95,2,55,1,1,70,90,1500,15
84 | "83",11,196,1,42,1,1,80,80,1425,8
85 | "84",21,167,2,44,2,1,80,90,588,-1
86 | "85",16,806,1,44,1,1,80,80,1025,1
87 | "86",6,284,2,71,1,1,80,90,1100,14
88 | "87",22,641,2,62,2,1,80,80,1150,1
89 | "88",21,147,2,61,1,0,100,90,1175,4
90 | "89",13,740,1,44,2,1,90,80,588,39
91 | "90",1,163,2,72,1,2,70,70,910,2
92 | "91",11,655,2,63,1,0,100,90,975,-1
93 | "92",22,239,2,70,1,1,80,100,NA,23
94 | "93",5,88,2,66,1,1,90,80,875,8
95 | "94",10,245,2,57,2,1,80,60,280,14
96 | "95",1,588,1,69,2,0,100,90,NA,13
97 | "96",12,30,2,72,1,2,80,60,288,7
98 | "97",3,179,2,69,1,1,80,80,NA,25
99 | "98",12,310,2,71,1,1,90,100,NA,0
100 | "99",11,477,2,64,1,1,90,100,910,0
101 | "100",3,166,2,70,2,0,90,70,NA,10
102 | "101",1,559,1,58,2,0,100,100,710,15
103 | "102",6,450,2,69,2,1,80,90,1175,3
104 | "103",13,364,2,56,1,1,70,80,NA,4
105 | "104",6,107,2,63,1,1,90,70,NA,0
106 | "105",13,177,2,59,1,2,50,NA,NA,32
107 | "106",12,156,2,66,1,1,80,90,875,14
108 | "107",26,529,1,54,2,1,80,100,975,-3
109 | "108",1,11,2,67,1,1,90,90,925,NA
110 | "109",21,429,2,55,1,1,100,80,975,5
111 | "110",3,351,2,75,2,2,60,50,925,11
112 | "111",13,15,2,69,1,0,90,70,575,10
113 | "112",1,181,2,44,1,1,80,90,1175,5
114 | "113",10,283,2,80,1,1,80,100,1030,6
115 | "114",3,201,2,75,2,0,90,100,NA,1
116 | "115",6,524,2,54,2,1,80,100,NA,15
117 | "116",1,13,2,76,1,2,70,70,413,20
118 | "117",3,212,2,49,1,2,70,60,675,20
119 | "118",1,524,2,68,1,2,60,70,1300,30
120 | "119",16,288,2,66,1,2,70,60,613,24
121 | "120",15,363,2,80,1,1,80,90,346,11
122 | "121",22,442,2,75,1,0,90,90,NA,0
123 | "122",26,199,2,60,2,2,70,80,675,10
124 | "123",3,550,2,69,2,1,70,80,910,0
125 | "124",11,54,2,72,1,2,60,60,768,-3
126 | "125",1,558,2,70,1,0,90,90,1025,17
127 | "126",22,207,2,66,1,1,80,80,925,20
128 | "127",7,92,2,50,1,1,80,60,1075,13
129 | "128",12,60,2,64,1,1,80,90,993,0
130 | "129",16,551,1,77,2,2,80,60,750,28
131 | "130",12,543,1,48,2,0,90,60,NA,4
132 | "131",4,293,2,59,2,1,80,80,925,52
133 | "132",16,202,2,53,1,1,80,80,NA,20
134 | "133",6,353,2,47,1,0,100,90,1225,5
135 | "134",13,511,1,55,2,1,80,70,NA,49
136 | "135",1,267,2,67,1,0,90,70,313,6
137 | "136",22,511,1,74,2,2,60,40,96,37
138 | "137",12,371,2,58,2,1,80,70,NA,0
139 | "138",13,387,2,56,1,2,80,60,1075,NA
140 | "139",1,457,2,54,1,1,90,90,975,-5
141 | "140",5,337,2,56,1,0,100,100,1500,15
142 | "141",21,201,2,73,2,2,70,60,1225,-16
143 | "142",3,404,1,74,1,1,80,70,413,38
144 | "143",26,222,2,76,1,2,70,70,1500,8
145 | "144",1,62,2,65,2,1,80,90,1075,0
146 | "145",11,458,1,57,1,1,80,100,513,30
147 | "146",26,356,1,53,2,1,90,90,NA,2
148 | "147",16,353,2,71,1,0,100,80,775,2
149 | "148",16,163,2,54,1,1,90,80,1225,13
150 | "149",12,31,2,82,1,0,100,90,413,27
151 | "150",13,340,2,59,2,0,100,90,NA,0
152 | "151",13,229,2,70,1,1,70,60,1175,-2
153 | "152",22,444,1,60,1,0,90,100,NA,7
154 | "153",5,315,1,62,2,0,90,90,NA,0
155 | "154",16,182,2,53,2,1,80,60,NA,4
156 | "155",32,156,2,55,1,2,70,30,1025,10
157 | "156",NA,329,2,69,1,2,70,80,713,20
158 | "157",26,364,1,68,2,1,90,90,NA,7
159 | "158",4,291,2,62,1,2,70,60,475,27
160 | "159",12,179,2,63,1,1,80,70,538,-2
161 | "160",1,376,1,56,2,1,80,90,825,17
162 | "161",32,384,1,62,2,0,90,90,588,8
163 | "162",10,268,2,44,2,1,90,100,2450,2
164 | "163",11,292,1,69,1,2,60,70,2450,36
165 | "164",6,142,2,63,1,1,90,80,875,2
166 | "165",7,413,1,64,1,1,80,70,413,16
167 | "166",16,266,1,57,2,0,90,90,1075,3
168 | "167",11,194,2,60,2,1,80,60,NA,33
169 | "168",21,320,2,46,1,0,100,100,860,4
170 | "169",6,181,2,61,1,1,90,90,730,0
171 | "170",12,285,2,65,1,0,100,90,1025,0
172 | "171",13,301,1,61,1,1,90,100,825,2
173 | "172",2,348,2,58,2,0,90,80,1225,10
174 | "173",2,197,2,56,1,1,90,60,768,37
175 | "174",16,382,1,43,2,0,100,90,338,6
176 | "175",1,303,1,53,1,1,90,80,1225,12
177 | "176",13,296,1,59,2,1,80,100,1025,0
178 | "177",1,180,2,56,1,2,60,80,1225,-2
179 | "178",13,186,2,55,2,1,80,70,NA,NA
180 | "179",1,145,2,53,2,1,80,90,588,13
181 | "180",7,269,1,74,2,0,100,100,588,0
182 | "181",13,300,1,60,1,0,100,100,975,5
183 | "182",1,284,1,39,1,0,100,90,1225,-5
184 | "183",16,350,2,66,2,0,90,100,1025,NA
185 | "184",32,272,1,65,2,1,80,90,NA,-1
186 | "185",12,292,1,51,2,0,90,80,1225,0
187 | "186",12,332,1,45,2,0,90,100,975,5
188 | "187",2,285,2,72,2,2,70,90,463,20
189 | "188",3,259,1,58,1,0,90,80,1300,8
190 | "189",15,110,2,64,1,1,80,60,1025,12
191 | "190",22,286,2,53,1,0,90,90,1225,8
192 | "191",16,270,2,72,1,1,80,90,488,14
193 | "192",16,81,2,52,1,2,60,70,1075,NA
194 | "193",12,131,2,50,1,1,90,80,513,NA
195 | "194",1,225,1,64,1,1,90,80,825,33
196 | "195",22,269,2,71,1,1,90,90,1300,-2
197 | "196",12,225,1,70,1,0,100,100,1175,6
198 | "197",32,243,1,63,2,1,80,90,825,0
199 | "198",21,279,1,64,1,1,90,90,NA,4
200 | "199",1,276,1,52,2,0,100,80,975,0
201 | "200",32,135,2,60,1,1,90,70,1275,0
202 | "201",15,79,2,64,2,1,90,90,488,37
203 | "202",22,59,2,73,1,1,60,60,2200,5
204 | "203",32,240,1,63,2,0,90,100,1025,0
205 | "204",3,202,1,50,2,0,100,100,635,1
206 | "205",26,235,1,63,2,0,100,90,413,0
207 | "206",33,105,2,62,1,2,NA,70,NA,NA
208 | "207",5,224,1,55,2,0,80,90,NA,23
209 | "208",13,239,2,50,2,2,60,60,1025,-3
210 | "209",21,237,1,69,1,1,80,70,NA,NA
211 | "210",33,173,1,59,2,1,90,80,NA,10
212 | "211",1,252,1,60,2,0,100,90,488,-2
213 | "212",6,221,1,67,1,1,80,70,413,23
214 | "213",15,185,1,69,1,1,90,70,1075,0
215 | "214",11,92,1,64,2,2,70,100,NA,31
216 | "215",11,13,2,65,1,1,80,90,NA,10
217 | "216",11,222,1,65,1,1,90,70,1025,18
218 | "217",13,192,1,41,2,1,90,80,NA,-10
219 | "218",21,183,2,76,1,2,80,60,825,7
220 | "219",11,211,1,70,2,2,70,30,131,3
221 | "220",2,175,1,57,2,0,80,80,725,11
222 | "221",22,197,1,67,1,1,80,90,1500,2
223 | "222",11,203,1,71,2,1,80,90,1025,0
224 | "223",1,116,2,76,1,1,80,80,NA,0
225 | "224",1,188,1,77,1,1,80,60,NA,3
226 | "225",13,191,1,39,1,0,90,90,2350,-5
227 | "226",32,105,1,75,2,2,60,70,1025,5
228 | "227",6,174,1,66,1,1,90,100,1075,1
229 | "228",22,177,1,58,2,1,80,90,1060,0
230 |
--------------------------------------------------------------------------------
/survival_analysis_in_python/survival_analysis_1.py:
--------------------------------------------------------------------------------
1 | #Import required libraries:
2 | import pandas as pd
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | from lifelines import KaplanMeierFitter
6 |
7 | #Read the dataset:
8 | data = pd.read_csv("lung.csv")
9 | data.head()
10 |
11 | #Print the column names of our data:
12 | print(data.columns)
13 |
14 | #Additional info about our dataset:
15 | data.info()
16 |
17 | #Statistical info about our dataset:
18 | data.describe()
19 |
20 | #Plot histogram for sex of patient:
21 | print (data["sex"].hist())
22 |
23 | #Create an object of KaplanMeierFitter:
24 | kmf = KaplanMeierFitter()
25 |
26 | #Organize our data:
27 | #If status = 1 , then dead = 0
28 | #If status = 2 , then dead = 1
29 | data.loc[data.status == 1, 'dead'] = 0
30 | data.loc[data.status == 2, 'dead'] = 1
31 | data.head()
32 |
33 | #Fit the parameter values in our object:
34 | kmf.fit(durations = data["time"], event_observed = data["dead"])
35 |
36 | #Print the event table:
37 | kmf.event_table
38 | # Removed = Observed + Censored
39 | # Censored = subjects who did not experience the event (death) during the study; they still count toward the number at risk.
40 | # Observed = subjects who died (the event of interest occurred).
41 |
42 | #Calculating the survival probability for a given time:
43 | event_at_0 = kmf.event_table.iloc[0,:]
44 |
45 | #Calculate the survival probability for t=0:
46 | surv_for_0 = (event_at_0.at_risk - event_at_0.observed)/event_at_0.at_risk
47 | surv_for_0
48 |
49 | #Calculating the survival probability for a given time:
50 | event_at_5 = kmf.event_table.iloc[1,:]
51 |
52 | #Calculate the survival probability for t=5:
53 | surv_for_5 = (event_at_5.at_risk - event_at_5.observed)/event_at_5.at_risk
54 | surv_for_5
55 |
56 | #Calculating the survival probability for a given time:
57 | event_at_11 = kmf.event_table.iloc[2,:]
58 |
59 | #Calculate the survival probability for t=11:
60 | surv_for_11 = (event_at_11.at_risk - event_at_11.observed)/event_at_11.at_risk
61 | surv_for_11
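
# Added note: the pattern above is the Kaplan-Meier estimator. For each event time t_i with
# n_i subjects at risk and d_i observed deaths, the survival probability is the running product
#     S(t) = product over t_i <= t of (n_i - d_i) / n_i
# which is exactly what the manual multiplications below compute step by step.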
62 |
63 | #Calculating the actual survival probability at a given time:
64 |
65 | surv_after_0 = surv_for_0
66 | print("Survival Probability After 0 Days: ",surv_after_0)
67 |
68 | #Calculating the actual survival probability at a given time:
69 | surv_after_5 = surv_for_0 * surv_for_5
70 | print("Survival Probability After 5 Days: ",surv_after_5)
71 |
72 |
73 | surv_after_11 = surv_for_0 * surv_for_5 * surv_for_11   #Calculating the actual survival probability at a given time
74 | print("Survival Probability After 11 Days: ",surv_after_11)
75 |
76 | #Get the probability values the easy way!
77 | print("Survival probability for t=0: ",kmf.predict(0))
78 | print("Survival probability for t=5: ",kmf.predict(5))
79 | print("Survival probability for t=11: ",kmf.predict(11))
80 |
81 | #Predicting the survival probability for an array of values:
82 | kmf.predict([0,5,11,12])
83 |
84 | #To get the full list:
85 | kmf.survival_function_
86 |
87 | #Plot the graph:
88 | kmf.plot()
89 | plt.title("The Kaplan-Meier Estimate")
90 | plt.xlabel("Number of days")
91 | plt.ylabel("Probability of survival")
92 |
93 | #The median number of days:
94 | print("The median survival time: ",kmf.median_survival_time_)
95 |
96 | #Survival probability with confidence interval:
97 | kmf.confidence_interval_survival_function_
98 |
99 | #Plot survival function with confidence interval:
100 | confidence_surv_func = kmf.confidence_interval_survival_function_
101 | plt.plot(confidence_surv_func["KM_estimate_lower_0.95"],label="Lower")
102 | plt.plot(confidence_surv_func["KM_estimate_upper_0.95"],label="Upper")
103 | plt.title("Survival Function With Confidence Interval")
104 | plt.xlabel("Number of days")
105 | plt.ylabel("Survival Probability")
106 | plt.legend()
107 |
108 | #Probability of a subject dying by a given time (cumulative density):
109 | #F(1022) = p(0) + ... + p(1022)
110 | kmf.cumulative_density_
111 |
112 | #Plot the cumulative density graph:
113 | kmf.plot_cumulative_density()
114 | plt.title("Cumulative Density Plot")
115 | plt.xlabel("Number of days")
116 | plt.ylabel("Probability of person's death")
117 |
118 | #Cumulative density with confidence interval:
119 | kmf.confidence_interval_cumulative_density_
120 |
121 | #Plot cumulative density with confidence interval:
122 | confidence_cumulative_density = kmf.confidence_interval_cumulative_density_
123 | plt.plot(kmf.confidence_interval_cumulative_density_["KM_estimate_lower_0.95"],label="Lower")
124 | plt.plot(kmf.confidence_interval_cumulative_density_["KM_estimate_upper_0.95"],label="Upper")
125 | plt.title("Cumulative Density With Confidence Interval")
126 | plt.xlabel("Number of days")
127 | plt.ylabel("Cumulative Density")
128 | plt.legend()
129 |
130 | #Find cumulative density at a specific time:
131 | kmf.cumulative_density_at_times(times=1022)
132 |
133 | #Conditional median time to event of interest:
134 | kmf.conditional_time_to_event_
135 |
136 | #Conditional median time left for event:
137 | median_time_to_event = kmf.conditional_time_to_event_
138 | plt.plot(median_time_to_event,label="Median Time left")
139 | plt.title("Medain time to event")
140 | plt.xlabel("Total days")
141 | plt.ylabel("Conditional median time to event")
142 | plt.legend()
143 |
144 | #Hazard function:
145 | from lifelines import NelsonAalenFitter
146 |
147 | #Create an object of NelsonAalenFitter:
148 | naf = NelsonAalenFitter()
149 |
150 | #Fit our data into the object:
151 | naf.fit(data["time"], event_observed=data["dead"])
152 |
153 | #Print the cumulative hazard:
154 | naf.cumulative_hazard_
155 |
156 | #Plot the cumulative hazard graph:
157 | naf.plot_cumulative_hazard()
158 | plt.title("Cumulative Probability for Event of Interest")
159 | plt.xlabel("Number of days")
160 | plt.ylabel("Cumulative Probability of person's death")
161 |
162 | #We can predict the value at a certain point :
163 | print("Time = 500 days: ",naf.predict(500))
164 | print("Time = 1022 days: ",naf.predict(1022))
165 |
166 | #Cumulative hazard with confidence interval:
167 | naf.confidence_interval_
168 |
169 | #Plot cumulative hazard with confidence interval:
170 | confidence_interval = naf.confidence_interval_
171 | plt.plot(confidence_interval["NA_estimate_lower_0.95"],label="Lower")
172 | plt.plot(confidence_interval["NA_estimate_upper_0.95"],label="Upper")
173 | plt.title("Cumulative hazard With Confidence Interval")
174 | plt.xlabel("Number of days")
175 | plt.ylabel("Cumulative hazard")
176 | plt.legend()
177 |
178 | #Plot the cumulative density and the cumulative hazard together:
179 | kmf.plot_cumulative_density(label="Cumulative Density")
180 | naf.plot_cumulative_hazard(label="Cumulative Hazard")
181 | plt.xlabel("Number of Days")
182 |
--------------------------------------------------------------------------------
/survival_analysis_in_python/survival_analysis_2.py:
--------------------------------------------------------------------------------
1 | #Import required libraries:
2 | import pandas as pd
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | from lifelines import KaplanMeierFitter
6 |
7 | #Read the dataset:
8 | data = pd.read_csv("lung.csv")
9 | data.head()
10 |
11 | #Organize our data:
12 | #If status = 1 , then dead = 0
13 | #If status = 2 , then dead = 1
14 | data.loc[data.status == 1, 'dead'] = 0
15 | data.loc[data.status == 2, 'dead'] = 1
16 | data.head()
17 |
18 | #Create two objects for groups:
19 | #kmf_m for male data:
20 | #kmf_f for female data:
21 | kmf_m = KaplanMeierFitter()
22 | kmf_f = KaplanMeierFitter()
23 |
24 | #Dividing data into groups:
25 | Male = data.query("sex == 1")
26 | Female = data.query("sex == 2")
27 |
28 | #View data of Male group:
29 | Male.head()
30 |
31 | #View data of Female group:
32 | Female.head()
33 |
34 | #Fit data into objects:
35 | kmf_m.fit(durations = Male["time"],event_observed = Male["dead"] ,label="Male")
36 | kmf_f.fit(durations = Female["time"],event_observed = Female["dead"], label="Female")
37 |
38 | #Event table for male group:
39 | kmf_m.event_table
40 |
41 | #Event table for female group:
42 | kmf_f.event_table
43 |
44 | #Predict value based on time:
45 | kmf_m.predict(11)
46 |
47 | #Predict value based on time:
48 | kmf_f.predict(11)
49 |
50 | #Get complete data of survival function for male group:
51 | kmf_m.survival_function_
52 |
53 | #Get complete data of survival function for female group:
54 | kmf_f.survival_function_
55 |
56 | #Plot the survival_function data:
57 | kmf_m.plot()
58 | kmf_f.plot()
59 | plt.xlabel("Days Passed")
60 | plt.ylabel("Survival Probability")
61 | plt.title("KMF")
62 |
63 | #Cumulative density for male group:
64 | kmf_m.cumulative_density_
65 |
66 | #Cumulative density for female group:
67 | kmf_f.cumulative_density_
68 |
69 | #Plot the graph for cumulative density for both groups:
70 | kmf_m.plot_cumulative_density()
71 | kmf_f.plot_cumulative_density()
72 | plt.title("Cumulative Density")
73 | plt.xlabel("Number of days")
74 | plt.ylabel("Probability")
75 |
76 | #Hazard Function:
77 | from lifelines import NelsonAalenFitter
78 |
79 | #Fitting the data into objects:
80 | naf_m = NelsonAalenFitter()
81 | naf_f = NelsonAalenFitter()
82 | naf_m.fit(Male["time"],event_observed = Male["dead"])
83 | naf_f.fit(Female["time"],event_observed = Female["dead"])
84 |
85 | #Cumulative hazard for male group:
86 | naf_m.cumulative_hazard_
87 |
88 | #Cumulative hazard for female group:
89 | naf_f.cumulative_hazard_
90 |
91 | #Plot the graph for cumulative hazard:
92 | naf_m.plot_cumulative_hazard(label="Male")
93 | naf_f.plot_cumulative_hazard(label="Female")
94 | plt.title("Cumulative Hazard Plot")
95 | plt.xlabel("Number of Days")
96 | plt.ylabel("Cumulative Hazard")
97 |
98 | #Conditional median time to event of interest:
99 | kmf_m.conditional_time_to_event_
100 |
101 | #Conditional median time left for event for male group:
102 | median_time_to_event = kmf_m.conditional_time_to_event_
103 | plt.plot(median_time_to_event,label="Median Time left")
104 | plt.title("Medain time to event")
105 | plt.xlabel("Total days")
106 | plt.ylabel("Conditional median time to event")
107 | plt.legend()
108 |
109 | #Conditional median time to event of interest for female group:
110 | kmf_f.conditional_time_to_event_
111 |
112 | #Conditional median time left for event for female group:
113 | median_time_to_event = kmf_f.conditional_time_to_event_
114 | plt.plot(median_time_to_event,label="Median Time left")
115 | plt.title("Medain time to event")
116 | plt.xlabel("Total days")
117 | plt.ylabel("Conditional median time to event")
118 | plt.legend()
119 |
120 | #Survival probability with confidence interval for male group:
121 | kmf_m.confidence_interval_survival_function_
122 |
123 | #Plot survival function with confidence interval for male group:
124 | confidence_surv_func = kmf_m.confidence_interval_survival_function_
125 | plt.plot(confidence_surv_func["Male_lower_0.95"],label="Lower")
126 | plt.plot(confidence_surv_func["Male_upper_0.95"],label="Upper")
127 | plt.title("Survival Function With Confidence Interval")
128 | plt.xlabel("Number of days")
129 | plt.ylabel("Survival Probability")
130 | plt.legend()
131 |
132 | #Survival probability with confidence interval for female group:
133 | kmf_f.confidence_interval_survival_function_
134 |
135 | #Plot survival function with confidence interval for female group:
136 | confidence_surv_func = kmf_f.confidence_interval_survival_function_
137 | plt.plot(confidence_surv_func["Female_lower_0.95"],label="Lower")
138 | plt.plot(confidence_surv_func["Female_upper_0.95"],label="Upper")
139 | plt.title("Survival Function With Confidence Interval")
140 | plt.xlabel("Number of days")
141 | plt.ylabel("Survival Probability")
142 | plt.legend()
143 |
144 | #Plot the cumulative_hazard and cumulative density:
145 | kmf_m.plot_cumulative_density(label="Male Density")
146 | naf_m.plot_cumulative_hazard(label="Male Hazard")
147 | plt.xlabel("Number of Days")
148 |
149 | #Plot the cumulative_hazard and cumulative density:
150 | kmf_f.plot_cumulative_density(label="Female Density")
151 | naf_f.plot_cumulative_hazard(label="Female Hazard")
152 | plt.xlabel("Number of Days")
153 |
154 | #Define variables for log-rank test:
155 | Time_A = Male['time']
156 | Event_A = Male['dead']
157 |
158 | Time_B = Female['time']
159 | Event_B = Female['dead']
160 |
161 | #Performing the Log-Rank test:
162 | from lifelines.statistics import logrank_test
163 |
164 | results = logrank_test(Time_A, Time_B, event_observed_A=Event_A, event_observed_B=Event_B)
165 | results.print_summary()
166 |
167 | #Print the P-value:
168 | print("P-value :",results.p_value)
169 |
--------------------------------------------------------------------------------
/survival_analysis_in_python/survival_analysis_3.py:
--------------------------------------------------------------------------------
1 | #Import required libraries:
2 | import pandas as pd
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | from lifelines import KaplanMeierFitter
6 | from lifelines import CoxPHFitter
7 |
8 | #Read the data file:
9 | data = pd.read_csv("lung.csv")
10 | data = data.drop(["Unnamed: 0"],axis=1)
11 | data.head()
12 |
13 | #Columns of dataset:
14 | data.columns
15 |
16 | #Drop rows with null values:
17 | data= data.dropna(subset=['inst', 'time', 'status', 'age', 'sex','ph.ecog','ph.karno', 'pat.karno', 'meal.cal', 'wt.loss'])
18 | data.head()
19 |
20 | #Create an object:
21 | kmf = KaplanMeierFitter()
22 |
23 | #Organize the data:
24 | data.loc[data.status == 1, 'dead'] = 0
25 | data.loc[data.status == 2, 'dead'] = 1
26 | data.head()
27 |
28 | #Fit data into our object:
29 | kmf.fit(durations = data["time"], event_observed = data["dead"])
30 |
31 | #Get the event table:
32 | kmf.event_table
33 |
34 | #Get required columns from the data:
35 | data = data[[ 'time', 'age', 'sex', 'ph.ecog','ph.karno','pat.karno', 'meal.cal', 'wt.loss', 'dead']]
36 |
37 | #Get the summary using CoxPHFitter:
38 | cph = CoxPHFitter()
39 | cph.fit(data,"time",event_col="dead")
40 | cph.print_summary()
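
# Interpretation note (added): in the Cox model summary, exp(coef) is the hazard ratio for each
# covariate; values above 1 indicate an increased hazard (worse survival) as the covariate
# increases, and values below 1 indicate a decreased hazard.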
41 |
42 | #Plot the result on graph:
43 | cph.plot()
44 |
45 | data.iloc[10:15,:]
46 |
47 | #Plotting the data:
48 | d_data = data.iloc[10:15,:]
49 | cph.predict_survival_function(d_data).plot()
50 |
--------------------------------------------------------------------------------
/what-is-a-gpu/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files\Google\Drive\googledrivesync.exe
4 | IconIndex=16
5 |
--------------------------------------------------------------------------------
/what-is-a-gpu/script.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | # What is a GPU? Are GPUs Needed for Deep Learning?
4 |
5 | * Tutorial: https://news.towardsai.net/gpu
6 | * Github: https://github.com/towardsai/tutorials/tree/master/what-is-a-gpu
7 |
8 | **GPU Usage on Your Local Computer / Google Colab**
9 |
10 | In this notebook you will connect to a GPU, and then run some basic TensorFlow operations on both the CPU and a GPU, observing the speedup provided by using the GPU.
11 |
12 | #### Enabling and testing the GPU
13 |
14 | First, you'll need to enable GPUs for the notebook:
15 |
16 | 1. Navigate to Edit→Notebook Settings
17 | 2. Select GPU from the Hardware Accelerator drop-down
18 |
19 | ### Installing TensorFlow GPU
20 |
21 | As a first step, we need to install *tensorflow-gpu*.
22 |
23 | If you are going to install it on your computer, you should follow these steps.
24 |
25 | After running this command, you will have the latest version of tensorflow-gpu.
26 | """
27 |
28 | !pip install tensorflow-gpu
29 |
30 | # Commented out IPython magic to ensure Python compatibility.
31 | # %tensorflow_version 2.x
32 | import tensorflow as tf
33 | device_name = tf.test.gpu_device_name()
34 | if device_name != '/device:GPU:0':
35 | raise SystemError('GPU device not found')
36 | print('Found GPU at: {}'.format(device_name))
37 |
38 | """If the TensorFlow version you want to use is specific, install it by entering the version name."""
39 |
40 | !pip install tensorflow-gpu==1.15.0
41 |
42 | # Commented out IPython magic to ensure Python compatibility.
43 | # %tensorflow_version 1.x
44 | import tensorflow as tf
45 | device_name = tf.test.gpu_device_name()
46 | if device_name != '/device:GPU:0':
47 | raise SystemError('GPU device not found')
48 | print('Found GPU at: {}'.format(device_name))
49 |
50 | """Configuration Specific GPU on TensorFlow"""
51 |
52 | import tensorflow as tf
53 | try:
54 | tf.device('/job:localhost/replica:0/task:0/device:GPU:1')
55 | except RuntimeError as e:
56 | print(e)
57 |
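# In practice, tf.device is used as a context manager so that the ops created inside the
# "with" block are placed on the chosen device. A minimal TF 2.x sketch, falling back to
# the CPU when no GPU is visible:
import tensorflow as tf

device = '/device:GPU:0' if tf.config.list_physical_devices('GPU') else '/device:CPU:0'
with tf.device(device):
    a = tf.random.uniform((1000, 1000))
    b = tf.random.uniform((1000, 1000))
    c = tf.matmul(a, b)                    # executed on the selected device
print(c.device)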
58 | """### Checking installed TensorFlow GPU version"""
59 |
60 | pip show tensorflow-gpu
61 |
62 | """As you can see, the latest version of TensorFlow has been installed. If you want to use a specific version distribution, it is necessary to install with the version name.
63 |
64 | If you are also getting warnings as above, it is because of:
65 | - Other libraries that came with the last version of tensorflow-gpu that we installed before are not uninstall, so they have version conflicts with the newly installed version. Decide on the version you want to use and use only that version distribution.
66 |
67 | To check the new TensorFlow version installed, work again with the command.
68 | ```
69 | pip show packagename
70 | ```
71 |
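As a quick sanity check, you can also print the installed version from Python itself:

```
import tensorflow as tf
print(tf.__version__)
```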
72 | ### Listing Available CPU and GPU Devices
73 |
74 | Next, we list all of the devices that are available for computation.
75 | """
76 |
77 | from tensorflow.python.client import device_lib
78 | device_lib.list_local_devices()
79 |
80 | """4 devices are shown in the list here. 2 of them are a concept excluding CPU and GPU.
81 |
82 | As mentioned in the docs, XLA stands for "accelerated linear algebra". It's Tensorflow's relatively new optimizing compiler that can further speed up your ML models' GPU operations by combining what used to be multiple CUDA kernels into one (simplifying because this isn't that important for your question).
83 |
84 | In the next step, the default device name used will be listed.
85 | """
86 |
87 | import tensorflow as tf
88 | tf.test.gpu_device_name()
89 |
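# In TF 2.x, the same kind of device information is also available through tf.config;
# for example (get_device_details requires a reasonably recent TF 2.x release):
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
print(gpus)                                                    # GPUs visible to TensorFlow
if gpus:
    print(tf.config.experimental.get_device_details(gpus[0]))  # e.g. device name, compute capability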
90 | """### Speed Comparison in Model Tutorials for GPU and CPU"""
91 |
92 | import tensorflow as tf
93 | mnist = tf.keras.datasets.fashion_mnist   # Fashion-MNIST: 28x28 grayscale images, 10 classes
94 | (training_images, training_labels), (test_images, test_labels) = mnist.load_data()
95 | training_images = training_images / 255.0   # scale pixel values to [0, 1]
96 | test_images = test_images / 255.0
97 | model = tf.keras.models.Sequential([
98 |     tf.keras.layers.Flatten(),
99 |     tf.keras.layers.Dense(128, activation=tf.nn.relu),
100 |     tf.keras.layers.Dense(10, activation=tf.nn.softmax)
101 | ])
102 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
103 | model.fit(training_images, training_labels, epochs=5)
104 |
105 | test_loss = model.evaluate(test_images, test_labels)
106 |
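# To compare the GPU run above with the CPU run below using a number rather than just the
# progress-bar output, you can time the fit yourself with the standard time module
# (note: this trains the already-compiled model for another 5 epochs):
import time

start = time.time()
model.fit(training_images, training_labels, epochs=5)
print("Training took {:.1f} seconds".format(time.time() - start))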
107 | """As you can see, ETA times are very short. One minute to train almost this amount of data! Now, by following the steps below, let's choose the hardware CPU for the runtime and see how we will experience a difference in speed.
108 |
109 |
110 | ---
111 |
112 | You'll need to switch the notebook runtime to CPU:
113 |
114 |
115 | 1. Navigate to Edit→Notebook Settings
116 | 2. Select None from the Hardware Accelerator drop-down
117 | """
118 |
119 | import tensorflow as tf
120 | tf.test.gpu_device_name()
121 |
122 | """And now when we test whether it is using GPU or not, we see that the value of None comes out. After making sure we are using a CPU, we can provide the training for this as well."""
123 |
124 | import tensorflow as tf
125 | mnist = tf.keras.datasets.fashion_mnist
126 | (training_images, training_labels), (test_images, test_labels) = mnist.load_data()
127 | training_images = training_images / 255.0
128 | test_images = test_images / 255.0
129 | model = tf.keras.models.Sequential([
130 |     tf.keras.layers.Flatten(),
131 |     tf.keras.layers.Dense(128, activation=tf.nn.relu),
132 |     tf.keras.layers.Dense(10, activation=tf.nn.softmax)
133 | ])
134 | model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
135 | model.fit(training_images, training_labels, epochs=5)
136 |
137 | test_loss = model.evaluate(test_images, test_labels)
138 |
139 | """Here, as the number of data increases and the problem becomes more intense, the difference between training will become much wider.
140 |
141 | Since there is not a very large data set in this example line of code, there are no big differences between CPU and GPU when processing data. However, there will be a significant difference when processing big data.
142 |
143 |
144 | ---
145 |
146 | # A sample GPU setup on the local computer
147 |
148 | If you want to see your graphics card's specifications, you can run **dxdiag** on Windows, or run the command below in your **computer's terminal**.
149 |
150 | ### Checking the graphics card name
151 | """
152 |
153 | wmic path win32_VideoController get name
154 |
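# If you have an NVIDIA card and its drivers are installed, nvidia-smi is another way to
# see the GPU model, driver version, and current memory usage from the terminal:
#   nvidia-smi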
155 | """Since I do not want to work in the base area of the machine, I create a virtual environment and I do this with Conda. You can also use Mini Conda if you wish.
156 |
157 | ### Creating virtual environment
158 | """
159 |
160 | conda create -n virtualenv python=3.6
161 | conda activate virtualenv
162 |
163 | """### TensorFlow GPU Installation
164 |
165 | To use the GPU with TensorFlow, it is necessary to install the tensorflow-gpu library. If you install it with conda, the matching CUDA and cuDNN packages are listed and installed along with it.
166 | """
167 |
168 | conda install tensorflow-gpu==1.15.0
169 | #pip install tensorflow-gpu==1.15.0
170 |
171 | """After all these stages, TensorFlow GPU must be installed. If you wish, you can control the terminal with the following commands."""
172 |
173 | import tensorflow as tf
174 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))  # TF 1.x API: logs which device each op is placed on
175 |
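# On TF 2.x the equivalent of the Session above is to turn on device-placement logging:
import tensorflow as tf

tf.debugging.set_log_device_placement(True)            # log which device each op runs on
print(tf.reduce_sum(tf.random.uniform((100, 100))))    # run a couple of ops to produce log output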
176 | """### Keras Installation"""
177 |
178 | pip install keras==2.2.5
179 |
--------------------------------------------------------------------------------