├── .gitignore
├── README.md
├── data_preprocessing_template.py
├── histogram.py
├── linear_regression.py
├── percentile.py
├── scatter.py
└── udemy
    └── Machine Learning A-Z
        ├── .DS_Store
        ├── Part 0 - Welcome to Machine Learning A-Z
        │   └── .DS_Store
        ├── Part 1 - Data Preprocessing
        │   ├── .DS_Store
        │   ├── Data.csv
        │   ├── categorical-data.py
        │   ├── categorical_data.R
        │   ├── categorical_data.py
        │   ├── data_preprocessing_template.R
        │   ├── data_preprocessing_template.py
        │   ├── missing-data.py
        │   ├── missing_data.R
        │   └── missing_data.py
        ├── Part 2 - Regression
        │   ├── Decision_Tree_Regression
        │   │   ├── .DS_Store
        │   │   ├── Position_Salaries.csv
        │   │   ├── decision_tree_regression.R
        │   │   ├── decision_tree_regression.py
        │   │   ├── regression_template.R
        │   │   └── regression_template.py
        │   ├── Multiple Linear Regression
        │   │   ├── .DS_Store
        │   │   ├── 50_Startups.csv
        │   │   ├── Homework_Solutions
        │   │   │   ├── 50_Startups.csv
        │   │   │   ├── data_preprocessing_template.R
        │   │   │   ├── data_preprocessing_template.py
        │   │   │   ├── multiple_linear_regression.R
        │   │   │   └── multiple_linear_regression.py
        │   │   ├── VL2019205000898_DA.pdf
        │   │   ├── data_preprocessing_template.R
        │   │   ├── data_preprocessing_template.py
        │   │   ├── multiple_linear_regression.R
        │   │   └── multiple_linear_regression.py
        │   ├── Polynomial_Regression
        │   │   ├── .DS_Store
        │   │   ├── Position_Salaries.csv
        │   │   ├── Regression_Template
        │   │   │   ├── .DS_Store
        │   │   │   ├── regression_template.R
        │   │   │   └── regression_template.py
        │   │   ├── data_preprocessing_template.R
        │   │   ├── data_preprocessing_template.py
        │   │   ├── polynomial_regression-updated.py
        │   │   ├── polynomial_regression.R
        │   │   └── polynomial_regression.py
        │   ├── Random Forest Regression
        │   │   ├── .DS_Store
        │   │   ├── Position_Salaries.csv
        │   │   ├── random_forest_regression.R
        │   │   ├── random_forest_regression.py
        │   │   ├── regression_template.R
        │   │   └── regression_template.py
        │   ├── Regression-Pros-Cons.pdf
        │   ├── Simple_Linear_Regression
        │   │   ├── Salary_Data.csv
        │   │   ├── data_preprocessing_template.R
        │   │   ├── data_preprocessing_template.py
        │   │   ├── simple_linear_regression.R
        │   │   └── simple_linear_regression.py
        │   └── Support Vector Regression (SVR)
        │       ├── Position_Salaries.csv
        │       ├── regression_template.R
        │       ├── regression_template.py
        │       ├── svr.R
        │       └── svr.py
        ├── Part 3 - Classification
        │   ├── Section 14 - Logistic Regression
        │   │   ├── Classification_Template
        │   │   │   ├── .DS_Store
        │   │   │   ├── classification_template.R
        │   │   │   └── classification_template.py
        │   │   └── Logistic_Regression
        │   │       ├── Social_Network_Ads.csv
        │   │       ├── logistic_regression.R
        │   │       └── logistic_regression.py
        │   ├── Section 15 - K-Nearest Neighbors (K-NN)
        │   │   └── K_Nearest_Neighbors
        │   │       ├── .DS_Store
        │   │       ├── Social_Network_Ads.csv
        │   │       ├── classification_template.R
        │   │       ├── classification_template.py
        │   │       ├── knn.R
        │   │       └── knn.py
        │   ├── Section 16 - Support Vector Machine (SVM)
        │   │   └── SVM
        │   │       ├── .DS_Store
        │   │       ├── Social_Network_Ads.csv
        │   │       ├── classification_template.R
        │   │       ├── classification_template.py
        │   │       ├── svm.R
        │   │       └── svm.py
        │   ├── Section 17 - Kernel SVM
        │   │   └── Kernel_SVM
        │   │       ├── .DS_Store
        │   │       ├── Social_Network_Ads.csv
        │   │       ├── classification_template.R
        │   │       ├── classification_template.py
        │   │       ├── kernel_svm.R
        │   │       └── kernel_svm.py
        │   ├── Section 18 - Naive Bayes
        │   │   └── Naive_Bayes
        │   │       ├── .DS_Store
        │   │       ├── Social_Network_Ads.csv
        │   │       ├── classification_template.R
        │   │       ├── classification_template.py
        │   │       ├── naive_bayes.R
        │   │       └── naive_bayes.py
        │   ├── Section 19 - Decision Tree Classification
        │   │   └── Decision_Tree_Classification
        │   │       ├── .DS_Store
        │   │       ├── Social_Network_Ads.csv
        │   │       ├── classification_template.R
        │   │       ├── classification_template.py
        │   │       ├── decision_tree_classification.R
        │   │       └── decision_tree_classification.py
        │   └── Section 20 - Random Forest Classification
        │       └── Random_Forest_Classification
        │           ├── .DS_Store
        │           ├── Social_Network_Ads.csv
        │           ├── classification_template.R
        │           ├── classification_template.py
        │           ├── random_forest_classification.R
        │           └── random_forest_classification.py
        ├── Part 4 - Clustering
        │   ├── Section 24 - K-Means Clustering
        │   │   └── K_Means
        │   │       ├── .DS_Store
        │   │       ├── Mall_Customers.csv
        │   │       ├── data_preprocessing_template.R
        │   │       ├── data_preprocessing_template.py
        │   │       ├── kmeans.R
        │   │       └── kmeans.py
        │   └── Section 25 - Hierarchical Clustering
        │       └── Hierarchical_Clustering
        │           ├── .DS_Store
        │           ├── Mall_Customers.csv
        │           ├── data_preprocessing_template.R
        │           ├── data_preprocessing_template.py
        │           ├── hc.R
        │           └── hc.py
        └── QnA.pdf

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Machine-learning
Beginning

--------------------------------------------------------------------------------
/data_preprocessing_template.py:
--------------------------------------------------------------------------------
# Data Preprocessing Template

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values

"""# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""

# Taking care of missing data
# (SimpleImputer replaces the removed sklearn.preprocessing.Imputer)
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean')
imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])

--------------------------------------------------------------------------------
/histogram.py:
--------------------------------------------------------------------------------
import numpy
import matplotlib.pyplot as plt

x = numpy.random.uniform(0.0, 5.0, 250)

plt.hist(x, 5)
plt.show()

--------------------------------------------------------------------------------
/linear_regression.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
from scipy import stats

x = [5,7,8,7,2,17,2,9,4,11,12,9,6]
y = [99,86,87,88,111,86,103,87,94,78,77,85,86]

slope, intercept, r, p, std_err = stats.linregress(x, y)

def myfunc(x):
    return slope * x + intercept

mymodel = list(map(myfunc, x))

plt.scatter(x, y)
plt.plot(x, mymodel)
plt.show()

speed = myfunc(10)

print(speed)
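
Side note on linear_regression.py above: stats.linregress already returns the correlation coefficient r and the p-value, which the script discards. A minimal sketch (illustrative snippet, not one of the repository files) of how they could be used to judge the quality of the fit:

from scipy import stats

x = [5, 7, 8, 7, 2, 17, 2, 9, 4, 11, 12, 9, 6]
y = [99, 86, 87, 88, 111, 86, 103, 87, 94, 78, 77, 85, 86]

slope, intercept, r, p, std_err = stats.linregress(x, y)

# r close to -1 or 1 indicates a strong linear relationship;
# r**2 is the share of the variance in y explained by x.
print(f"r = {r:.3f}, r^2 = {r**2:.3f}, p = {p:.4f}")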

--------------------------------------------------------------------------------
/percentile.py:
--------------------------------------------------------------------------------
import numpy

ages = [5,31,43,48,50,41,7,11,15,39,80,82,32,2,8,6,25,36,27,61,31]

x = numpy.percentile(ages, 75)

print(x)

--------------------------------------------------------------------------------
/scatter.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt

x = [5,7,8,7,2,17,2,9,4,11,12,9,6]
y = [99,86,87,88,111,86,103,87,94,78,77,85,86]

plt.scatter(x, y)
plt.show()

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/.DS_Store

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 0 - Welcome to Machine Learning A-Z/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 0 - Welcome to Machine Learning A-Z/.DS_Store

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 1 - Data Preprocessing/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 1 - Data Preprocessing/.DS_Store

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 1 - Data Preprocessing/Data.csv:
--------------------------------------------------------------------------------
Country,Age,Salary,Purchased
France,44,72000,No
Spain,27,48000,Yes
Germany,30,54000,No
Spain,38,61000,No
Germany,40,,Yes
France,35,58000,Yes
Spain,,52000,No
France,48,79000,Yes
Germany,50,83000,No
France,37,67000,Yes

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 1 - Data Preprocessing/categorical-data.py:
--------------------------------------------------------------------------------
# Data Preprocessing

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values

# Taking care of missing data
# Updated Imputer
from sklearn.impute import SimpleImputer
missingvalues = SimpleImputer(missing_values = np.nan, strategy = 'mean')
missingvalues = missingvalues.fit(X[:, 1:3])
X[:, 1:3] = missingvalues.transform(X[:, 1:3])

# Encoding categorical data
# Encoding the Independent Variable
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
#labelencoder_X = LabelEncoder()
#X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
#onehotencoder = OneHotEncoder(categorical_features = [0])
#X = onehotencoder.fit_transform(X).toarray()
# Encoding the Dependent Variable
#labelencoder_y = LabelEncoder()
#y = labelencoder_y.fit_transform(y)

ct = ColumnTransformer([('encoder', OneHotEncoder(), [0])], remainder = 'passthrough')
X = np.array(ct.fit_transform(X), dtype = float)
# Encoding Y data
y = LabelEncoder().fit_transform(y)

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train.reshape(-1, 1))
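
For comparison with categorical-data.py above, the same one-hot encoding can be done entirely in pandas; a minimal sketch (illustrative, not one of the course files), assuming the same Data.csv layout:

import pandas as pd

dataset = pd.read_csv('Data.csv')
# One-hot encode the Country column; drop_first=True drops one dummy
# column to avoid the dummy variable trap.
encoded = pd.get_dummies(dataset, columns = ['Country'], drop_first = True)
print(encoded.head())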

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 1 - Data Preprocessing/categorical_data.R:
--------------------------------------------------------------------------------
# Data Preprocessing

# Importing the dataset
dataset = read.csv('Data.csv')

# Taking care of missing data
dataset$Age = ifelse(is.na(dataset$Age),
                     ave(dataset$Age, FUN = function(x) mean(x, na.rm = TRUE)),
                     dataset$Age)
dataset$Salary = ifelse(is.na(dataset$Salary),
                        ave(dataset$Salary, FUN = function(x) mean(x, na.rm = TRUE)),
                        dataset$Salary)

# Encoding categorical data
dataset$Country = factor(dataset$Country,
                         levels = c('France', 'Spain', 'Germany'),
                         labels = c(1, 2, 3))
dataset$Purchased = factor(dataset$Purchased,
                           levels = c('No', 'Yes'),
                           labels = c(0, 1))

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 1 - Data Preprocessing/categorical_data.py:
--------------------------------------------------------------------------------
# Data Preprocessing

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values

# Taking care of missing data
# (SimpleImputer replaces the removed sklearn.preprocessing.Imputer)
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean')
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])

# Encoding categorical data
# Encoding the Independent Variable
# (ColumnTransformer replaces OneHotEncoder's removed categorical_features argument)
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
ct = ColumnTransformer([('encoder', OneHotEncoder(), [0])], remainder = 'passthrough')
X = np.array(ct.fit_transform(X), dtype = float)
# Encoding the Dependent Variable
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 1 - Data Preprocessing/data_preprocessing_template.R:
--------------------------------------------------------------------------------
# Data Preprocessing Template

# Importing the dataset
dataset = read.csv('Data.csv')

# Splitting the dataset into the Training set and Test set
# install.packages('caTools')
library(caTools)
set.seed(123)
split = sample.split(dataset$DependentVariable, SplitRatio = 0.8)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)

# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 1 - Data Preprocessing/data_preprocessing_template.py:
--------------------------------------------------------------------------------
# Data Preprocessing Template

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values

"""# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""

# Taking care of missing data
# (SimpleImputer replaces the removed sklearn.preprocessing.Imputer)
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean')
imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 1 - Data Preprocessing/missing-data.py:
--------------------------------------------------------------------------------
# Data Preprocessing

# Importing the libraries
#import numpy as np
#import matplotlib.pyplot as plt
#import pandas as pd

# Importing the dataset
#dataset = pd.read_csv('Data.csv')
#X = dataset.iloc[:, :-1].values
#y = dataset.iloc[:, 3].values

# Taking care of missing data
#from sklearn.preprocessing import Imputer
#imputer = Imputer(missing_values = 'NaN', strategy = 'mean', axis = 0)
#imputer = imputer.fit(X[:, 1:3])
#X[:, 1:3] = imputer.transform(X[:, 1:3])

# Data Preprocessing

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values

# Taking care of missing data
# Updated Imputer (SimpleImputer replaces the old Imputer commented out above)
from sklearn.impute import SimpleImputer
missingvalues = SimpleImputer(missing_values = np.nan, strategy = 'mean')
missingvalues = missingvalues.fit(X[:, 1:3])
X[:, 1:3] = missingvalues.transform(X[:, 1:3])
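
SimpleImputer is not limited to the mean: 'median' and 'most_frequent' are also supported strategies. A short sketch (illustrative, not a repository file) under the same Data.csv assumptions:

import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer

X = pd.read_csv('Data.csv').iloc[:, :-1].values
# The median is more robust to outliers than the mean;
# 'most_frequent' would also work for categorical columns.
imputer = SimpleImputer(missing_values = np.nan, strategy = 'median')
X[:, 1:3] = imputer.fit_transform(X[:, 1:3])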

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 1 - Data Preprocessing/missing_data.R:
--------------------------------------------------------------------------------
# Data Preprocessing

# Importing the dataset
dataset = read.csv('Data.csv')

# Taking care of missing data
dataset$Age = ifelse(is.na(dataset$Age),
                     ave(dataset$Age, FUN = function(x) mean(x, na.rm = TRUE)),
                     dataset$Age)
dataset$Salary = ifelse(is.na(dataset$Salary),
                        ave(dataset$Salary, FUN = function(x) mean(x, na.rm = TRUE)),
                        dataset$Salary)

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 1 - Data Preprocessing/missing_data.py:
--------------------------------------------------------------------------------
# Data Preprocessing

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values

# Taking care of missing data
# (SimpleImputer replaces the removed sklearn.preprocessing.Imputer)
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean')
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])

# Encoding the categorical first column
from sklearn.preprocessing import LabelEncoder
labelencoder_X = LabelEncoder()
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Decision_Tree_Regression/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 2 - Regression/Decision_Tree_Regression/.DS_Store

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Decision_Tree_Regression/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Decision_Tree_Regression/decision_tree_regression.R:
--------------------------------------------------------------------------------
# Decision Tree Regression

# Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]

# Splitting the dataset into the Training set and Test set
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)

# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)

# Fitting Decision Tree Regression to the dataset
# install.packages('rpart')
library(rpart)
regressor = rpart(formula = Salary ~ .,
                  data = dataset,
                  control = rpart.control(minsplit = 1))

# Predicting a new result with Decision Tree Regression
y_pred = predict(regressor, data.frame(Level = 6.5))

# Visualising the Decision Tree Regression results (higher resolution)
# install.packages('ggplot2')
library(ggplot2)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
ggplot() +
  geom_point(aes(x = dataset$Level, y = dataset$Salary),
             colour = 'red') +
  geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
            colour = 'blue') +
  ggtitle('Truth or Bluff (Decision Tree Regression)') +
  xlab('Level') +
  ylab('Salary')

# Plotting the tree
plot(regressor)
text(regressor)

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Decision_Tree_Regression/decision_tree_regression.py:
--------------------------------------------------------------------------------
# Decision Tree Regression

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Position_Salaries.csv')
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# Splitting the dataset into the Training set and Test set
"""from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"""

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""

# Fitting Decision Tree Regression to the dataset
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state = 0)
regressor.fit(X, y)

# Predicting a new result (predict expects a 2D array)
y_pred = regressor.predict([[6.5]])

# Visualising the Decision Tree Regression results (higher resolution)
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color = 'red')
plt.plot(X_grid, regressor.predict(X_grid), color = 'blue')
plt.title('Truth or Bluff (Decision Tree Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()
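
The splits learned in decision_tree_regression.py can also be printed as plain if/else rules, which makes the staircase-shaped plot easier to interpret; a minimal self-contained sketch (not one of the course files):

import pandas as pd
from sklearn.tree import DecisionTreeRegressor, export_text

dataset = pd.read_csv('Position_Salaries.csv')
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

regressor = DecisionTreeRegressor(random_state = 0).fit(X, y)
# With no depth limit the tree keeps splitting until each leaf
# holds a single position level's salary.
print(export_text(regressor, feature_names = ['Level']))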

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Decision_Tree_Regression/regression_template.R:
--------------------------------------------------------------------------------
# Regression Template

# Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]

# Splitting the dataset into the Training set and Test set
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)

# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)

# Fitting the Regression Model to the dataset
# Create your regressor here

# Predicting a new result
y_pred = predict(regressor, data.frame(Level = 6.5))

# Visualising the Regression Model results
# install.packages('ggplot2')
library(ggplot2)
ggplot() +
  geom_point(aes(x = dataset$Level, y = dataset$Salary),
             colour = 'red') +
  geom_line(aes(x = dataset$Level, y = predict(regressor, newdata = dataset)),
            colour = 'blue') +
  ggtitle('Truth or Bluff (Regression Model)') +
  xlab('Level') +
  ylab('Salary')

# Visualising the Regression Model results (for higher resolution and smoother curve)
# install.packages('ggplot2')
library(ggplot2)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.1)
ggplot() +
  geom_point(aes(x = dataset$Level, y = dataset$Salary),
             colour = 'red') +
  geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
            colour = 'blue') +
  ggtitle('Truth or Bluff (Regression Model)') +
  xlab('Level') +
  ylab('Salary')

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Decision_Tree_Regression/regression_template.py:
--------------------------------------------------------------------------------
# Regression Template

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Position_Salaries.csv')
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# Splitting the dataset into the Training set and Test set
"""from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"""

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""

# Fitting the Regression Model to the dataset
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state = 0)
regressor.fit(X, y)

# Predicting a new result
y_pred = regressor.predict(np.array([6.5]).reshape(1, 1))

# Visualising the Regression results
plt.scatter(X, y, color = 'red')
plt.plot(X, regressor.predict(X), color = 'blue')
plt.title('Truth or Bluff (Regression Model)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Visualising the Regression results (for higher resolution and smoother curve)
X_grid = np.arange(min(X), max(X), 0.1)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color = 'red')
plt.plot(X_grid, regressor.predict(X_grid), color = 'blue')
plt.title('Truth or Bluff (Regression Model)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Multiple Linear Regression/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 2 - Regression/Multiple Linear Regression/.DS_Store

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Multiple Linear Regression/50_Startups.csv:
--------------------------------------------------------------------------------
R&D Spend,Administration,Marketing Spend,State,Profit
165349.2,136897.8,471784.1,New York,192261.83
162597.7,151377.59,443898.53,California,191792.06
153441.51,101145.55,407934.54,Florida,191050.39
144372.41,118671.85,383199.62,New York,182901.99
142107.34,91391.77,366168.42,Florida,166187.94
131876.9,99814.71,362861.36,New York,156991.12
134615.46,147198.87,127716.82,California,156122.51
130298.13,145530.06,323876.68,Florida,155752.6
120542.52,148718.95,311613.29,New York,152211.77
123334.88,108679.17,304981.62,California,149759.96
101913.08,110594.11,229160.95,Florida,146121.95
100671.96,91790.61,249744.55,California,144259.4
93863.75,127320.38,249839.44,Florida,141585.52
91992.39,135495.07,252664.93,California,134307.35
119943.24,156547.42,256512.92,Florida,132602.65
114523.61,122616.84,261776.23,New York,129917.04
78013.11,121597.55,264346.06,California,126992.93
94657.16,145077.58,282574.31,New York,125370.37
91749.16,114175.79,294919.57,Florida,124266.9
86419.7,153514.11,0,New York,122776.86
76253.86,113867.3,298664.47,California,118474.03
78389.47,153773.43,299737.29,New York,111313.02
73994.56,122782.75,303319.26,Florida,110352.25
67532.53,105751.03,304768.73,Florida,108733.99
77044.01,99281.34,140574.81,New York,108552.04
64664.71,139553.16,137962.62,California,107404.34
75328.87,144135.98,134050.07,Florida,105733.54
72107.6,127864.55,353183.81,New York,105008.31
66051.52,182645.56,118148.2,Florida,103282.38
65605.48,153032.06,107138.38,New York,101004.64
61994.48,115641.28,91131.24,Florida,99937.59
61136.38,152701.92,88218.23,New York,97483.56
63408.86,129219.61,46085.25,California,97427.84
55493.95,103057.49,214634.81,Florida,96778.92
46426.07,157693.92,210797.67,California,96712.8
46014.02,85047.44,205517.64,New York,96479.51
28663.76,127056.21,201126.82,Florida,90708.19
44069.95,51283.14,197029.42,California,89949.14
20229.59,65947.93,185265.1,New York,81229.06
38558.51,82982.09,174999.3,California,81005.76
28754.33,118546.05,172795.67,California,78239.91
27892.92,84710.77,164470.71,Florida,77798.83
23640.93,96189.63,148001.11,California,71498.49
15505.73,127382.3,35534.17,New York,69758.98
22177.74,154806.14,28334.72,California,65200.33
1000.23,124153.04,1903.93,New York,64926.08
1315.46,115816.21,297114.46,Florida,49490.75
0,135426.92,0,California,42559.73
542.05,51743.15,0,New York,35673.41
0,116983.8,45173.06,California,14681.4

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Multiple Linear Regression/Homework_Solutions/50_Startups.csv:
--------------------------------------------------------------------------------
R&D Spend,Administration,Marketing Spend,State,Profit
165349.2,136897.8,471784.1,New York,192261.83
162597.7,151377.59,443898.53,California,191792.06
153441.51,101145.55,407934.54,Florida,191050.39
144372.41,118671.85,383199.62,New York,182901.99
142107.34,91391.77,366168.42,Florida,166187.94
131876.9,99814.71,362861.36,New York,156991.12
134615.46,147198.87,127716.82,California,156122.51
130298.13,145530.06,323876.68,Florida,155752.6
120542.52,148718.95,311613.29,New York,152211.77
123334.88,108679.17,304981.62,California,149759.96
101913.08,110594.11,229160.95,Florida,146121.95
100671.96,91790.61,249744.55,California,144259.4
93863.75,127320.38,249839.44,Florida,141585.52
91992.39,135495.07,252664.93,California,134307.35
119943.24,156547.42,256512.92,Florida,132602.65
114523.61,122616.84,261776.23,New York,129917.04
78013.11,121597.55,264346.06,California,126992.93
94657.16,145077.58,282574.31,New York,125370.37
91749.16,114175.79,294919.57,Florida,124266.9
86419.7,153514.11,0,New York,122776.86
76253.86,113867.3,298664.47,California,118474.03
78389.47,153773.43,299737.29,New York,111313.02
73994.56,122782.75,303319.26,Florida,110352.25
67532.53,105751.03,304768.73,Florida,108733.99
77044.01,99281.34,140574.81,New York,108552.04
64664.71,139553.16,137962.62,California,107404.34
75328.87,144135.98,134050.07,Florida,105733.54
72107.6,127864.55,353183.81,New York,105008.31
66051.52,182645.56,118148.2,Florida,103282.38
65605.48,153032.06,107138.38,New York,101004.64
61994.48,115641.28,91131.24,Florida,99937.59
61136.38,152701.92,88218.23,New York,97483.56
63408.86,129219.61,46085.25,California,97427.84
55493.95,103057.49,214634.81,Florida,96778.92
46426.07,157693.92,210797.67,California,96712.8
46014.02,85047.44,205517.64,New York,96479.51
28663.76,127056.21,201126.82,Florida,90708.19
44069.95,51283.14,197029.42,California,89949.14
20229.59,65947.93,185265.1,New York,81229.06
38558.51,82982.09,174999.3,California,81005.76
28754.33,118546.05,172795.67,California,78239.91
27892.92,84710.77,164470.71,Florida,77798.83
23640.93,96189.63,148001.11,California,71498.49
15505.73,127382.3,35534.17,New York,69758.98
22177.74,154806.14,28334.72,California,65200.33
1000.23,124153.04,1903.93,New York,64926.08
1315.46,115816.21,297114.46,Florida,49490.75
0,135426.92,0,California,42559.73
542.05,51743.15,0,New York,35673.41
0,116983.8,45173.06,California,14681.4

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Multiple Linear Regression/Homework_Solutions/data_preprocessing_template.R:
--------------------------------------------------------------------------------
# Data Preprocessing Template

# Importing the dataset
dataset = read.csv('Data.csv')

# Splitting the dataset into the Training set and Test set
# install.packages('caTools')
library(caTools)
set.seed(123)
split = sample.split(dataset$DependentVariable, SplitRatio = 0.8)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)

# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Multiple Linear Regression/Homework_Solutions/data_preprocessing_template.py:
--------------------------------------------------------------------------------
# Data Preprocessing Template

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values

# Splitting the dataset into the Training set and Test set
# (model_selection replaces the removed sklearn.cross_validation module)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Multiple Linear Regression/Homework_Solutions/multiple_linear_regression.R:
--------------------------------------------------------------------------------
# Multiple Linear Regression

# Importing the dataset
dataset = read.csv('50_Startups.csv')

# Encoding categorical data
dataset$State = factor(dataset$State,
                       levels = c('New York', 'California', 'Florida'),
                       labels = c(1, 2, 3))

# Splitting the dataset into the Training set and Test set
# install.packages('caTools')
library(caTools)
set.seed(123)
split = sample.split(dataset$Profit, SplitRatio = 0.8)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)

# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)

# Fitting Multiple Linear Regression to the Training set
regressor = lm(formula = Profit ~ .,
               data = training_set)

# Predicting the Test set results
y_pred = predict(regressor, newdata = test_set)

# Building the optimal model using Backward Elimination
regressor = lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend + State,
               data = dataset)
summary(regressor)
# Optional Step: Remove State2 only (as opposed to removing State directly)
# regressor = lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend + factor(State, exclude = 2),
#                data = dataset)
# summary(regressor)
regressor = lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend,
               data = dataset)
summary(regressor)
regressor = lm(formula = Profit ~ R.D.Spend + Marketing.Spend,
               data = dataset)
summary(regressor)
regressor = lm(formula = Profit ~ R.D.Spend,
               data = dataset)
summary(regressor)

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Multiple Linear Regression/Homework_Solutions/multiple_linear_regression.py:
--------------------------------------------------------------------------------
# Multiple Linear Regression

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('50_Startups.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values

# Encoding categorical data
# (ColumnTransformer replaces OneHotEncoder's removed categorical_features argument)
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer([('encoder', OneHotEncoder(), [3])], remainder = 'passthrough')
X = np.array(ct.fit_transform(X), dtype = float)

# Avoiding the Dummy Variable Trap
X = X[:, 1:]

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""

# Fitting Multiple Linear Regression to the Training set
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Predicting the Test set results
y_pred = regressor.predict(X_test)

# Building the optimal model using Backward Elimination
# (sm.OLS now lives in statsmodels.api, not statsmodels.formula.api)
import statsmodels.api as sm
X = np.append(arr = np.ones((50, 1)).astype(int), values = X, axis = 1)
X_opt = X[:, [0, 1, 2, 3, 4, 5]]
regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit()
regressor_OLS.summary()
X_opt = X[:, [0, 1, 3, 4, 5]]
regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit()
regressor_OLS.summary()
X_opt = X[:, [0, 3, 4, 5]]
regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit()
regressor_OLS.summary()
X_opt = X[:, [0, 3, 5]]
regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit()
regressor_OLS.summary()
X_opt = X[:, [0, 3]]
regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit()
regressor_OLS.summary()
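
The manual elimination above can be automated with a small loop that repeatedly drops the predictor with the highest p-value; a sketch of one possible helper (hypothetical, not part of the homework solution), assuming X already contains the appended intercept column and y is defined as above:

import numpy as np
import statsmodels.api as sm

def backward_elimination(X, y, sl = 0.05):
    # Drop the column with the worst p-value until every remaining
    # predictor is significant at the chosen level.
    cols = list(range(X.shape[1]))
    while True:
        model = sm.OLS(endog = y, exog = X[:, cols]).fit()
        worst = int(np.argmax(model.pvalues))
        if model.pvalues[worst] <= sl:
            return cols, model
        cols.pop(worst)

selected, final_model = backward_elimination(X, y)
print(selected)
print(final_model.summary())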

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Multiple Linear Regression/VL2019205000898_DA.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 2 - Regression/Multiple Linear Regression/VL2019205000898_DA.pdf

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Multiple Linear Regression/data_preprocessing_template.R:
--------------------------------------------------------------------------------
# Data Preprocessing Template

# Importing the dataset
dataset = read.csv('Data.csv')

# Splitting the dataset into the Training set and Test set
# install.packages('caTools')
library(caTools)
set.seed(123)
split = sample.split(dataset$DependentVariable, SplitRatio = 0.8)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)

# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Multiple Linear Regression/data_preprocessing_template.py:
--------------------------------------------------------------------------------
# Data Preprocessing Template

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values

# Splitting the dataset into the Training set and Test set
# (model_selection replaces the removed sklearn.cross_validation module)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Multiple Linear Regression/multiple_linear_regression.R:
--------------------------------------------------------------------------------
# Multiple Linear Regression

# Importing the dataset
dataset = read.csv('50_Startups.csv')

# Encoding categorical data
dataset$State = factor(dataset$State,
                       levels = c('New York', 'California', 'Florida'),
                       labels = c(1, 2, 3))

# Splitting the dataset into the Training set and Test set
# install.packages('caTools')
library(caTools)
set.seed(123)
split = sample.split(dataset$Profit, SplitRatio = 0.8)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)

# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)

# Fitting Multiple Linear Regression to the Training set
regressor = lm(formula = Profit ~ .,
               data = training_set)

# Predicting the Test set results
y_pred = predict(regressor, newdata = test_set)

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Multiple Linear Regression/multiple_linear_regression.py:
--------------------------------------------------------------------------------
# Multiple Linear Regression

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('50_Startups.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values

# Encoding categorical data
# (ColumnTransformer replaces OneHotEncoder's removed categorical_features argument)
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer([('encoder', OneHotEncoder(), [3])], remainder = 'passthrough')
X = np.array(ct.fit_transform(X), dtype = float)

# Avoiding the Dummy Variable Trap
X = X[:, 1:]

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""

# Fitting Multiple Linear Regression to the Training set
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Predicting the Test set results
y_pred = regressor.predict(X_test)

# Building the optimal model using Backward Elimination
import statsmodels.regression.linear_model as lm
X = np.append(arr = np.ones((50, 1)).astype(int), values = X, axis = 1)
X_opt = X[:, [0, 1, 2, 3, 4, 5]]
regressor_OLS = lm.OLS(endog = y, exog = X_opt).fit()
regressor_OLS.summary()
X_opt = X[:, [0, 1, 2, 4, 5]]
regressor_OLS = lm.OLS(endog = y, exog = X_opt).fit()
regressor_OLS.summary()
X_opt = X[:, [0, 1, 2, 5]]
regressor_OLS = lm.OLS(endog = y, exog = X_opt).fit()
regressor_OLS.summary()

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Polynomial_Regression/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 2 - Regression/Polynomial_Regression/.DS_Store

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Polynomial_Regression/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Polynomial_Regression/Regression_Template/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 2 - Regression/Polynomial_Regression/Regression_Template/.DS_Store

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Polynomial_Regression/Regression_Template/regression_template.R:
--------------------------------------------------------------------------------
# Regression Template

# Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]

# Splitting the dataset into the Training set and Test set
# # install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)

# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)

# Fitting the Regression Model to the dataset
# Create your regressor here

# Predicting a new result
y_pred = predict(regressor, data.frame(Level = 6.5))

# Visualising the Regression Model results
# install.packages('ggplot2')
library(ggplot2)
ggplot() +
  geom_point(aes(x = dataset$Level, y = dataset$Salary),
             colour = 'red') +
  geom_line(aes(x = dataset$Level, y = predict(regressor, newdata = dataset)),
            colour = 'blue') +
  ggtitle('Truth or Bluff (Regression Model)') +
  xlab('Level') +
  ylab('Salary')

# Visualising the Regression Model results (for higher resolution and smoother curve)
# install.packages('ggplot2')
library(ggplot2)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.1)
ggplot() +
  geom_point(aes(x = dataset$Level, y = dataset$Salary),
             colour = 'red') +
  geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
            colour = 'blue') +
  ggtitle('Truth or Bluff (Regression Model)') +
  xlab('Level') +
  ylab('Salary')

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Polynomial_Regression/Regression_Template/regression_template.py:
--------------------------------------------------------------------------------
# Regression Template

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Position_Salaries.csv')
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# Splitting the dataset into the Training set and Test set
"""from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"""

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""

# Fitting the Regression Model to the dataset
# Create your regressor here

# Predicting a new result (predict expects a 2D array)
y_pred = regressor.predict([[6.5]])

# Visualising the Regression results
plt.scatter(X, y, color = 'red')
plt.plot(X, regressor.predict(X), color = 'blue')
plt.title('Truth or Bluff (Regression Model)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Visualising the Regression results (for higher resolution and smoother curve)
X_grid = np.arange(min(X), max(X), 0.1)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color = 'red')
plt.plot(X_grid, regressor.predict(X_grid), color = 'blue')
plt.title('Truth or Bluff (Regression Model)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Polynomial_Regression/data_preprocessing_template.R:
--------------------------------------------------------------------------------
# Data Preprocessing Template

# Importing the dataset
dataset = read.csv('Data.csv')

# Splitting the dataset into the Training set and Test set
# install.packages('caTools')
library(caTools)
set.seed(123)
split = sample.split(dataset$DependentVariable, SplitRatio = 0.8)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)

# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Polynomial_Regression/data_preprocessing_template.py:
--------------------------------------------------------------------------------
# Data Preprocessing Template

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Position_Salaries.csv')
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# Splitting the dataset into the Training set and Test set
"""from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"""

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""

# Fitting Linear Regression to the dataset
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)

# Fitting Polynomial Regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree = 4)
X_poly = poly_reg.fit_transform(X)

lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y)

# Visualising the Linear Regression results
plt.scatter(X, y, color = 'red')
plt.plot(X, lin_reg.predict(X), color = 'blue')
plt.title('linear regression')
plt.xlabel('Position level')
plt.ylabel('salary')
plt.show()

# Visualising the Polynomial Regression results
X_grid = np.arange(min(X), max(X), 0.1)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color = 'red')
plt.plot(X_grid, lin_reg_2.predict(poly_reg.fit_transform(X_grid)), color = 'blue')
plt.title('polynomial regression')
plt.xlabel('Position level')
plt.ylabel('salary')
plt.show()

# Predicting with both models
lin_reg.predict(X)
lin_reg_2.predict(poly_reg.fit_transform(X))

# Predicting a new result with the polynomial model
y_pred = lin_reg_2.predict(poly_reg.fit_transform([[6.5]]))
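
With both models fitted in the template above, comparing their predictions for level 6.5 makes the difference concrete; a short continuation sketch reusing the same variable names (lin_reg, lin_reg_2, poly_reg):

# The straight line and the degree-4 polynomial give very different
# estimates for the intermediate level 6.5.
print(lin_reg.predict([[6.5]]))
print(lin_reg_2.predict(poly_reg.fit_transform([[6.5]])))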

--------------------------------------------------------------------------------
/udemy/Machine Learning A-Z/Part 2 - Regression/Polynomial_Regression/polynomial_regression-updated.py:
--------------------------------------------------------------------------------
# Polynomial Regression

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Position_Salaries.csv')
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# Splitting the dataset into the Training set and Test set
"""from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"""

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)"""

# Fitting Linear Regression to the dataset
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)

# Fitting Polynomial Regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree = 4)
X_poly = poly_reg.fit_transform(X)
poly_reg.fit(X_poly, y)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y)

# Visualising the Linear Regression results
plt.scatter(X, y, color = 'red')
plt.plot(X, lin_reg.predict(X), color = 'blue')
plt.title('Truth or Bluff (Linear Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Visualising the Polynomial Regression results
plt.scatter(X, y, color = 'red')
plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)), color = 'blue')
plt.title('Truth or Bluff (Polynomial Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Visualising the Polynomial Regression results (for higher resolution and smoother curve)
X_grid = np.arange(min(X), max(X), 0.1)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color = 'red')
plt.plot(X_grid, lin_reg_2.predict(poly_reg.fit_transform(X_grid)), color = 'blue')
plt.title('Truth or Bluff (Polynomial Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Predicting a new result with Linear Regression
lin_reg.predict([[6.5]])

# Predicting a new result with Polynomial Regression
lin_reg_2.predict(poly_reg.fit_transform([[6.5]]))
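
The PolynomialFeatures-plus-LinearRegression pair in polynomial_regression-updated.py can equivalently be wrapped in a single scikit-learn Pipeline, so prediction needs no manual transform; a minimal sketch (an alternative formulation, not one of the course files):

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

dataset = pd.read_csv('Position_Salaries.csv')
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# One estimator: expand to degree-4 polynomial terms, then fit the linear model.
model = make_pipeline(PolynomialFeatures(degree = 4), LinearRegression())
model.fit(X, y)
print(model.predict([[6.5]]))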
dataset 20 | lin_reg = lm(formula = Salary ~ ., 21 | data = dataset) 22 | 23 | # Fitting Polynomial Regression to the dataset 24 | dataset$Level2 = dataset$Level^2 25 | dataset$Level3 = dataset$Level^3 26 | dataset$Level4 = dataset$Level^4 27 | poly_reg = lm(formula = Salary ~ ., 28 | data = dataset) 29 | 30 | # Visualising the Linear Regression results 31 | # install.packages('ggplot2') 32 | library(ggplot2) 33 | ggplot() + 34 | geom_point(aes(x = dataset$Level, y = dataset$Salary), 35 | colour = 'red') + 36 | geom_line(aes(x = dataset$Level, y = predict(lin_reg, newdata = dataset)), 37 | colour = 'blue') + 38 | ggtitle('Truth or Bluff (Linear Regression)') + 39 | xlab('Level') + 40 | ylab('Salary') 41 | 42 | # Visualising the Polynomial Regression results 43 | # install.packages('ggplot2') 44 | library(ggplot2) 45 | ggplot() + 46 | geom_point(aes(x = dataset$Level, y = dataset$Salary), 47 | colour = 'red') + 48 | geom_line(aes(x = dataset$Level, y = predict(poly_reg, newdata = dataset)), 49 | colour = 'blue') + 50 | ggtitle('Truth or Bluff (Polynomial Regression)') + 51 | xlab('Level') + 52 | ylab('Salary') 53 | 54 | # Visualising the Regression Model results (for higher resolution and smoother curve) 55 | # install.packages('ggplot2') 56 | library(ggplot2) 57 | x_grid = seq(min(dataset$Level), max(dataset$Level), 0.1) 58 | ggplot() + 59 | geom_point(aes(x = dataset$Level, y = dataset$Salary), 60 | colour = 'red') + 61 | geom_line(aes(x = x_grid, y = predict(poly_reg, 62 | newdata = data.frame(Level = x_grid, 63 | Level2 = x_grid^2, 64 | Level3 = x_grid^3, 65 | Level4 = x_grid^4))), 66 | colour = 'blue') + 67 | ggtitle('Truth or Bluff (Polynomial Regression)') + 68 | xlab('Level') + 69 | ylab('Salary') 70 | 71 | # Predicting a new result with Linear Regression 72 | predict(lin_reg, data.frame(Level = 6.5)) 73 | 74 | # Predicting a new result with Polynomial Regression 75 | predict(poly_reg, data.frame(Level = 6.5, 76 | Level2 = 6.5^2, 77 | Level3 = 6.5^3, 78 | Level4 = 6.5^4)) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Polynomial_Regression/polynomial_regression.py: -------------------------------------------------------------------------------- 1 | # Polynomial Regression 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Position_Salaries.csv') 10 | X = dataset.iloc[:, 1:2].values 11 | y = dataset.iloc[:, 2].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | """from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)""" 16 | 17 | # Feature Scaling 18 | """from sklearn.preprocessing import StandardScaler 19 | sc_X = StandardScaler() 20 | X_train = sc_X.fit_transform(X_train) 21 | X_test = sc_X.transform(X_test)""" 22 | 23 | # Fitting Linear Regression to the dataset 24 | from sklearn.linear_model import LinearRegression 25 | lin_reg = LinearRegression() 26 | lin_reg.fit(X, y) 27 | 28 | # Fitting Polynomial Regression to the dataset 29 | from sklearn.preprocessing import PolynomialFeatures 30 | poly_reg = PolynomialFeatures(degree = 4) 31 | X_poly = poly_reg.fit_transform(X) 32 | poly_reg.fit(X_poly, y) 33 | lin_reg_2 = LinearRegression() 34 | lin_reg_2.fit(X_poly, y) 35 | 36 | # Visualising the Linear Regression results 37 | plt.scatter(X, y, color = 'red')
38 | plt.plot(X, lin_reg.predict(X), color = 'blue') 39 | plt.title('Truth or Bluff (Linear Regression)') 40 | plt.xlabel('Position level') 41 | plt.ylabel('Salary') 42 | plt.show() 43 | 44 | # Visualising the Polynomial Regression results 45 | plt.scatter(X, y, color = 'red') 46 | plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)), color = 'blue') 47 | plt.title('Truth or Bluff (Polynomial Regression)') 48 | plt.xlabel('Position level') 49 | plt.ylabel('Salary') 50 | plt.show() 51 | 52 | # Visualising the Polynomial Regression results (for higher resolution and smoother curve) 53 | X_grid = np.arange(min(X), max(X), 0.1) 54 | X_grid = X_grid.reshape((len(X_grid), 1)) 55 | plt.scatter(X, y, color = 'red') 56 | plt.plot(X_grid, lin_reg_2.predict(poly_reg.fit_transform(X_grid)), color = 'blue') 57 | plt.title('Truth or Bluff (Polynomial Regression)') 58 | plt.xlabel('Position level') 59 | plt.ylabel('Salary') 60 | plt.show() 61 | 62 | # Predicting a new result with Linear Regression 63 | lin_reg.predict([[6.5]]) 64 | 65 | # Predicting a new result with Polynomial Regression 66 | lin_reg_2.predict(poly_reg.fit_transform([[6.5]])) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Random Forest Regression/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 2 - Regression/Random Forest Regression/.DS_Store -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Random Forest Regression/Position_Salaries.csv: -------------------------------------------------------------------------------- 1 | Position,Level,Salary 2 | Business Analyst,1,45000 3 | Junior Consultant,2,50000 4 | Senior Consultant,3,60000 5 | Manager,4,80000 6 | Country Manager,5,110000 7 | Region Manager,6,150000 8 | Partner,7,200000 9 | Senior Partner,8,300000 10 | C-level,9,500000 11 | CEO,10,1000000 -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Random Forest Regression/random_forest_regression.R: -------------------------------------------------------------------------------- 1 | # Random Forest Regression 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Position_Salaries.csv') 5 | dataset = dataset[2:3] 6 | 7 | # Splitting the dataset into the Training set and Test set 8 | # # install.packages('caTools') 9 | # library(caTools) 10 | # set.seed(123) 11 | # split = sample.split(dataset$Salary, SplitRatio = 2/3) 12 | # training_set = subset(dataset, split == TRUE) 13 | # test_set = subset(dataset, split == FALSE) 14 | 15 | # Feature Scaling 16 | # training_set = scale(training_set) 17 | # test_set = scale(test_set) 18 | 19 | # Fitting Random Forest Regression to the dataset 20 | # install.packages('randomForest') 21 | library(randomForest) 22 | set.seed(1234) 23 | regressor = randomForest(x = dataset[-2], 24 | y = dataset$Salary, 25 | ntree = 500) 26 | 27 | # Predicting a new result with Random Forest Regression 28 | y_pred = predict(regressor, data.frame(Level = 6.5)) 29 | 30 | # Visualising the Random Forest Regression results (higher resolution) 31 | # install.packages('ggplot2') 32 | library(ggplot2) 33 | x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01) 34 | ggplot() + 35 | geom_point(aes(x = dataset$Level, y =
dataset$Salary), 36 | colour = 'red') + 37 | geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))), 38 | colour = 'blue') + 39 | ggtitle('Truth or Bluff (Random Forest Regression)') + 40 | xlab('Level') + 41 | ylab('Salary') -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Random Forest Regression/random_forest_regression.py: -------------------------------------------------------------------------------- 1 | # Random Forest Regression 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Position_Salaries.csv') 10 | X = dataset.iloc[:, 1:2].values 11 | y = dataset.iloc[:, 2].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | """from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)""" 16 | 17 | # Feature Scaling 18 | """from sklearn.preprocessing import StandardScaler 19 | sc_X = StandardScaler() 20 | X_train = sc_X.fit_transform(X_train) 21 | X_test = sc_X.transform(X_test) 22 | sc_y = StandardScaler() 23 | y_train = sc_y.fit_transform(y_train)""" 24 | 25 | # Fitting Random Forest Regression to the dataset 26 | from sklearn.ensemble import RandomForestRegressor 27 | regressor = RandomForestRegressor(n_estimators = 10, random_state = 0) 28 | regressor.fit(X, y) 29 | 30 | # Predicting a new result 31 | y_pred = regressor.predict([[6.5]]) 32 | 33 | # Visualising the Random Forest Regression results (higher resolution) 34 | X_grid = np.arange(min(X), max(X), 0.01) 35 | X_grid = X_grid.reshape((len(X_grid), 1)) 36 | plt.scatter(X, y, color = 'red') 37 | plt.plot(X_grid, regressor.predict(X_grid), color = 'blue') 38 | plt.title('Truth or Bluff (Random Forest Regression)') 39 | plt.xlabel('Position level') 40 | plt.ylabel('Salary') 41 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Random Forest Regression/regression_template.R: -------------------------------------------------------------------------------- 1 | # Regression Template 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Position_Salaries.csv') 5 | dataset = dataset[2:3] 6 | 7 | # Splitting the dataset into the Training set and Test set 8 | # # install.packages('caTools') 9 | # library(caTools) 10 | # set.seed(123) 11 | # split = sample.split(dataset$Salary, SplitRatio = 2/3) 12 | # training_set = subset(dataset, split == TRUE) 13 | # test_set = subset(dataset, split == FALSE) 14 | 15 | # Feature Scaling 16 | # training_set = scale(training_set) 17 | # test_set = scale(test_set) 18 | 19 | # Fitting the Regression Model to the dataset 20 | # Create your regressor here 21 | 22 | # Predicting a new result 23 | y_pred = predict(regressor, data.frame(Level = 6.5)) 24 | 25 | # Visualising the Regression Model results 26 | # install.packages('ggplot2') 27 | library(ggplot2) 28 | ggplot() + 29 | geom_point(aes(x = dataset$Level, y = dataset$Salary), 30 | colour = 'red') + 31 | geom_line(aes(x = dataset$Level, y = predict(regressor, newdata = dataset)), 32 | colour = 'blue') + 33 | ggtitle('Truth or Bluff (Regression Model)') + 34 | xlab('Level') + 35 | ylab('Salary') 36 | 37 | # Visualising the Regression Model results (for higher resolution and smoother curve) 38 | #
install.packages('ggplot2') 39 | library(ggplot2) 40 | x_grid = seq(min(dataset$Level), max(dataset$Level), 0.1) 41 | ggplot() + 42 | geom_point(aes(x = dataset$Level, y = dataset$Salary), 43 | colour = 'red') + 44 | geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))), 45 | colour = 'blue') + 46 | ggtitle('Truth or Bluff (Regression Model)') + 47 | xlab('Level') + 48 | ylab('Salary') -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Random Forest Regression/regression_template.py: -------------------------------------------------------------------------------- 1 | # Regression Template 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Position_Salaries.csv') 10 | X = dataset.iloc[:, 1:2].values 11 | y = dataset.iloc[:, 2].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | """from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)""" 16 | 17 | # Feature Scaling 18 | """from sklearn.preprocessing import StandardScaler 19 | sc_X = StandardScaler() 20 | X_train = sc_X.fit_transform(X_train) 21 | X_test = sc_X.transform(X_test) 22 | sc_y = StandardScaler() 23 | y_train = sc_y.fit_transform(y_train)""" 24 | 25 | # Fitting the Regression Model to the dataset 26 | from sklearn.ensemble import RandomForestRegressor 27 | regressor = RandomForestRegressor(n_estimators = 10, random_state = 0) 28 | regressor.fit(X, y) 29 | 30 | # Predicting a new result 31 | y_pred = regressor.predict([[6.5]]) 32 | 33 | # Visualising the Regression results 34 | plt.scatter(X, y, color = 'red') 35 | plt.plot(X, regressor.predict(X), color = 'blue') 36 | plt.title('Truth or Bluff (Random Forest Regression)') 37 | plt.xlabel('Position level') 38 | plt.ylabel('Salary') 39 | plt.show() 40 | 41 | # Visualising the Regression results (for higher resolution and smoother curve) 42 | X_grid = np.arange(min(X), max(X), 0.1) 43 | X_grid = X_grid.reshape((len(X_grid), 1)) 44 | plt.scatter(X, y, color = 'red') 45 | plt.plot(X_grid, regressor.predict(X_grid), color = 'blue') 46 | plt.title('Truth or Bluff (Random Forest Regression)') 47 | plt.xlabel('Position level') 48 | plt.ylabel('Salary') 49 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Regression-Pros-Cons.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 2 - Regression/Regression-Pros-Cons.pdf -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Simple_Linear_Regression/Salary_Data.csv: -------------------------------------------------------------------------------- 1 | YearsExperience,Salary 2 | 1.1,39343.00 3 | 1.3,46205.00 4 | 1.5,37731.00 5 | 2.0,43525.00 6 | 2.2,39891.00 7 | 2.9,56642.00 8 | 3.0,60150.00 9 | 3.2,54445.00 10 | 3.2,64445.00 11 | 3.7,57189.00 12 | 3.9,63218.00 13 | 4.0,55794.00 14 | 4.0,56957.00 15 | 4.1,57081.00 16 | 4.5,61111.00 17 | 4.9,67938.00 18 | 5.1,66029.00 19 | 5.3,83088.00 20 | 5.9,81363.00 21 | 6.0,93940.00 22 | 6.8,91738.00 23 | 7.1,98273.00 24 | 7.9,101302.00 25 |
8.2,113812.00 26 | 8.7,109431.00 27 | 9.0,105582.00 28 | 9.5,116969.00 29 | 9.6,112635.00 30 | 10.3,122391.00 31 | 10.5,121872.00 32 | -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Simple_Linear_Regression/data_preprocessing_template.R: -------------------------------------------------------------------------------- 1 | # Data Preprocessing Template 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Data.csv') 5 | 6 | # Splitting the dataset into the Training set and Test set 7 | # install.packages('caTools') 8 | library(caTools) 9 | set.seed(123) 10 | split = sample.split(dataset$DependentVariable, SplitRatio = 0.8) 11 | training_set = subset(dataset, split == TRUE) 12 | test_set = subset(dataset, split == FALSE) 13 | 14 | # Feature Scaling 15 | # training_set = scale(training_set) 16 | # test_set = scale(test_set) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Simple_Linear_Regression/data_preprocessing_template.py: -------------------------------------------------------------------------------- 1 | # Data Preprocessing Template 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Data.csv') 10 | X = dataset.iloc[:, :-1].values 11 | y = dataset.iloc[:, 3].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0) 16 | 17 | # Feature Scaling 18 | """from sklearn.preprocessing import StandardScaler 19 | sc_X = StandardScaler() 20 | X_train = sc_X.fit_transform(X_train) 21 | X_test = sc_X.transform(X_test) 22 | sc_y = StandardScaler() 23 | y_train = sc_y.fit_transform(y_train)""" 24 | -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Simple_Linear_Regression/simple_linear_regression.R: -------------------------------------------------------------------------------- 1 | # Simple Linear Regression 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Salary_Data.csv') 5 | 6 | # Splitting the dataset into the Training set and Test set 7 | # install.packages('caTools') 8 | library(caTools) 9 | set.seed(123) 10 | split = sample.split(dataset$Salary, SplitRatio = 2/3) 11 | training_set = subset(dataset, split == TRUE) 12 | test_set = subset(dataset, split == FALSE) 13 | 14 | # Feature Scaling 15 | # training_set = scale(training_set) 16 | # test_set = scale(test_set) 17 | 18 | # Fitting Simple Linear Regression to the Training set 19 | regressor = lm(formula = Salary ~ YearsExperience, 20 | data = training_set) 21 | 22 | # Predicting the Test set results 23 | y_pred = predict(regressor, newdata = test_set) 24 | 25 | # Visualising the Training set results 26 | library(ggplot2) 27 | ggplot() + 28 | geom_point(aes(x = training_set$YearsExperience, y = training_set$Salary), 29 | colour = 'red') + 30 | geom_line(aes(x = training_set$YearsExperience, y = predict(regressor, newdata = training_set)), 31 | colour = 'blue') + 32 | ggtitle('Salary vs Experience (Training set)') + 33 | xlab('Years of experience') + 34 | ylab('Salary') 35 | 36 | # Visualising the Test set results 37 | library(ggplot2) 38 | ggplot() + 39 | geom_point(aes(x = test_set$YearsExperience, y =
test_set$Salary), 40 | colour = 'red') + 41 | geom_line(aes(x = training_set$YearsExperience, y = predict(regressor, newdata = training_set)), 42 | colour = 'blue') + 43 | ggtitle('Salary vs Experience (Test set)') + 44 | xlab('Years of experience') + 45 | ylab('Salary') -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Simple_Linear_Regression/simple_linear_regression.py: -------------------------------------------------------------------------------- 1 | # Simple Linear Regression 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Salary_Data.csv') 10 | X = dataset.iloc[:, :-1].values 11 | y = dataset.iloc[:, 1].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 1/3, random_state = 0) 16 | 17 | # Feature Scaling 18 | """from sklearn.preprocessing import StandardScaler 19 | sc_X = StandardScaler() 20 | X_train = sc_X.fit_transform(X_train) 21 | X_test = sc_X.transform(X_test) 22 | sc_y = StandardScaler() 23 | y_train = sc_y.fit_transform(y_train)""" 24 | 25 | # Fitting Simple Linear Regression to the Training set 26 | from sklearn.linear_model import LinearRegression 27 | regressor = LinearRegression() 28 | regressor.fit(X_train, y_train) 29 | 30 | # Predicting the Test set results 31 | y_pred = regressor.predict(X_test) 32 | 33 | # Visualising the Training set results 34 | plt.scatter(X_train, y_train, color = 'red') 35 | plt.plot(X_train, regressor.predict(X_train), color = 'blue') 36 | plt.title('Salary vs Experience (Training set)') 37 | plt.xlabel('Years of Experience') 38 | plt.ylabel('Salary') 39 | plt.show() 40 | 41 | # Visualising the Test set results 42 | plt.scatter(X_test, y_test, color = 'red') 43 | plt.plot(X_train, regressor.predict(X_train), color = 'blue') 44 | plt.title('Salary vs Experience (Test set)') 45 | plt.xlabel('Years of Experience') 46 | plt.ylabel('Salary') 47 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Support Vector Regression (SVR)/Position_Salaries.csv: -------------------------------------------------------------------------------- 1 | Position,Level,Salary 2 | Business Analyst,1,45000 3 | Junior Consultant,2,50000 4 | Senior Consultant,3,60000 5 | Manager,4,80000 6 | Country Manager,5,110000 7 | Region Manager,6,150000 8 | Partner,7,200000 9 | Senior Partner,8,300000 10 | C-level,9,500000 11 | CEO,10,1000000 -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Support Vector Regression (SVR)/regression_template.R: -------------------------------------------------------------------------------- 1 | # Regression Template 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Position_Salaries.csv') 5 | dataset = dataset[2:3] 6 | 7 | # Splitting the dataset into the Training set and Test set 8 | # # install.packages('caTools') 9 | # library(caTools) 10 | # set.seed(123) 11 | # split = sample.split(dataset$Salary, SplitRatio = 2/3) 12 | # training_set = subset(dataset, split == TRUE) 13 | # test_set = subset(dataset, split == FALSE) 14 | 15 | # Feature Scaling 16 | # training_set = scale(training_set) 17 | # test_set = 
scale(test_set) 18 | 19 | # Fitting the Regression Model to the dataset 20 | # Create your regressor here 21 | 22 | # Predicting a new result 23 | y_pred = predict(regressor, data.frame(Level = 6.5)) 24 | 25 | # Visualising the Regression Model results 26 | # install.packages('ggplot2') 27 | library(ggplot2) 28 | ggplot() + 29 | geom_point(aes(x = dataset$Level, y = dataset$Salary), 30 | colour = 'red') + 31 | geom_line(aes(x = dataset$Level, y = predict(regressor, newdata = dataset)), 32 | colour = 'blue') + 33 | ggtitle('Truth or Bluff (Regression Model)') + 34 | xlab('Level') + 35 | ylab('Salary') 36 | 37 | # Visualising the Regression Model results (for higher resolution and smoother curve) 38 | # install.packages('ggplot2') 39 | library(ggplot2) 40 | x_grid = seq(min(dataset$Level), max(dataset$Level), 0.1) 41 | ggplot() + 42 | geom_point(aes(x = dataset$Level, y = dataset$Salary), 43 | colour = 'red') + 44 | geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))), 45 | colour = 'blue') + 46 | ggtitle('Truth or Bluff (Regression Model)') + 47 | xlab('Level') + 48 | ylab('Salary') -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Support Vector Regression (SVR)/regression_template.py: -------------------------------------------------------------------------------- 1 | # Regression Template 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Position_Salaries.csv') 10 | X = dataset.iloc[:, 1:2].values 11 | y = dataset.iloc[:, 2].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | """from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)""" 16 | 17 | # Feature Scaling 18 | """from sklearn.preprocessing import StandardScaler 19 | sc_X = StandardScaler() 20 | X_train = sc_X.fit_transform(X_train) 21 | X_test = sc_X.transform(X_test) 22 | sc_y = StandardScaler() 23 | y_train = sc_y.fit_transform(y_train)""" 24 | 25 | # Fitting the Regression Model to the dataset 26 | # Create your regressor here 27 | 28 | # Predicting a new result 29 | y_pred = regressor.predict([[6.5]]) 30 | 31 | # Visualising the Regression results 32 | plt.scatter(X, y, color = 'red') 33 | plt.plot(X, regressor.predict(X), color = 'blue') 34 | plt.title('Truth or Bluff (Regression Model)') 35 | plt.xlabel('Position level') 36 | plt.ylabel('Salary') 37 | plt.show() 38 | 39 | # Visualising the Regression results (for higher resolution and smoother curve) 40 | X_grid = np.arange(min(X), max(X), 0.1) 41 | X_grid = X_grid.reshape((len(X_grid), 1)) 42 | plt.scatter(X, y, color = 'red') 43 | plt.plot(X_grid, regressor.predict(X_grid), color = 'blue') 44 | plt.title('Truth or Bluff (Regression Model)') 45 | plt.xlabel('Position level') 46 | plt.ylabel('Salary') 47 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Support Vector Regression (SVR)/svr.R: -------------------------------------------------------------------------------- 1 | # SVR 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Position_Salaries.csv') 5 | dataset = dataset[2:3] 6 | 7 | # Splitting the dataset into the Training set and Test set 8 | # # install.packages('caTools') 9 | # library(caTools) 10 | #
set.seed(123) 11 | # split = sample.split(dataset$Salary, SplitRatio = 2/3) 12 | # training_set = subset(dataset, split == TRUE) 13 | # test_set = subset(dataset, split == FALSE) 14 | 15 | # Feature Scaling 16 | # training_set = scale(training_set) 17 | # test_set = scale(test_set) 18 | 19 | # Fitting SVR to the dataset 20 | # install.packages('e1071') 21 | library(e1071) 22 | regressor = svm(formula = Salary ~ ., 23 | data = dataset, 24 | type = 'eps-regression', 25 | kernel = 'radial') 26 | 27 | # Predicting a new result 28 | y_pred = predict(regressor, data.frame(Level = 6.5)) 29 | 30 | # Visualising the SVR results 31 | # install.packages('ggplot2') 32 | library(ggplot2) 33 | ggplot() + 34 | geom_point(aes(x = dataset$Level, y = dataset$Salary), 35 | colour = 'red') + 36 | geom_line(aes(x = dataset$Level, y = predict(regressor, newdata = dataset)), 37 | colour = 'blue') + 38 | ggtitle('Truth or Bluff (SVR)') + 39 | xlab('Level') + 40 | ylab('Salary') 41 | 42 | # Visualising the SVR results (for higher resolution and smoother curve) 43 | # install.packages('ggplot2') 44 | library(ggplot2) 45 | x_grid = seq(min(dataset$Level), max(dataset$Level), 0.1) 46 | ggplot() + 47 | geom_point(aes(x = dataset$Level, y = dataset$Salary), 48 | colour = 'red') + 49 | geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))), 50 | colour = 'blue') + 51 | ggtitle('Truth or Bluff (SVR)') + 52 | xlab('Level') + 53 | ylab('Salary') -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 2 - Regression/Support Vector Regression (SVR)/svr.py: -------------------------------------------------------------------------------- 1 | # SVR 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Position_Salaries.csv') 10 | X = dataset.iloc[:, 1:2].values 11 | y = dataset.iloc[:, 2].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | """from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)""" 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc_X = StandardScaler() 20 | sc_y = StandardScaler() 21 | X = sc_X.fit_transform(X) 22 | y = sc_y.fit_transform(y.reshape(-1, 1)).ravel()  # StandardScaler expects a 2-D array; ravel back to 1-D for SVR 23 | 24 | # Fitting SVR to the dataset 25 | from sklearn.svm import SVR 26 | regressor = SVR(kernel = 'rbf') 27 | regressor.fit(X, y) 28 | 29 | # Predicting a new result 30 | y_pred = regressor.predict(sc_X.transform(np.array([[6.5]]))) 31 | y_pred = sc_y.inverse_transform(y_pred.reshape(-1, 1))  # inverse_transform also expects a 2-D array 32 | 33 | # Visualising the SVR results 34 | plt.scatter(X, y, color = 'red') 35 | plt.plot(X, regressor.predict(X), color = 'blue') 36 | plt.title('Truth or Bluff (SVR)') 37 | plt.xlabel('Position level') 38 | plt.ylabel('Salary') 39 | plt.show() 40 | 41 | # Visualising the SVR results (for higher resolution and smoother curve) 42 | X_grid = np.arange(min(X), max(X), 0.01) # choice of 0.01 instead of 0.1 step because the data is feature scaled 43 | X_grid = X_grid.reshape((len(X_grid), 1)) 44 | plt.scatter(X, y, color = 'red') 45 | plt.plot(X_grid, regressor.predict(X_grid), color = 'blue') 46 | plt.title('Truth or Bluff (SVR)') 47 | plt.xlabel('Position level') 48 | plt.ylabel('Salary') 49 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 -
Classification/Section 14 - Logistic Regression/Classification_Template/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 3 - Classification/Section 14 - Logistic Regression/Classification_Template/.DS_Store -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 14 - Logistic Regression/Classification_Template/classification_template.R: -------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature Scaling 19 | training_set[-3] = scale(training_set[-3]) 20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting classifier to the Training set 23 | # Create your classifier here 24 | 25 | # Predicting the Test set results 26 | y_pred = predict(classifier, newdata = test_set[-3]) 27 | 28 | # Making the Confusion Matrix 29 | cm = table(test_set[, 3], y_pred) 30 | 31 | # Visualising the Training set results 32 | library(ElemStatLearn) 33 | set = training_set 34 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 35 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 36 | grid_set = expand.grid(X1, X2) 37 | colnames(grid_set) = c('Age', 'EstimatedSalary') 38 | y_grid = predict(classifier, newdata = grid_set) 39 | plot(set[, -3], 40 | main = 'Classifier (Training set)', 41 | xlab = 'Age', ylab = 'Estimated Salary', 42 | xlim = range(X1), ylim = range(X2)) 43 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 44 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 45 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 46 | 47 | # Visualising the Test set results 48 | library(ElemStatLearn) 49 | set = test_set 50 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 51 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 52 | grid_set = expand.grid(X1, X2) 53 | colnames(grid_set) = c('Age', 'EstimatedSalary') 54 | y_grid = predict(classifier, newdata = grid_set) 55 | plot(set[, -3], main = 'Classifier (Test set)', 56 | xlab = 'Age', ylab = 'Estimated Salary', 57 | xlim = range(X1), ylim = range(X2)) 58 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 59 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 60 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 14 - Logistic Regression/Classification_Template/classification_template.py: -------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot 
as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler() 20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting classifier to the Training set 24 | # Create your classifier here 25 | 26 | # Predicting the Test set results 27 | y_pred = classifier.predict(X_test) 28 | 29 | # Making the Confusion Matrix 30 | from sklearn.metrics import confusion_matrix 31 | cm = confusion_matrix(y_test, y_pred) 32 | 33 | # Visualising the Training set results 34 | from matplotlib.colors import ListedColormap 35 | X_set, y_set = X_train, y_train 36 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 37 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 38 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 39 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 40 | plt.xlim(X1.min(), X1.max()) 41 | plt.ylim(X2.min(), X2.max()) 42 | for i, j in enumerate(np.unique(y_set)): 43 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 44 | c = ListedColormap(('red', 'green'))(i), label = j) 45 | plt.title('Classifier (Training set)') 46 | plt.xlabel('Age') 47 | plt.ylabel('Estimated Salary') 48 | plt.legend() 49 | plt.show() 50 | 51 | # Visualising the Test set results 52 | from matplotlib.colors import ListedColormap 53 | X_set, y_set = X_test, y_test 54 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 55 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 56 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 57 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 58 | plt.xlim(X1.min(), X1.max()) 59 | plt.ylim(X2.min(), X2.max()) 60 | for i, j in enumerate(np.unique(y_set)): 61 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 62 | c = ListedColormap(('red', 'green'))(i), label = j) 63 | plt.title('Classifier (Test set)') 64 | plt.xlabel('Age') 65 | plt.ylabel('Estimated Salary') 66 | plt.legend() 67 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 14 - Logistic Regression/Logistic_Regression/logistic_regression.R: -------------------------------------------------------------------------------- 1 | # Logistic Regression 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature Scaling 19 | training_set[-3] = scale(training_set[-3])
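# Caveat: scale() below standardises the test set with its own mean and sd.
# A sketch of the alternative (mirroring the Python scripts, which reuse the
# training scaler) would apply the training set's parameters instead, e.g.:
# sc = scale(training_set[-3])
# test_set[-3] = scale(test_set[-3],
#                      center = attr(sc, "scaled:center"),
#                      scale = attr(sc, "scaled:scale"))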
20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting Logistic Regression to the Training set 23 | classifier = glm(formula = Purchased ~ ., 24 | family = binomial, 25 | data = training_set) 26 | 27 | # Predicting the Test set results 28 | prob_pred = predict(classifier, type = 'response', newdata = test_set[-3]) 29 | y_pred = ifelse(prob_pred > 0.5, 1, 0) 30 | 31 | # Making the Confusion Matrix 32 | cm = table(test_set[, 3], y_pred > 0.5) 33 | 34 | # Visualising the Training set results 35 | library(ElemStatLearn) 36 | set = training_set 37 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 38 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 39 | grid_set = expand.grid(X1, X2) 40 | colnames(grid_set) = c('Age', 'EstimatedSalary') 41 | prob_set = predict(classifier, type = 'response', newdata = grid_set) 42 | y_grid = ifelse(prob_set > 0.5, 1, 0) 43 | plot(set[, -3], 44 | main = 'Logistic Regression (Training set)', 45 | xlab = 'Age', ylab = 'Estimated Salary', 46 | xlim = range(X1), ylim = range(X2)) 47 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 48 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 49 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 50 | 51 | # Visualising the Test set results 52 | library(ElemStatLearn) 53 | set = test_set 54 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 55 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 56 | grid_set = expand.grid(X1, X2) 57 | colnames(grid_set) = c('Age', 'EstimatedSalary') 58 | prob_set = predict(classifier, type = 'response', newdata = grid_set) 59 | y_grid = ifelse(prob_set > 0.5, 1, 0) 60 | plot(set[, -3], 61 | main = 'Logistic Regression (Test set)', 62 | xlab = 'Age', ylab = 'Estimated Salary', 63 | xlim = range(X1), ylim = range(X2)) 64 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 65 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 66 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 14 - Logistic Regression/Logistic_Regression/logistic_regression.py: -------------------------------------------------------------------------------- 1 | # Logistic Regression 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler() 20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting Logistic Regression to the Training set 24 | from sklearn.linear_model import LogisticRegression 25 | classifier = LogisticRegression(random_state = 0) 26 | classifier.fit(X_train, y_train) 27 | 28 | # Predicting the Test set results 29 | y_pred = classifier.predict(X_test) 30 | 31 | # Making the Confusion Matrix 32 | from sklearn.metrics import confusion_matrix 33 | cm = confusion_matrix(y_test, y_pred) 34 | 35 |
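# A minimal sketch, assuming the cm computed above: overall test accuracy is
# the diagonal of the confusion matrix over the total number of test points.
accuracy = np.trace(cm) / cm.sum()
print('Test set accuracy:', accuracy)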
# Visualising the Training set results 36 | from matplotlib.colors import ListedColormap 37 | X_set, y_set = X_train, y_train 38 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 39 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 40 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 41 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 42 | plt.xlim(X1.min(), X1.max()) 43 | plt.ylim(X2.min(), X2.max()) 44 | for i, j in enumerate(np.unique(y_set)): 45 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 46 | c = ListedColormap(('red', 'green'))(i), label = j) 47 | plt.title('Logistic Regression (Training set)') 48 | plt.xlabel('Age') 49 | plt.ylabel('Estimated Salary') 50 | plt.legend() 51 | plt.show() 52 | 53 | # Visualising the Test set results 54 | from matplotlib.colors import ListedColormap 55 | X_set, y_set = X_test, y_test 56 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 57 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 58 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 59 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 60 | plt.xlim(X1.min(), X1.max()) 61 | plt.ylim(X2.min(), X2.max()) 62 | for i, j in enumerate(np.unique(y_set)): 63 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 64 | c = ListedColormap(('red', 'green'))(i), label = j) 65 | plt.title('Logistic Regression (Test set)') 66 | plt.xlabel('Age') 67 | plt.ylabel('Estimated Salary') 68 | plt.legend() 69 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 15 - K-Nearest Neighbors (K-NN)/K_Nearest_Neighbors/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 3 - Classification/Section 15 - K-Nearest Neighbors (K-NN)/K_Nearest_Neighbors/.DS_Store -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 15 - K-Nearest Neighbors (K-NN)/K_Nearest_Neighbors/classification_template.R: -------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature Scaling 19 | training_set[-3] = scale(training_set[-3]) 20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting classifier to the Training set 23 | # Create your classifier here 24 | 25 | # Predicting the Test set results 26 | y_pred = predict(classifier, newdata = test_set[-3]) 27 | 28 | # Making the Confusion Matrix 29 | cm = table(test_set[, 3], y_pred) 30 | 31 | # Visualising the Training set results 32 | 
library(ElemStatLearn) 33 | set = training_set 34 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 35 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 36 | grid_set = expand.grid(X1, X2) 37 | colnames(grid_set) = c('Age', 'EstimatedSalary') 38 | y_grid = predict(classifier, newdata = grid_set) 39 | plot(set[, -3], 40 | main = 'Classifier (Training set)', 41 | xlab = 'Age', ylab = 'Estimated Salary', 42 | xlim = range(X1), ylim = range(X2)) 43 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 44 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 45 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 46 | 47 | # Visualising the Test set results 48 | library(ElemStatLearn) 49 | set = test_set 50 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 51 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 52 | grid_set = expand.grid(X1, X2) 53 | colnames(grid_set) = c('Age', 'EstimatedSalary') 54 | y_grid = predict(classifier, newdata = grid_set) 55 | plot(set[, -3], main = 'Classifier (Test set)', 56 | xlab = 'Age', ylab = 'Estimated Salary', 57 | xlim = range(X1), ylim = range(X2)) 58 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 59 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 60 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 15 - K-Nearest Neighbors (K-NN)/K_Nearest_Neighbors/classification_template.py: -------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler() 20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting classifier to the Training set 24 | from sklearn.neighbors import KNeighborsClassifier 25 | classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2) 26 | classifier.fit(X_train, y_train) 27 | 28 | # Predicting the Test set results 29 | y_pred = classifier.predict(X_test) 30 | 31 | # Making the Confusion Matrix 32 | from sklearn.metrics import confusion_matrix 33 | cm = confusion_matrix(y_test, y_pred) 34 | 35 | # Visualising the Training set results 36 | from matplotlib.colors import ListedColormap 37 | X_set, y_set = X_train, y_train 38 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 39 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 40 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 41 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 42 | plt.xlim(X1.min(), X1.max()) 43 | plt.ylim(X2.min(), X2.max()) 44 | for i, j in enumerate(np.unique(y_set)): 45 |
plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 46 | c = ListedColormap(('red', 'green'))(i), label = j) 47 | plt.title('K-NN (Training set)') 48 | plt.xlabel('Age') 49 | plt.ylabel('Estimated Salary') 50 | plt.legend() 51 | plt.show() 52 | 53 | # Visualising the Test set results 54 | from matplotlib.colors import ListedColormap 55 | X_set, y_set = X_test, y_test 56 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 57 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 58 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 59 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 60 | plt.xlim(X1.min(), X1.max()) 61 | plt.ylim(X2.min(), X2.max()) 62 | for i, j in enumerate(np.unique(y_set)): 63 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 64 | c = ListedColormap(('red', 'green'))(i), label = j) 65 | plt.title('K-NN (Test set)') 66 | plt.xlabel('Age') 67 | plt.ylabel('Estimated Salary') 68 | plt.legend() 69 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 15 - K-Nearest Neighbors (K-NN)/K_Nearest_Neighbors/knn.R: -------------------------------------------------------------------------------- 1 | # K-Nearest Neighbors (K-NN) 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature Scaling 19 | training_set[-3] = scale(training_set[-3]) 20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting K-NN to the Training set and Predicting the Test set results 23 | library(class) 24 | y_pred = knn(train = training_set[, -3], 25 | test = test_set[, -3], 26 | cl = training_set[, 3], 27 | k = 5, 28 | prob = TRUE) 29 | 30 | # Making the Confusion Matrix 31 | cm = table(test_set[, 3], y_pred) 32 | 33 | # Visualising the Training set results 34 | library(ElemStatLearn) 35 | set = training_set 36 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 37 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 38 | grid_set = expand.grid(X1, X2) 39 | colnames(grid_set) = c('Age', 'EstimatedSalary') 40 | y_grid = knn(train = training_set[, -3], test = grid_set, cl = training_set[, 3], k = 5) 41 | plot(set[, -3], 42 | main = 'K-NN (Training set)', 43 | xlab = 'Age', ylab = 'Estimated Salary', 44 | xlim = range(X1), ylim = range(X2)) 45 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 46 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 47 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 48 | 49 | # Visualising the Test set results 50 | library(ElemStatLearn) 51 | set = test_set 52 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 53 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 54 | grid_set = expand.grid(X1, X2) 55 | colnames(grid_set) = c('Age', 'EstimatedSalary') 56 | y_grid = knn(train = training_set[, -3], test = grid_set, cl =
training_set[, 3], k = 5) 57 | plot(set[, -3], 58 | main = 'K-NN (Test set)', 59 | xlab = 'Age', ylab = 'Estimated Salary', 60 | xlim = range(X1), ylim = range(X2)) 61 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 62 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 63 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 15 - K-Nearest Neighbors (K-NN)/K_Nearest_Neighbors/knn.py: -------------------------------------------------------------------------------- 1 | # K-Nearest Neighbors (K-NN) 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler() 20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting K-NN to the Training set 24 | from sklearn.neighbors import KNeighborsClassifier 25 | classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2) 26 | classifier.fit(X_train, y_train) 27 | 28 | # Predicting the Test set results 29 | y_pred = classifier.predict(X_test) 30 | 31 | # Making the Confusion Matrix 32 | from sklearn.metrics import confusion_matrix 33 | cm = confusion_matrix(y_test, y_pred) 34 | 35 | # Visualising the Training set results 36 | from matplotlib.colors import ListedColormap 37 | X_set, y_set = X_train, y_train 38 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 39 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 40 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 41 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 42 | plt.xlim(X1.min(), X1.max()) 43 | plt.ylim(X2.min(), X2.max()) 44 | for i, j in enumerate(np.unique(y_set)): 45 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 46 | c = ListedColormap(('red', 'green'))(i), label = j) 47 | plt.title('K-NN (Training set)') 48 | plt.xlabel('Age') 49 | plt.ylabel('Estimated Salary') 50 | plt.legend() 51 | plt.show() 52 | 53 | # Visualising the Test set results 54 | from matplotlib.colors import ListedColormap 55 | X_set, y_set = X_test, y_test 56 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 57 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 58 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 59 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 60 | plt.xlim(X1.min(), X1.max()) 61 | plt.ylim(X2.min(), X2.max()) 62 | for i, j in enumerate(np.unique(y_set)): 63 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 64 | c = ListedColormap(('red', 'green'))(i), label = j) 65 | plt.title('K-NN (Test set)') 66 | plt.xlabel('Age') 67 | plt.ylabel('Estimated Salary') 68 | plt.legend() 69
| plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 16 - Support Vector Machine (SVM)/SVM/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 3 - Classification/Section 16 - Support Vector Machine (SVM)/SVM/.DS_Store -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 16 - Support Vector Machine (SVM)/SVM/Social_Network_Ads.csv: -------------------------------------------------------------------------------- 1 | User ID,Gender,Age,EstimatedSalary,Purchased 2 | 15624510,Male,19,19000,0 3 | 15810944,Male,35,20000,0 4 | 15668575,Female,26,43000,0 5 | 15603246,Female,27,57000,0 6 | 15804002,Male,19,76000,0 7 | 15728773,Male,27,58000,0 8 | 15598044,Female,27,84000,0 9 | 15694829,Female,32,150000,1 10 | 15600575,Male,25,33000,0 11 | 15727311,Female,35,65000,0 12 | 15570769,Female,26,80000,0 13 | 15606274,Female,26,52000,0 14 | 15746139,Male,20,86000,0 15 | 15704987,Male,32,18000,0 16 | 15628972,Male,18,82000,0 17 | 15697686,Male,29,80000,0 18 | 15733883,Male,47,25000,1 19 | 15617482,Male,45,26000,1 20 | 15704583,Male,46,28000,1 21 | 15621083,Female,48,29000,1 22 | 15649487,Male,45,22000,1 23 | 15736760,Female,47,49000,1 24 | 15714658,Male,48,41000,1 25 | 15599081,Female,45,22000,1 26 | 15705113,Male,46,23000,1 27 | 15631159,Male,47,20000,1 28 | 15792818,Male,49,28000,1 29 | 15633531,Female,47,30000,1 30 | 15744529,Male,29,43000,0 31 | 15669656,Male,31,18000,0 32 | 15581198,Male,31,74000,0 33 | 15729054,Female,27,137000,1 34 | 15573452,Female,21,16000,0 35 | 15776733,Female,28,44000,0 36 | 15724858,Male,27,90000,0 37 | 15713144,Male,35,27000,0 38 | 15690188,Female,33,28000,0 39 | 15689425,Male,30,49000,0 40 | 15671766,Female,26,72000,0 41 | 15782806,Female,27,31000,0 42 | 15764419,Female,27,17000,0 43 | 15591915,Female,33,51000,0 44 | 15772798,Male,35,108000,0 45 | 15792008,Male,30,15000,0 46 | 15715541,Female,28,84000,0 47 | 15639277,Male,23,20000,0 48 | 15798850,Male,25,79000,0 49 | 15776348,Female,27,54000,0 50 | 15727696,Male,30,135000,1 51 | 15793813,Female,31,89000,0 52 | 15694395,Female,24,32000,0 53 | 15764195,Female,18,44000,0 54 | 15744919,Female,29,83000,0 55 | 15671655,Female,35,23000,0 56 | 15654901,Female,27,58000,0 57 | 15649136,Female,24,55000,0 58 | 15775562,Female,23,48000,0 59 | 15807481,Male,28,79000,0 60 | 15642885,Male,22,18000,0 61 | 15789109,Female,32,117000,0 62 | 15814004,Male,27,20000,0 63 | 15673619,Male,25,87000,0 64 | 15595135,Female,23,66000,0 65 | 15583681,Male,32,120000,1 66 | 15605000,Female,59,83000,0 67 | 15718071,Male,24,58000,0 68 | 15679760,Male,24,19000,0 69 | 15654574,Female,23,82000,0 70 | 15577178,Female,22,63000,0 71 | 15595324,Female,31,68000,0 72 | 15756932,Male,25,80000,0 73 | 15726358,Female,24,27000,0 74 | 15595228,Female,20,23000,0 75 | 15782530,Female,33,113000,0 76 | 15592877,Male,32,18000,0 77 | 15651983,Male,34,112000,1 78 | 15746737,Male,18,52000,0 79 | 15774179,Female,22,27000,0 80 | 15667265,Female,28,87000,0 81 | 15655123,Female,26,17000,0 82 | 15595917,Male,30,80000,0 83 | 15668385,Male,39,42000,0 84 | 15709476,Male,20,49000,0 85 | 15711218,Male,35,88000,0 86 | 15798659,Female,30,62000,0 87 | 15663939,Female,31,118000,1 88 | 15694946,Male,24,55000,0 89 | 
15631912,Female,28,85000,0 90 | 15768816,Male,26,81000,0 91 | 15682268,Male,35,50000,0 92 | 15684801,Male,22,81000,0 93 | 15636428,Female,30,116000,0 94 | 15809823,Male,26,15000,0 95 | 15699284,Female,29,28000,0 96 | 15786993,Female,29,83000,0 97 | 15709441,Female,35,44000,0 98 | 15710257,Female,35,25000,0 99 | 15582492,Male,28,123000,1 100 | 15575694,Male,35,73000,0 101 | 15756820,Female,28,37000,0 102 | 15766289,Male,27,88000,0 103 | 15593014,Male,28,59000,0 104 | 15584545,Female,32,86000,0 105 | 15675949,Female,33,149000,1 106 | 15672091,Female,19,21000,0 107 | 15801658,Male,21,72000,0 108 | 15706185,Female,26,35000,0 109 | 15789863,Male,27,89000,0 110 | 15720943,Male,26,86000,0 111 | 15697997,Female,38,80000,0 112 | 15665416,Female,39,71000,0 113 | 15660200,Female,37,71000,0 114 | 15619653,Male,38,61000,0 115 | 15773447,Male,37,55000,0 116 | 15739160,Male,42,80000,0 117 | 15689237,Male,40,57000,0 118 | 15679297,Male,35,75000,0 119 | 15591433,Male,36,52000,0 120 | 15642725,Male,40,59000,0 121 | 15701962,Male,41,59000,0 122 | 15811613,Female,36,75000,0 123 | 15741049,Male,37,72000,0 124 | 15724423,Female,40,75000,0 125 | 15574305,Male,35,53000,0 126 | 15678168,Female,41,51000,0 127 | 15697020,Female,39,61000,0 128 | 15610801,Male,42,65000,0 129 | 15745232,Male,26,32000,0 130 | 15722758,Male,30,17000,0 131 | 15792102,Female,26,84000,0 132 | 15675185,Male,31,58000,0 133 | 15801247,Male,33,31000,0 134 | 15725660,Male,30,87000,0 135 | 15638963,Female,21,68000,0 136 | 15800061,Female,28,55000,0 137 | 15578006,Male,23,63000,0 138 | 15668504,Female,20,82000,0 139 | 15687491,Male,30,107000,1 140 | 15610403,Female,28,59000,0 141 | 15741094,Male,19,25000,0 142 | 15807909,Male,19,85000,0 143 | 15666141,Female,18,68000,0 144 | 15617134,Male,35,59000,0 145 | 15783029,Male,30,89000,0 146 | 15622833,Female,34,25000,0 147 | 15746422,Female,24,89000,0 148 | 15750839,Female,27,96000,1 149 | 15749130,Female,41,30000,0 150 | 15779862,Male,29,61000,0 151 | 15767871,Male,20,74000,0 152 | 15679651,Female,26,15000,0 153 | 15576219,Male,41,45000,0 154 | 15699247,Male,31,76000,0 155 | 15619087,Female,36,50000,0 156 | 15605327,Male,40,47000,0 157 | 15610140,Female,31,15000,0 158 | 15791174,Male,46,59000,0 159 | 15602373,Male,29,75000,0 160 | 15762605,Male,26,30000,0 161 | 15598840,Female,32,135000,1 162 | 15744279,Male,32,100000,1 163 | 15670619,Male,25,90000,0 164 | 15599533,Female,37,33000,0 165 | 15757837,Male,35,38000,0 166 | 15697574,Female,33,69000,0 167 | 15578738,Female,18,86000,0 168 | 15762228,Female,22,55000,0 169 | 15614827,Female,35,71000,0 170 | 15789815,Male,29,148000,1 171 | 15579781,Female,29,47000,0 172 | 15587013,Male,21,88000,0 173 | 15570932,Male,34,115000,0 174 | 15794661,Female,26,118000,0 175 | 15581654,Female,34,43000,0 176 | 15644296,Female,34,72000,0 177 | 15614420,Female,23,28000,0 178 | 15609653,Female,35,47000,0 179 | 15594577,Male,25,22000,0 180 | 15584114,Male,24,23000,0 181 | 15673367,Female,31,34000,0 182 | 15685576,Male,26,16000,0 183 | 15774727,Female,31,71000,0 184 | 15694288,Female,32,117000,1 185 | 15603319,Male,33,43000,0 186 | 15759066,Female,33,60000,0 187 | 15814816,Male,31,66000,0 188 | 15724402,Female,20,82000,0 189 | 15571059,Female,33,41000,0 190 | 15674206,Male,35,72000,0 191 | 15715160,Male,28,32000,0 192 | 15730448,Male,24,84000,0 193 | 15662067,Female,19,26000,0 194 | 15779581,Male,29,43000,0 195 | 15662901,Male,19,70000,0 196 | 15689751,Male,28,89000,0 197 | 15667742,Male,34,43000,0 198 | 15738448,Female,30,79000,0 199 | 15680243,Female,20,36000,0 200 | 
15745083,Male,26,80000,0 201 | 15708228,Male,35,22000,0 202 | 15628523,Male,35,39000,0 203 | 15708196,Male,49,74000,0 204 | 15735549,Female,39,134000,1 205 | 15809347,Female,41,71000,0 206 | 15660866,Female,58,101000,1 207 | 15766609,Female,47,47000,0 208 | 15654230,Female,55,130000,1 209 | 15794566,Female,52,114000,0 210 | 15800890,Female,40,142000,1 211 | 15697424,Female,46,22000,0 212 | 15724536,Female,48,96000,1 213 | 15735878,Male,52,150000,1 214 | 15707596,Female,59,42000,0 215 | 15657163,Male,35,58000,0 216 | 15622478,Male,47,43000,0 217 | 15779529,Female,60,108000,1 218 | 15636023,Male,49,65000,0 219 | 15582066,Male,40,78000,0 220 | 15666675,Female,46,96000,0 221 | 15732987,Male,59,143000,1 222 | 15789432,Female,41,80000,0 223 | 15663161,Male,35,91000,1 224 | 15694879,Male,37,144000,1 225 | 15593715,Male,60,102000,1 226 | 15575002,Female,35,60000,0 227 | 15622171,Male,37,53000,0 228 | 15795224,Female,36,126000,1 229 | 15685346,Male,56,133000,1 230 | 15691808,Female,40,72000,0 231 | 15721007,Female,42,80000,1 232 | 15794253,Female,35,147000,1 233 | 15694453,Male,39,42000,0 234 | 15813113,Male,40,107000,1 235 | 15614187,Male,49,86000,1 236 | 15619407,Female,38,112000,0 237 | 15646227,Male,46,79000,1 238 | 15660541,Male,40,57000,0 239 | 15753874,Female,37,80000,0 240 | 15617877,Female,46,82000,0 241 | 15772073,Female,53,143000,1 242 | 15701537,Male,42,149000,1 243 | 15736228,Male,38,59000,0 244 | 15780572,Female,50,88000,1 245 | 15769596,Female,56,104000,1 246 | 15586996,Female,41,72000,0 247 | 15722061,Female,51,146000,1 248 | 15638003,Female,35,50000,0 249 | 15775590,Female,57,122000,1 250 | 15730688,Male,41,52000,0 251 | 15753102,Female,35,97000,1 252 | 15810075,Female,44,39000,0 253 | 15723373,Male,37,52000,0 254 | 15795298,Female,48,134000,1 255 | 15584320,Female,37,146000,1 256 | 15724161,Female,50,44000,0 257 | 15750056,Female,52,90000,1 258 | 15609637,Female,41,72000,0 259 | 15794493,Male,40,57000,0 260 | 15569641,Female,58,95000,1 261 | 15815236,Female,45,131000,1 262 | 15811177,Female,35,77000,0 263 | 15680587,Male,36,144000,1 264 | 15672821,Female,55,125000,1 265 | 15767681,Female,35,72000,0 266 | 15600379,Male,48,90000,1 267 | 15801336,Female,42,108000,1 268 | 15721592,Male,40,75000,0 269 | 15581282,Male,37,74000,0 270 | 15746203,Female,47,144000,1 271 | 15583137,Male,40,61000,0 272 | 15680752,Female,43,133000,0 273 | 15688172,Female,59,76000,1 274 | 15791373,Male,60,42000,1 275 | 15589449,Male,39,106000,1 276 | 15692819,Female,57,26000,1 277 | 15727467,Male,57,74000,1 278 | 15734312,Male,38,71000,0 279 | 15764604,Male,49,88000,1 280 | 15613014,Female,52,38000,1 281 | 15759684,Female,50,36000,1 282 | 15609669,Female,59,88000,1 283 | 15685536,Male,35,61000,0 284 | 15750447,Male,37,70000,1 285 | 15663249,Female,52,21000,1 286 | 15638646,Male,48,141000,0 287 | 15734161,Female,37,93000,1 288 | 15631070,Female,37,62000,0 289 | 15761950,Female,48,138000,1 290 | 15649668,Male,41,79000,0 291 | 15713912,Female,37,78000,1 292 | 15586757,Male,39,134000,1 293 | 15596522,Male,49,89000,1 294 | 15625395,Male,55,39000,1 295 | 15760570,Male,37,77000,0 296 | 15566689,Female,35,57000,0 297 | 15725794,Female,36,63000,0 298 | 15673539,Male,42,73000,1 299 | 15705298,Female,43,112000,1 300 | 15675791,Male,45,79000,0 301 | 15747043,Male,46,117000,1 302 | 15736397,Female,58,38000,1 303 | 15678201,Male,48,74000,1 304 | 15720745,Female,37,137000,1 305 | 15637593,Male,37,79000,1 306 | 15598070,Female,40,60000,0 307 | 15787550,Male,42,54000,0 308 | 15603942,Female,51,134000,0 309 | 
15733973,Female,47,113000,1 310 | 15596761,Male,36,125000,1 311 | 15652400,Female,38,50000,0 312 | 15717893,Female,42,70000,0 313 | 15622585,Male,39,96000,1 314 | 15733964,Female,38,50000,0 315 | 15753861,Female,49,141000,1 316 | 15747097,Female,39,79000,0 317 | 15594762,Female,39,75000,1 318 | 15667417,Female,54,104000,1 319 | 15684861,Male,35,55000,0 320 | 15742204,Male,45,32000,1 321 | 15623502,Male,36,60000,0 322 | 15774872,Female,52,138000,1 323 | 15611191,Female,53,82000,1 324 | 15674331,Male,41,52000,0 325 | 15619465,Female,48,30000,1 326 | 15575247,Female,48,131000,1 327 | 15695679,Female,41,60000,0 328 | 15713463,Male,41,72000,0 329 | 15785170,Female,42,75000,0 330 | 15796351,Male,36,118000,1 331 | 15639576,Female,47,107000,1 332 | 15693264,Male,38,51000,0 333 | 15589715,Female,48,119000,1 334 | 15769902,Male,42,65000,0 335 | 15587177,Male,40,65000,0 336 | 15814553,Male,57,60000,1 337 | 15601550,Female,36,54000,0 338 | 15664907,Male,58,144000,1 339 | 15612465,Male,35,79000,0 340 | 15810800,Female,38,55000,0 341 | 15665760,Male,39,122000,1 342 | 15588080,Female,53,104000,1 343 | 15776844,Male,35,75000,0 344 | 15717560,Female,38,65000,0 345 | 15629739,Female,47,51000,1 346 | 15729908,Male,47,105000,1 347 | 15716781,Female,41,63000,0 348 | 15646936,Male,53,72000,1 349 | 15768151,Female,54,108000,1 350 | 15579212,Male,39,77000,0 351 | 15721835,Male,38,61000,0 352 | 15800515,Female,38,113000,1 353 | 15591279,Male,37,75000,0 354 | 15587419,Female,42,90000,1 355 | 15750335,Female,37,57000,0 356 | 15699619,Male,36,99000,1 357 | 15606472,Male,60,34000,1 358 | 15778368,Male,54,70000,1 359 | 15671387,Female,41,72000,0 360 | 15573926,Male,40,71000,1 361 | 15709183,Male,42,54000,0 362 | 15577514,Male,43,129000,1 363 | 15778830,Female,53,34000,1 364 | 15768072,Female,47,50000,1 365 | 15768293,Female,42,79000,0 366 | 15654456,Male,42,104000,1 367 | 15807525,Female,59,29000,1 368 | 15574372,Female,58,47000,1 369 | 15671249,Male,46,88000,1 370 | 15779744,Male,38,71000,0 371 | 15624755,Female,54,26000,1 372 | 15611430,Female,60,46000,1 373 | 15774744,Male,60,83000,1 374 | 15629885,Female,39,73000,0 375 | 15708791,Male,59,130000,1 376 | 15793890,Female,37,80000,0 377 | 15646091,Female,46,32000,1 378 | 15596984,Female,46,74000,0 379 | 15800215,Female,42,53000,0 380 | 15577806,Male,41,87000,1 381 | 15749381,Female,58,23000,1 382 | 15683758,Male,42,64000,0 383 | 15670615,Male,48,33000,1 384 | 15715622,Female,44,139000,1 385 | 15707634,Male,49,28000,1 386 | 15806901,Female,57,33000,1 387 | 15775335,Male,56,60000,1 388 | 15724150,Female,49,39000,1 389 | 15627220,Male,39,71000,0 390 | 15672330,Male,47,34000,1 391 | 15668521,Female,48,35000,1 392 | 15807837,Male,48,33000,1 393 | 15592570,Male,47,23000,1 394 | 15748589,Female,45,45000,1 395 | 15635893,Male,60,42000,1 396 | 15757632,Female,39,59000,0 397 | 15691863,Female,46,41000,1 398 | 15706071,Male,51,23000,1 399 | 15654296,Female,50,20000,1 400 | 15755018,Male,36,33000,0 401 | 15594041,Female,49,36000,1 -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 16 - Support Vector Machine (SVM)/SVM/classification_template.R: -------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # 
Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature Scaling 19 | training_set[-3] = scale(training_set[-3]) 20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting classifier to the Training set 23 | # Create your classifier here 24 | 25 | # Predicting the Test set results 26 | y_pred = predict(classifier, newdata = test_set[-3]) 27 | 28 | # Making the Confusion Matrix 29 | cm = table(test_set[, 3], y_pred) 30 | 31 | # Visualising the Training set results 32 | library(ElemStatLearn) 33 | set = training_set 34 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 35 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 36 | grid_set = expand.grid(X1, X2) 37 | colnames(grid_set) = c('Age', 'EstimatedSalary') 38 | y_grid = predict(classifier, newdata = grid_set) 39 | plot(set[, -3], 40 | main = 'Classifier (Training set)', 41 | xlab = 'Age', ylab = 'Estimated Salary', 42 | xlim = range(X1), ylim = range(X2)) 43 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 44 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 45 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 46 | 47 | # Visualising the Test set results 48 | library(ElemStatLearn) 49 | set = test_set 50 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 51 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 52 | grid_set = expand.grid(X1, X2) 53 | colnames(grid_set) = c('Age', 'EstimatedSalary') 54 | y_grid = predict(classifier, newdata = grid_set) 55 | plot(set[, -3], main = 'Classifier (Test set)', 56 | xlab = 'Age', ylab = 'Estimated Salary', 57 | xlim = range(X1), ylim = range(X2)) 58 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 59 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 60 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 16 - Support Vector Machine (SVM)/SVM/classification_template.py: -------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split  # the older sklearn.cross_validation module was removed in scikit-learn 0.20 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler() 20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting classifier to the Training set 24 | # Create your classifier here 25 | 26 | # Predicting the Test set results 27 | y_pred = classifier.predict(X_test) 28 | 29 | # Making the Confusion Matrix 30 | from sklearn.metrics import confusion_matrix 31 | cm = confusion_matrix(y_test, y_pred) 32 | 33 | # Visualising the Training set results 34 |
from matplotlib.colors import ListedColormap 35 | X_set, y_set = X_train, y_train 36 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 37 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 38 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 39 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 40 | plt.xlim(X1.min(), X1.max()) 41 | plt.ylim(X2.min(), X2.max()) 42 | for i, j in enumerate(np.unique(y_set)): 43 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 44 | c = ListedColormap(('red', 'green'))(i), label = j) 45 | plt.title('Classifier (Training set)') 46 | plt.xlabel('Age') 47 | plt.ylabel('Estimated Salary') 48 | plt.legend() 49 | plt.show() 50 | 51 | # Visualising the Test set results 52 | from matplotlib.colors import ListedColormap 53 | X_set, y_set = X_test, y_test 54 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 55 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 56 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 57 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 58 | plt.xlim(X1.min(), X1.max()) 59 | plt.ylim(X2.min(), X2.max()) 60 | for i, j in enumerate(np.unique(y_set)): 61 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 62 | c = ListedColormap(('red', 'green'))(i), label = j) 63 | plt.title('Classifier (Test set)') 64 | plt.xlabel('Age') 65 | plt.ylabel('Estimated Salary') 66 | plt.legend() 67 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 16 - Support Vector Machine (SVM)/SVM/svm.R: -------------------------------------------------------------------------------- 1 | # Support Vector Machine (SVM) 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature Scaling 19 | training_set[-3] = scale(training_set[-3]) 20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting SVM to the Training set 23 | # install.packages('e1071') 24 | library(e1071) 25 | classifier = svm(formula = Purchased ~ ., 26 | data = training_set, 27 | type = 'C-classification', 28 | kernel = 'linear') 29 | 30 | # Predicting the Test set results 31 | y_pred = predict(classifier, newdata = test_set[-3]) 32 | 33 | # Making the Confusion Matrix 34 | cm = table(test_set[, 3], y_pred) 35 | 36 | # Visualising the Training set results 37 | library(ElemStatLearn) 38 | set = training_set 39 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 40 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 41 | grid_set = expand.grid(X1, X2) 42 | colnames(grid_set) = c('Age', 'EstimatedSalary') 43 | y_grid = predict(classifier, newdata = grid_set) 44 | plot(set[, -3], 45 | main = 'SVM (Training set)', 46 | xlab = 'Age', ylab = 'Estimated Salary', 47 | xlim = range(X1), ylim = range(X2)) 48 | 
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 49 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 50 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 51 | 52 | # Visualising the Test set results 53 | library(ElemStatLearn) 54 | set = test_set 55 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 56 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 57 | grid_set = expand.grid(X1, X2) 58 | colnames(grid_set) = c('Age', 'EstimatedSalary') 59 | y_grid = predict(classifier, newdata = grid_set) 60 | plot(set[, -3], main = 'SVM (Test set)', 61 | xlab = 'Age', ylab = 'Estimated Salary', 62 | xlim = range(X1), ylim = range(X2)) 63 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 64 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 65 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 16 - Support Vector Machine (SVM)/SVM/svm.py: -------------------------------------------------------------------------------- 1 | # Support Vector Machine (SVM) 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler() 20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting SVM to the Training set 24 | from sklearn.svm import SVC 25 | classifier = SVC(kernel = 'linear', random_state = 0) 26 | classifier.fit(X_train, y_train) 27 | 28 | # Predicting the Test set results 29 | y_pred = classifier.predict(X_test) 30 | 31 | # Making the Confusion Matrix 32 | from sklearn.metrics import confusion_matrix 33 | cm = confusion_matrix(y_test, y_pred) 34 | 35 | # Visualising the Training set results 36 | from matplotlib.colors import ListedColormap 37 | X_set, y_set = X_train, y_train 38 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 39 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 40 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 41 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 42 | plt.xlim(X1.min(), X1.max()) 43 | plt.ylim(X2.min(), X2.max()) 44 | for i, j in enumerate(np.unique(y_set)): 45 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 46 | c = ListedColormap(('red', 'green'))(i), label = j) 47 | plt.title('SVM (Training set)') 48 | plt.xlabel('Age') 49 | plt.ylabel('Estimated Salary') 50 | plt.legend() 51 | plt.show() 52 | 53 | # Visualising the Test set results 54 | from matplotlib.colors import ListedColormap 55 | X_set, y_set = X_test, y_test 56 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 57 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 
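# (Aside, a hedged sketch and not part of the original svm.py: the np.meshgrid call above builds a 0.01-spaced grid over the scaled Age/EstimatedSalary plane, and the plt.contourf call below classifies every grid point and colours it by predicted class; that is what draws the decision boundary. The same idea in isolation, assuming a fitted `classifier` as above:
#
#     grid_points = np.array([X1.ravel(), X2.ravel()]).T                # flatten the grid into (n_points, 2) feature rows
#     grid_labels = classifier.predict(grid_points).reshape(X1.shape)   # one predicted class per grid point
#
# With kernel = 'linear' the boundary drawn this way is a straight line in the scaled feature plane.)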
58 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 59 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 60 | plt.xlim(X1.min(), X1.max()) 61 | plt.ylim(X2.min(), X2.max()) 62 | for i, j in enumerate(np.unique(y_set)): 63 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 64 | c = ListedColormap(('red', 'green'))(i), label = j) 65 | plt.title('SVM (Test set)') 66 | plt.xlabel('Age') 67 | plt.ylabel('Estimated Salary') 68 | plt.legend() 69 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 17 - Kernel SVM/Kernel_SVM/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 3 - Classification/Section 17 - Kernel SVM/Kernel_SVM/.DS_Store -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 17 - Kernel SVM/Kernel_SVM/Social_Network_Ads.csv: -------------------------------------------------------------------------------- 1 | User ID,Gender,Age,EstimatedSalary,Purchased 2 | 15624510,Male,19,19000,0 3 | 15810944,Male,35,20000,0 4 | 15668575,Female,26,43000,0 5 | 15603246,Female,27,57000,0 6 | 15804002,Male,19,76000,0 7 | 15728773,Male,27,58000,0 8 | 15598044,Female,27,84000,0 9 | 15694829,Female,32,150000,1 10 | 15600575,Male,25,33000,0 11 | 15727311,Female,35,65000,0 12 | 15570769,Female,26,80000,0 13 | 15606274,Female,26,52000,0 14 | 15746139,Male,20,86000,0 15 | 15704987,Male,32,18000,0 16 | 15628972,Male,18,82000,0 17 | 15697686,Male,29,80000,0 18 | 15733883,Male,47,25000,1 19 | 15617482,Male,45,26000,1 20 | 15704583,Male,46,28000,1 21 | 15621083,Female,48,29000,1 22 | 15649487,Male,45,22000,1 23 | 15736760,Female,47,49000,1 24 | 15714658,Male,48,41000,1 25 | 15599081,Female,45,22000,1 26 | 15705113,Male,46,23000,1 27 | 15631159,Male,47,20000,1 28 | 15792818,Male,49,28000,1 29 | 15633531,Female,47,30000,1 30 | 15744529,Male,29,43000,0 31 | 15669656,Male,31,18000,0 32 | 15581198,Male,31,74000,0 33 | 15729054,Female,27,137000,1 34 | 15573452,Female,21,16000,0 35 | 15776733,Female,28,44000,0 36 | 15724858,Male,27,90000,0 37 | 15713144,Male,35,27000,0 38 | 15690188,Female,33,28000,0 39 | 15689425,Male,30,49000,0 40 | 15671766,Female,26,72000,0 41 | 15782806,Female,27,31000,0 42 | 15764419,Female,27,17000,0 43 | 15591915,Female,33,51000,0 44 | 15772798,Male,35,108000,0 45 | 15792008,Male,30,15000,0 46 | 15715541,Female,28,84000,0 47 | 15639277,Male,23,20000,0 48 | 15798850,Male,25,79000,0 49 | 15776348,Female,27,54000,0 50 | 15727696,Male,30,135000,1 51 | 15793813,Female,31,89000,0 52 | 15694395,Female,24,32000,0 53 | 15764195,Female,18,44000,0 54 | 15744919,Female,29,83000,0 55 | 15671655,Female,35,23000,0 56 | 15654901,Female,27,58000,0 57 | 15649136,Female,24,55000,0 58 | 15775562,Female,23,48000,0 59 | 15807481,Male,28,79000,0 60 | 15642885,Male,22,18000,0 61 | 15789109,Female,32,117000,0 62 | 15814004,Male,27,20000,0 63 | 15673619,Male,25,87000,0 64 | 15595135,Female,23,66000,0 65 | 15583681,Male,32,120000,1 66 | 15605000,Female,59,83000,0 67 | 15718071,Male,24,58000,0 68 | 15679760,Male,24,19000,0 69 | 15654574,Female,23,82000,0 70 | 15577178,Female,22,63000,0 71 | 15595324,Female,31,68000,0 72 | 15756932,Male,25,80000,0 73 | 15726358,Female,24,27000,0 74 | 
15595228,Female,20,23000,0 75 | 15782530,Female,33,113000,0 76 | 15592877,Male,32,18000,0 77 | 15651983,Male,34,112000,1 78 | 15746737,Male,18,52000,0 79 | 15774179,Female,22,27000,0 80 | 15667265,Female,28,87000,0 81 | 15655123,Female,26,17000,0 82 | 15595917,Male,30,80000,0 83 | 15668385,Male,39,42000,0 84 | 15709476,Male,20,49000,0 85 | 15711218,Male,35,88000,0 86 | 15798659,Female,30,62000,0 87 | 15663939,Female,31,118000,1 88 | 15694946,Male,24,55000,0 89 | 15631912,Female,28,85000,0 90 | 15768816,Male,26,81000,0 91 | 15682268,Male,35,50000,0 92 | 15684801,Male,22,81000,0 93 | 15636428,Female,30,116000,0 94 | 15809823,Male,26,15000,0 95 | 15699284,Female,29,28000,0 96 | 15786993,Female,29,83000,0 97 | 15709441,Female,35,44000,0 98 | 15710257,Female,35,25000,0 99 | 15582492,Male,28,123000,1 100 | 15575694,Male,35,73000,0 101 | 15756820,Female,28,37000,0 102 | 15766289,Male,27,88000,0 103 | 15593014,Male,28,59000,0 104 | 15584545,Female,32,86000,0 105 | 15675949,Female,33,149000,1 106 | 15672091,Female,19,21000,0 107 | 15801658,Male,21,72000,0 108 | 15706185,Female,26,35000,0 109 | 15789863,Male,27,89000,0 110 | 15720943,Male,26,86000,0 111 | 15697997,Female,38,80000,0 112 | 15665416,Female,39,71000,0 113 | 15660200,Female,37,71000,0 114 | 15619653,Male,38,61000,0 115 | 15773447,Male,37,55000,0 116 | 15739160,Male,42,80000,0 117 | 15689237,Male,40,57000,0 118 | 15679297,Male,35,75000,0 119 | 15591433,Male,36,52000,0 120 | 15642725,Male,40,59000,0 121 | 15701962,Male,41,59000,0 122 | 15811613,Female,36,75000,0 123 | 15741049,Male,37,72000,0 124 | 15724423,Female,40,75000,0 125 | 15574305,Male,35,53000,0 126 | 15678168,Female,41,51000,0 127 | 15697020,Female,39,61000,0 128 | 15610801,Male,42,65000,0 129 | 15745232,Male,26,32000,0 130 | 15722758,Male,30,17000,0 131 | 15792102,Female,26,84000,0 132 | 15675185,Male,31,58000,0 133 | 15801247,Male,33,31000,0 134 | 15725660,Male,30,87000,0 135 | 15638963,Female,21,68000,0 136 | 15800061,Female,28,55000,0 137 | 15578006,Male,23,63000,0 138 | 15668504,Female,20,82000,0 139 | 15687491,Male,30,107000,1 140 | 15610403,Female,28,59000,0 141 | 15741094,Male,19,25000,0 142 | 15807909,Male,19,85000,0 143 | 15666141,Female,18,68000,0 144 | 15617134,Male,35,59000,0 145 | 15783029,Male,30,89000,0 146 | 15622833,Female,34,25000,0 147 | 15746422,Female,24,89000,0 148 | 15750839,Female,27,96000,1 149 | 15749130,Female,41,30000,0 150 | 15779862,Male,29,61000,0 151 | 15767871,Male,20,74000,0 152 | 15679651,Female,26,15000,0 153 | 15576219,Male,41,45000,0 154 | 15699247,Male,31,76000,0 155 | 15619087,Female,36,50000,0 156 | 15605327,Male,40,47000,0 157 | 15610140,Female,31,15000,0 158 | 15791174,Male,46,59000,0 159 | 15602373,Male,29,75000,0 160 | 15762605,Male,26,30000,0 161 | 15598840,Female,32,135000,1 162 | 15744279,Male,32,100000,1 163 | 15670619,Male,25,90000,0 164 | 15599533,Female,37,33000,0 165 | 15757837,Male,35,38000,0 166 | 15697574,Female,33,69000,0 167 | 15578738,Female,18,86000,0 168 | 15762228,Female,22,55000,0 169 | 15614827,Female,35,71000,0 170 | 15789815,Male,29,148000,1 171 | 15579781,Female,29,47000,0 172 | 15587013,Male,21,88000,0 173 | 15570932,Male,34,115000,0 174 | 15794661,Female,26,118000,0 175 | 15581654,Female,34,43000,0 176 | 15644296,Female,34,72000,0 177 | 15614420,Female,23,28000,0 178 | 15609653,Female,35,47000,0 179 | 15594577,Male,25,22000,0 180 | 15584114,Male,24,23000,0 181 | 15673367,Female,31,34000,0 182 | 15685576,Male,26,16000,0 183 | 15774727,Female,31,71000,0 184 | 15694288,Female,32,117000,1 185 | 
15603319,Male,33,43000,0 186 | 15759066,Female,33,60000,0 187 | 15814816,Male,31,66000,0 188 | 15724402,Female,20,82000,0 189 | 15571059,Female,33,41000,0 190 | 15674206,Male,35,72000,0 191 | 15715160,Male,28,32000,0 192 | 15730448,Male,24,84000,0 193 | 15662067,Female,19,26000,0 194 | 15779581,Male,29,43000,0 195 | 15662901,Male,19,70000,0 196 | 15689751,Male,28,89000,0 197 | 15667742,Male,34,43000,0 198 | 15738448,Female,30,79000,0 199 | 15680243,Female,20,36000,0 200 | 15745083,Male,26,80000,0 201 | 15708228,Male,35,22000,0 202 | 15628523,Male,35,39000,0 203 | 15708196,Male,49,74000,0 204 | 15735549,Female,39,134000,1 205 | 15809347,Female,41,71000,0 206 | 15660866,Female,58,101000,1 207 | 15766609,Female,47,47000,0 208 | 15654230,Female,55,130000,1 209 | 15794566,Female,52,114000,0 210 | 15800890,Female,40,142000,1 211 | 15697424,Female,46,22000,0 212 | 15724536,Female,48,96000,1 213 | 15735878,Male,52,150000,1 214 | 15707596,Female,59,42000,0 215 | 15657163,Male,35,58000,0 216 | 15622478,Male,47,43000,0 217 | 15779529,Female,60,108000,1 218 | 15636023,Male,49,65000,0 219 | 15582066,Male,40,78000,0 220 | 15666675,Female,46,96000,0 221 | 15732987,Male,59,143000,1 222 | 15789432,Female,41,80000,0 223 | 15663161,Male,35,91000,1 224 | 15694879,Male,37,144000,1 225 | 15593715,Male,60,102000,1 226 | 15575002,Female,35,60000,0 227 | 15622171,Male,37,53000,0 228 | 15795224,Female,36,126000,1 229 | 15685346,Male,56,133000,1 230 | 15691808,Female,40,72000,0 231 | 15721007,Female,42,80000,1 232 | 15794253,Female,35,147000,1 233 | 15694453,Male,39,42000,0 234 | 15813113,Male,40,107000,1 235 | 15614187,Male,49,86000,1 236 | 15619407,Female,38,112000,0 237 | 15646227,Male,46,79000,1 238 | 15660541,Male,40,57000,0 239 | 15753874,Female,37,80000,0 240 | 15617877,Female,46,82000,0 241 | 15772073,Female,53,143000,1 242 | 15701537,Male,42,149000,1 243 | 15736228,Male,38,59000,0 244 | 15780572,Female,50,88000,1 245 | 15769596,Female,56,104000,1 246 | 15586996,Female,41,72000,0 247 | 15722061,Female,51,146000,1 248 | 15638003,Female,35,50000,0 249 | 15775590,Female,57,122000,1 250 | 15730688,Male,41,52000,0 251 | 15753102,Female,35,97000,1 252 | 15810075,Female,44,39000,0 253 | 15723373,Male,37,52000,0 254 | 15795298,Female,48,134000,1 255 | 15584320,Female,37,146000,1 256 | 15724161,Female,50,44000,0 257 | 15750056,Female,52,90000,1 258 | 15609637,Female,41,72000,0 259 | 15794493,Male,40,57000,0 260 | 15569641,Female,58,95000,1 261 | 15815236,Female,45,131000,1 262 | 15811177,Female,35,77000,0 263 | 15680587,Male,36,144000,1 264 | 15672821,Female,55,125000,1 265 | 15767681,Female,35,72000,0 266 | 15600379,Male,48,90000,1 267 | 15801336,Female,42,108000,1 268 | 15721592,Male,40,75000,0 269 | 15581282,Male,37,74000,0 270 | 15746203,Female,47,144000,1 271 | 15583137,Male,40,61000,0 272 | 15680752,Female,43,133000,0 273 | 15688172,Female,59,76000,1 274 | 15791373,Male,60,42000,1 275 | 15589449,Male,39,106000,1 276 | 15692819,Female,57,26000,1 277 | 15727467,Male,57,74000,1 278 | 15734312,Male,38,71000,0 279 | 15764604,Male,49,88000,1 280 | 15613014,Female,52,38000,1 281 | 15759684,Female,50,36000,1 282 | 15609669,Female,59,88000,1 283 | 15685536,Male,35,61000,0 284 | 15750447,Male,37,70000,1 285 | 15663249,Female,52,21000,1 286 | 15638646,Male,48,141000,0 287 | 15734161,Female,37,93000,1 288 | 15631070,Female,37,62000,0 289 | 15761950,Female,48,138000,1 290 | 15649668,Male,41,79000,0 291 | 15713912,Female,37,78000,1 292 | 15586757,Male,39,134000,1 293 | 15596522,Male,49,89000,1 294 | 15625395,Male,55,39000,1 
295 | 15760570,Male,37,77000,0 296 | 15566689,Female,35,57000,0 297 | 15725794,Female,36,63000,0 298 | 15673539,Male,42,73000,1 299 | 15705298,Female,43,112000,1 300 | 15675791,Male,45,79000,0 301 | 15747043,Male,46,117000,1 302 | 15736397,Female,58,38000,1 303 | 15678201,Male,48,74000,1 304 | 15720745,Female,37,137000,1 305 | 15637593,Male,37,79000,1 306 | 15598070,Female,40,60000,0 307 | 15787550,Male,42,54000,0 308 | 15603942,Female,51,134000,0 309 | 15733973,Female,47,113000,1 310 | 15596761,Male,36,125000,1 311 | 15652400,Female,38,50000,0 312 | 15717893,Female,42,70000,0 313 | 15622585,Male,39,96000,1 314 | 15733964,Female,38,50000,0 315 | 15753861,Female,49,141000,1 316 | 15747097,Female,39,79000,0 317 | 15594762,Female,39,75000,1 318 | 15667417,Female,54,104000,1 319 | 15684861,Male,35,55000,0 320 | 15742204,Male,45,32000,1 321 | 15623502,Male,36,60000,0 322 | 15774872,Female,52,138000,1 323 | 15611191,Female,53,82000,1 324 | 15674331,Male,41,52000,0 325 | 15619465,Female,48,30000,1 326 | 15575247,Female,48,131000,1 327 | 15695679,Female,41,60000,0 328 | 15713463,Male,41,72000,0 329 | 15785170,Female,42,75000,0 330 | 15796351,Male,36,118000,1 331 | 15639576,Female,47,107000,1 332 | 15693264,Male,38,51000,0 333 | 15589715,Female,48,119000,1 334 | 15769902,Male,42,65000,0 335 | 15587177,Male,40,65000,0 336 | 15814553,Male,57,60000,1 337 | 15601550,Female,36,54000,0 338 | 15664907,Male,58,144000,1 339 | 15612465,Male,35,79000,0 340 | 15810800,Female,38,55000,0 341 | 15665760,Male,39,122000,1 342 | 15588080,Female,53,104000,1 343 | 15776844,Male,35,75000,0 344 | 15717560,Female,38,65000,0 345 | 15629739,Female,47,51000,1 346 | 15729908,Male,47,105000,1 347 | 15716781,Female,41,63000,0 348 | 15646936,Male,53,72000,1 349 | 15768151,Female,54,108000,1 350 | 15579212,Male,39,77000,0 351 | 15721835,Male,38,61000,0 352 | 15800515,Female,38,113000,1 353 | 15591279,Male,37,75000,0 354 | 15587419,Female,42,90000,1 355 | 15750335,Female,37,57000,0 356 | 15699619,Male,36,99000,1 357 | 15606472,Male,60,34000,1 358 | 15778368,Male,54,70000,1 359 | 15671387,Female,41,72000,0 360 | 15573926,Male,40,71000,1 361 | 15709183,Male,42,54000,0 362 | 15577514,Male,43,129000,1 363 | 15778830,Female,53,34000,1 364 | 15768072,Female,47,50000,1 365 | 15768293,Female,42,79000,0 366 | 15654456,Male,42,104000,1 367 | 15807525,Female,59,29000,1 368 | 15574372,Female,58,47000,1 369 | 15671249,Male,46,88000,1 370 | 15779744,Male,38,71000,0 371 | 15624755,Female,54,26000,1 372 | 15611430,Female,60,46000,1 373 | 15774744,Male,60,83000,1 374 | 15629885,Female,39,73000,0 375 | 15708791,Male,59,130000,1 376 | 15793890,Female,37,80000,0 377 | 15646091,Female,46,32000,1 378 | 15596984,Female,46,74000,0 379 | 15800215,Female,42,53000,0 380 | 15577806,Male,41,87000,1 381 | 15749381,Female,58,23000,1 382 | 15683758,Male,42,64000,0 383 | 15670615,Male,48,33000,1 384 | 15715622,Female,44,139000,1 385 | 15707634,Male,49,28000,1 386 | 15806901,Female,57,33000,1 387 | 15775335,Male,56,60000,1 388 | 15724150,Female,49,39000,1 389 | 15627220,Male,39,71000,0 390 | 15672330,Male,47,34000,1 391 | 15668521,Female,48,35000,1 392 | 15807837,Male,48,33000,1 393 | 15592570,Male,47,23000,1 394 | 15748589,Female,45,45000,1 395 | 15635893,Male,60,42000,1 396 | 15757632,Female,39,59000,0 397 | 15691863,Female,46,41000,1 398 | 15706071,Male,51,23000,1 399 | 15654296,Female,50,20000,1 400 | 15755018,Male,36,33000,0 401 | 15594041,Female,49,36000,1 -------------------------------------------------------------------------------- /udemy/Machine 
Learning A-Z/Part 3 - Classification/Section 17 - Kernel SVM/Kernel_SVM/classification_template.R: -------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature Scaling 19 | training_set[-3] = scale(training_set[-3]) 20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting classifier to the Training set 23 | # Create your classifier here 24 | 25 | # Predicting the Test set results 26 | y_pred = predict(classifier, newdata = test_set[-3]) 27 | 28 | # Making the Confusion Matrix 29 | cm = table(test_set[, 3], y_pred) 30 | 31 | # Visualising the Training set results 32 | library(ElemStatLearn) 33 | set = training_set 34 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 35 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 36 | grid_set = expand.grid(X1, X2) 37 | colnames(grid_set) = c('Age', 'EstimatedSalary') 38 | y_grid = predict(classifier, newdata = grid_set) 39 | plot(set[, -3], 40 | main = 'Classifier (Training set)', 41 | xlab = 'Age', ylab = 'Estimated Salary', 42 | xlim = range(X1), ylim = range(X2)) 43 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 44 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 45 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 46 | 47 | # Visualising the Test set results 48 | library(ElemStatLearn) 49 | set = test_set 50 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 51 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 52 | grid_set = expand.grid(X1, X2) 53 | colnames(grid_set) = c('Age', 'EstimatedSalary') 54 | y_grid = predict(classifier, newdata = grid_set) 55 | plot(set[, -3], main = 'Classifier (Test set)', 56 | xlab = 'Age', ylab = 'Estimated Salary', 57 | xlim = range(X1), ylim = range(X2)) 58 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 59 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 60 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 17 - Kernel SVM/Kernel_SVM/classification_template.py: -------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split  # the older sklearn.cross_validation module was removed in scikit-learn 0.20 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler()
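# (Aside, not part of the original template: StandardScaler above is fitted on the training set only; fit_transform(X_train) on the next line learns each column's mean and standard deviation and standardises the training data in one step, while transform(X_test) reuses those training-set statistics without refitting, so no test-set information leaks into preprocessing.)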
20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting classifier to the Training set 24 | # Create your classifier here 25 | 26 | # Predicting the Test set results 27 | y_pred = classifier.predict(X_test) 28 | 29 | # Making the Confusion Matrix 30 | from sklearn.metrics import confusion_matrix 31 | cm = confusion_matrix(y_test, y_pred) 32 | 33 | # Visualising the Training set results 34 | from matplotlib.colors import ListedColormap 35 | X_set, y_set = X_train, y_train 36 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 37 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 38 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 39 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 40 | plt.xlim(X1.min(), X1.max()) 41 | plt.ylim(X2.min(), X2.max()) 42 | for i, j in enumerate(np.unique(y_set)): 43 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 44 | c = ListedColormap(('red', 'green'))(i), label = j) 45 | plt.title('Classifier (Training set)') 46 | plt.xlabel('Age') 47 | plt.ylabel('Estimated Salary') 48 | plt.legend() 49 | plt.show() 50 | 51 | # Visualising the Test set results 52 | from matplotlib.colors import ListedColormap 53 | X_set, y_set = X_test, y_test 54 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 55 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 56 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 57 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 58 | plt.xlim(X1.min(), X1.max()) 59 | plt.ylim(X2.min(), X2.max()) 60 | for i, j in enumerate(np.unique(y_set)): 61 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 62 | c = ListedColormap(('red', 'green'))(i), label = j) 63 | plt.title('Classifier (Test set)') 64 | plt.xlabel('Age') 65 | plt.ylabel('Estimated Salary') 66 | plt.legend() 67 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 17 - Kernel SVM/Kernel_SVM/kernel_svm.R: -------------------------------------------------------------------------------- 1 | # Kernel SVM 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature Scaling 19 | training_set[-3] = scale(training_set[-3]) 20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting Kernel SVM to the Training set 23 | # install.packages('e1071') 24 | library(e1071) 25 | classifier = svm(formula = Purchased ~ ., 26 | data = training_set, 27 | type = 'C-classification', 28 | kernel = 'radial') 29 | 30 | # Predicting the Test set results 31 | y_pred = predict(classifier, newdata = test_set[-3]) 32 | 33 | # Making the Confusion Matrix 34 | cm = table(test_set[, 3], y_pred) 35 | 36 | # Visualising the Training set results 37 | library(ElemStatLearn) 38 | set = training_set 39 | 
X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 40 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 41 | grid_set = expand.grid(X1, X2) 42 | colnames(grid_set) = c('Age', 'EstimatedSalary') 43 | y_grid = predict(classifier, newdata = grid_set) 44 | plot(set[, -3], 45 | main = 'Kernel SVM (Training set)', 46 | xlab = 'Age', ylab = 'Estimated Salary', 47 | xlim = range(X1), ylim = range(X2)) 48 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 49 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 50 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 51 | 52 | # Visualising the Test set results 53 | library(ElemStatLearn) 54 | set = test_set 55 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 56 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 57 | grid_set = expand.grid(X1, X2) 58 | colnames(grid_set) = c('Age', 'EstimatedSalary') 59 | y_grid = predict(classifier, newdata = grid_set) 60 | plot(set[, -3], main = 'Kernel SVM (Test set)', 61 | xlab = 'Age', ylab = 'Estimated Salary', 62 | xlim = range(X1), ylim = range(X2)) 63 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 64 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 65 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 17 - Kernel SVM/Kernel_SVM/kernel_svm.py: -------------------------------------------------------------------------------- 1 | # Kernel SVM 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler() 20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting Kernel SVM to the Training set 24 | from sklearn.svm import SVC 25 | classifier = SVC(kernel = 'rbf', random_state = 0) 26 | classifier.fit(X_train, y_train) 27 | 28 | # Predicting the Test set results 29 | y_pred = classifier.predict(X_test) 30 | 31 | # Making the Confusion Matrix 32 | from sklearn.metrics import confusion_matrix 33 | cm = confusion_matrix(y_test, y_pred) 34 | 35 | # Visualising the Training set results 36 | from matplotlib.colors import ListedColormap 37 | X_set, y_set = X_train, y_train 38 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 39 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 40 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 41 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 42 | plt.xlim(X1.min(), X1.max()) 43 | plt.ylim(X2.min(), X2.max()) 44 | for i, j in enumerate(np.unique(y_set)): 45 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 46 | c = ListedColormap(('red', 'green'))(i), label = j) 47 | plt.title('Kernel SVM (Training set)') 48 | 
plt.xlabel('Age') 49 | plt.ylabel('Estimated Salary') 50 | plt.legend() 51 | plt.show() 52 | 53 | # Visualising the Test set results 54 | from matplotlib.colors import ListedColormap 55 | X_set, y_set = X_test, y_test 56 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 57 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 58 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 59 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 60 | plt.xlim(X1.min(), X1.max()) 61 | plt.ylim(X2.min(), X2.max()) 62 | for i, j in enumerate(np.unique(y_set)): 63 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 64 | c = ListedColormap(('red', 'green'))(i), label = j) 65 | plt.title('Kernel SVM (Test set)') 66 | plt.xlabel('Age') 67 | plt.ylabel('Estimated Salary') 68 | plt.legend() 69 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 18 - Naive Bayes/Naive_Bayes/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 3 - Classification/Section 18 - Naive Bayes/Naive_Bayes/.DS_Store -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 18 - Naive Bayes/Naive_Bayes/Social_Network_Ads.csv: -------------------------------------------------------------------------------- 1 | User ID,Gender,Age,EstimatedSalary,Purchased 2 | 15624510,Male,19,19000,0 3 | 15810944,Male,35,20000,0 4 | 15668575,Female,26,43000,0 5 | 15603246,Female,27,57000,0 6 | 15804002,Male,19,76000,0 7 | 15728773,Male,27,58000,0 8 | 15598044,Female,27,84000,0 9 | 15694829,Female,32,150000,1 10 | 15600575,Male,25,33000,0 11 | 15727311,Female,35,65000,0 12 | 15570769,Female,26,80000,0 13 | 15606274,Female,26,52000,0 14 | 15746139,Male,20,86000,0 15 | 15704987,Male,32,18000,0 16 | 15628972,Male,18,82000,0 17 | 15697686,Male,29,80000,0 18 | 15733883,Male,47,25000,1 19 | 15617482,Male,45,26000,1 20 | 15704583,Male,46,28000,1 21 | 15621083,Female,48,29000,1 22 | 15649487,Male,45,22000,1 23 | 15736760,Female,47,49000,1 24 | 15714658,Male,48,41000,1 25 | 15599081,Female,45,22000,1 26 | 15705113,Male,46,23000,1 27 | 15631159,Male,47,20000,1 28 | 15792818,Male,49,28000,1 29 | 15633531,Female,47,30000,1 30 | 15744529,Male,29,43000,0 31 | 15669656,Male,31,18000,0 32 | 15581198,Male,31,74000,0 33 | 15729054,Female,27,137000,1 34 | 15573452,Female,21,16000,0 35 | 15776733,Female,28,44000,0 36 | 15724858,Male,27,90000,0 37 | 15713144,Male,35,27000,0 38 | 15690188,Female,33,28000,0 39 | 15689425,Male,30,49000,0 40 | 15671766,Female,26,72000,0 41 | 15782806,Female,27,31000,0 42 | 15764419,Female,27,17000,0 43 | 15591915,Female,33,51000,0 44 | 15772798,Male,35,108000,0 45 | 15792008,Male,30,15000,0 46 | 15715541,Female,28,84000,0 47 | 15639277,Male,23,20000,0 48 | 15798850,Male,25,79000,0 49 | 15776348,Female,27,54000,0 50 | 15727696,Male,30,135000,1 51 | 15793813,Female,31,89000,0 52 | 15694395,Female,24,32000,0 53 | 15764195,Female,18,44000,0 54 | 15744919,Female,29,83000,0 55 | 15671655,Female,35,23000,0 56 | 15654901,Female,27,58000,0 57 | 15649136,Female,24,55000,0 58 | 15775562,Female,23,48000,0 59 | 15807481,Male,28,79000,0 60 | 
15642885,Male,22,18000,0 61 | 15789109,Female,32,117000,0 62 | 15814004,Male,27,20000,0 63 | 15673619,Male,25,87000,0 64 | 15595135,Female,23,66000,0 65 | 15583681,Male,32,120000,1 66 | 15605000,Female,59,83000,0 67 | 15718071,Male,24,58000,0 68 | 15679760,Male,24,19000,0 69 | 15654574,Female,23,82000,0 70 | 15577178,Female,22,63000,0 71 | 15595324,Female,31,68000,0 72 | 15756932,Male,25,80000,0 73 | 15726358,Female,24,27000,0 74 | 15595228,Female,20,23000,0 75 | 15782530,Female,33,113000,0 76 | 15592877,Male,32,18000,0 77 | 15651983,Male,34,112000,1 78 | 15746737,Male,18,52000,0 79 | 15774179,Female,22,27000,0 80 | 15667265,Female,28,87000,0 81 | 15655123,Female,26,17000,0 82 | 15595917,Male,30,80000,0 83 | 15668385,Male,39,42000,0 84 | 15709476,Male,20,49000,0 85 | 15711218,Male,35,88000,0 86 | 15798659,Female,30,62000,0 87 | 15663939,Female,31,118000,1 88 | 15694946,Male,24,55000,0 89 | 15631912,Female,28,85000,0 90 | 15768816,Male,26,81000,0 91 | 15682268,Male,35,50000,0 92 | 15684801,Male,22,81000,0 93 | 15636428,Female,30,116000,0 94 | 15809823,Male,26,15000,0 95 | 15699284,Female,29,28000,0 96 | 15786993,Female,29,83000,0 97 | 15709441,Female,35,44000,0 98 | 15710257,Female,35,25000,0 99 | 15582492,Male,28,123000,1 100 | 15575694,Male,35,73000,0 101 | 15756820,Female,28,37000,0 102 | 15766289,Male,27,88000,0 103 | 15593014,Male,28,59000,0 104 | 15584545,Female,32,86000,0 105 | 15675949,Female,33,149000,1 106 | 15672091,Female,19,21000,0 107 | 15801658,Male,21,72000,0 108 | 15706185,Female,26,35000,0 109 | 15789863,Male,27,89000,0 110 | 15720943,Male,26,86000,0 111 | 15697997,Female,38,80000,0 112 | 15665416,Female,39,71000,0 113 | 15660200,Female,37,71000,0 114 | 15619653,Male,38,61000,0 115 | 15773447,Male,37,55000,0 116 | 15739160,Male,42,80000,0 117 | 15689237,Male,40,57000,0 118 | 15679297,Male,35,75000,0 119 | 15591433,Male,36,52000,0 120 | 15642725,Male,40,59000,0 121 | 15701962,Male,41,59000,0 122 | 15811613,Female,36,75000,0 123 | 15741049,Male,37,72000,0 124 | 15724423,Female,40,75000,0 125 | 15574305,Male,35,53000,0 126 | 15678168,Female,41,51000,0 127 | 15697020,Female,39,61000,0 128 | 15610801,Male,42,65000,0 129 | 15745232,Male,26,32000,0 130 | 15722758,Male,30,17000,0 131 | 15792102,Female,26,84000,0 132 | 15675185,Male,31,58000,0 133 | 15801247,Male,33,31000,0 134 | 15725660,Male,30,87000,0 135 | 15638963,Female,21,68000,0 136 | 15800061,Female,28,55000,0 137 | 15578006,Male,23,63000,0 138 | 15668504,Female,20,82000,0 139 | 15687491,Male,30,107000,1 140 | 15610403,Female,28,59000,0 141 | 15741094,Male,19,25000,0 142 | 15807909,Male,19,85000,0 143 | 15666141,Female,18,68000,0 144 | 15617134,Male,35,59000,0 145 | 15783029,Male,30,89000,0 146 | 15622833,Female,34,25000,0 147 | 15746422,Female,24,89000,0 148 | 15750839,Female,27,96000,1 149 | 15749130,Female,41,30000,0 150 | 15779862,Male,29,61000,0 151 | 15767871,Male,20,74000,0 152 | 15679651,Female,26,15000,0 153 | 15576219,Male,41,45000,0 154 | 15699247,Male,31,76000,0 155 | 15619087,Female,36,50000,0 156 | 15605327,Male,40,47000,0 157 | 15610140,Female,31,15000,0 158 | 15791174,Male,46,59000,0 159 | 15602373,Male,29,75000,0 160 | 15762605,Male,26,30000,0 161 | 15598840,Female,32,135000,1 162 | 15744279,Male,32,100000,1 163 | 15670619,Male,25,90000,0 164 | 15599533,Female,37,33000,0 165 | 15757837,Male,35,38000,0 166 | 15697574,Female,33,69000,0 167 | 15578738,Female,18,86000,0 168 | 15762228,Female,22,55000,0 169 | 15614827,Female,35,71000,0 170 | 15789815,Male,29,148000,1 171 | 15579781,Female,29,47000,0 172 | 
15587013,Male,21,88000,0 173 | 15570932,Male,34,115000,0 174 | 15794661,Female,26,118000,0 175 | 15581654,Female,34,43000,0 176 | 15644296,Female,34,72000,0 177 | 15614420,Female,23,28000,0 178 | 15609653,Female,35,47000,0 179 | 15594577,Male,25,22000,0 180 | 15584114,Male,24,23000,0 181 | 15673367,Female,31,34000,0 182 | 15685576,Male,26,16000,0 183 | 15774727,Female,31,71000,0 184 | 15694288,Female,32,117000,1 185 | 15603319,Male,33,43000,0 186 | 15759066,Female,33,60000,0 187 | 15814816,Male,31,66000,0 188 | 15724402,Female,20,82000,0 189 | 15571059,Female,33,41000,0 190 | 15674206,Male,35,72000,0 191 | 15715160,Male,28,32000,0 192 | 15730448,Male,24,84000,0 193 | 15662067,Female,19,26000,0 194 | 15779581,Male,29,43000,0 195 | 15662901,Male,19,70000,0 196 | 15689751,Male,28,89000,0 197 | 15667742,Male,34,43000,0 198 | 15738448,Female,30,79000,0 199 | 15680243,Female,20,36000,0 200 | 15745083,Male,26,80000,0 201 | 15708228,Male,35,22000,0 202 | 15628523,Male,35,39000,0 203 | 15708196,Male,49,74000,0 204 | 15735549,Female,39,134000,1 205 | 15809347,Female,41,71000,0 206 | 15660866,Female,58,101000,1 207 | 15766609,Female,47,47000,0 208 | 15654230,Female,55,130000,1 209 | 15794566,Female,52,114000,0 210 | 15800890,Female,40,142000,1 211 | 15697424,Female,46,22000,0 212 | 15724536,Female,48,96000,1 213 | 15735878,Male,52,150000,1 214 | 15707596,Female,59,42000,0 215 | 15657163,Male,35,58000,0 216 | 15622478,Male,47,43000,0 217 | 15779529,Female,60,108000,1 218 | 15636023,Male,49,65000,0 219 | 15582066,Male,40,78000,0 220 | 15666675,Female,46,96000,0 221 | 15732987,Male,59,143000,1 222 | 15789432,Female,41,80000,0 223 | 15663161,Male,35,91000,1 224 | 15694879,Male,37,144000,1 225 | 15593715,Male,60,102000,1 226 | 15575002,Female,35,60000,0 227 | 15622171,Male,37,53000,0 228 | 15795224,Female,36,126000,1 229 | 15685346,Male,56,133000,1 230 | 15691808,Female,40,72000,0 231 | 15721007,Female,42,80000,1 232 | 15794253,Female,35,147000,1 233 | 15694453,Male,39,42000,0 234 | 15813113,Male,40,107000,1 235 | 15614187,Male,49,86000,1 236 | 15619407,Female,38,112000,0 237 | 15646227,Male,46,79000,1 238 | 15660541,Male,40,57000,0 239 | 15753874,Female,37,80000,0 240 | 15617877,Female,46,82000,0 241 | 15772073,Female,53,143000,1 242 | 15701537,Male,42,149000,1 243 | 15736228,Male,38,59000,0 244 | 15780572,Female,50,88000,1 245 | 15769596,Female,56,104000,1 246 | 15586996,Female,41,72000,0 247 | 15722061,Female,51,146000,1 248 | 15638003,Female,35,50000,0 249 | 15775590,Female,57,122000,1 250 | 15730688,Male,41,52000,0 251 | 15753102,Female,35,97000,1 252 | 15810075,Female,44,39000,0 253 | 15723373,Male,37,52000,0 254 | 15795298,Female,48,134000,1 255 | 15584320,Female,37,146000,1 256 | 15724161,Female,50,44000,0 257 | 15750056,Female,52,90000,1 258 | 15609637,Female,41,72000,0 259 | 15794493,Male,40,57000,0 260 | 15569641,Female,58,95000,1 261 | 15815236,Female,45,131000,1 262 | 15811177,Female,35,77000,0 263 | 15680587,Male,36,144000,1 264 | 15672821,Female,55,125000,1 265 | 15767681,Female,35,72000,0 266 | 15600379,Male,48,90000,1 267 | 15801336,Female,42,108000,1 268 | 15721592,Male,40,75000,0 269 | 15581282,Male,37,74000,0 270 | 15746203,Female,47,144000,1 271 | 15583137,Male,40,61000,0 272 | 15680752,Female,43,133000,0 273 | 15688172,Female,59,76000,1 274 | 15791373,Male,60,42000,1 275 | 15589449,Male,39,106000,1 276 | 15692819,Female,57,26000,1 277 | 15727467,Male,57,74000,1 278 | 15734312,Male,38,71000,0 279 | 15764604,Male,49,88000,1 280 | 15613014,Female,52,38000,1 281 | 
15759684,Female,50,36000,1 282 | 15609669,Female,59,88000,1 283 | 15685536,Male,35,61000,0 284 | 15750447,Male,37,70000,1 285 | 15663249,Female,52,21000,1 286 | 15638646,Male,48,141000,0 287 | 15734161,Female,37,93000,1 288 | 15631070,Female,37,62000,0 289 | 15761950,Female,48,138000,1 290 | 15649668,Male,41,79000,0 291 | 15713912,Female,37,78000,1 292 | 15586757,Male,39,134000,1 293 | 15596522,Male,49,89000,1 294 | 15625395,Male,55,39000,1 295 | 15760570,Male,37,77000,0 296 | 15566689,Female,35,57000,0 297 | 15725794,Female,36,63000,0 298 | 15673539,Male,42,73000,1 299 | 15705298,Female,43,112000,1 300 | 15675791,Male,45,79000,0 301 | 15747043,Male,46,117000,1 302 | 15736397,Female,58,38000,1 303 | 15678201,Male,48,74000,1 304 | 15720745,Female,37,137000,1 305 | 15637593,Male,37,79000,1 306 | 15598070,Female,40,60000,0 307 | 15787550,Male,42,54000,0 308 | 15603942,Female,51,134000,0 309 | 15733973,Female,47,113000,1 310 | 15596761,Male,36,125000,1 311 | 15652400,Female,38,50000,0 312 | 15717893,Female,42,70000,0 313 | 15622585,Male,39,96000,1 314 | 15733964,Female,38,50000,0 315 | 15753861,Female,49,141000,1 316 | 15747097,Female,39,79000,0 317 | 15594762,Female,39,75000,1 318 | 15667417,Female,54,104000,1 319 | 15684861,Male,35,55000,0 320 | 15742204,Male,45,32000,1 321 | 15623502,Male,36,60000,0 322 | 15774872,Female,52,138000,1 323 | 15611191,Female,53,82000,1 324 | 15674331,Male,41,52000,0 325 | 15619465,Female,48,30000,1 326 | 15575247,Female,48,131000,1 327 | 15695679,Female,41,60000,0 328 | 15713463,Male,41,72000,0 329 | 15785170,Female,42,75000,0 330 | 15796351,Male,36,118000,1 331 | 15639576,Female,47,107000,1 332 | 15693264,Male,38,51000,0 333 | 15589715,Female,48,119000,1 334 | 15769902,Male,42,65000,0 335 | 15587177,Male,40,65000,0 336 | 15814553,Male,57,60000,1 337 | 15601550,Female,36,54000,0 338 | 15664907,Male,58,144000,1 339 | 15612465,Male,35,79000,0 340 | 15810800,Female,38,55000,0 341 | 15665760,Male,39,122000,1 342 | 15588080,Female,53,104000,1 343 | 15776844,Male,35,75000,0 344 | 15717560,Female,38,65000,0 345 | 15629739,Female,47,51000,1 346 | 15729908,Male,47,105000,1 347 | 15716781,Female,41,63000,0 348 | 15646936,Male,53,72000,1 349 | 15768151,Female,54,108000,1 350 | 15579212,Male,39,77000,0 351 | 15721835,Male,38,61000,0 352 | 15800515,Female,38,113000,1 353 | 15591279,Male,37,75000,0 354 | 15587419,Female,42,90000,1 355 | 15750335,Female,37,57000,0 356 | 15699619,Male,36,99000,1 357 | 15606472,Male,60,34000,1 358 | 15778368,Male,54,70000,1 359 | 15671387,Female,41,72000,0 360 | 15573926,Male,40,71000,1 361 | 15709183,Male,42,54000,0 362 | 15577514,Male,43,129000,1 363 | 15778830,Female,53,34000,1 364 | 15768072,Female,47,50000,1 365 | 15768293,Female,42,79000,0 366 | 15654456,Male,42,104000,1 367 | 15807525,Female,59,29000,1 368 | 15574372,Female,58,47000,1 369 | 15671249,Male,46,88000,1 370 | 15779744,Male,38,71000,0 371 | 15624755,Female,54,26000,1 372 | 15611430,Female,60,46000,1 373 | 15774744,Male,60,83000,1 374 | 15629885,Female,39,73000,0 375 | 15708791,Male,59,130000,1 376 | 15793890,Female,37,80000,0 377 | 15646091,Female,46,32000,1 378 | 15596984,Female,46,74000,0 379 | 15800215,Female,42,53000,0 380 | 15577806,Male,41,87000,1 381 | 15749381,Female,58,23000,1 382 | 15683758,Male,42,64000,0 383 | 15670615,Male,48,33000,1 384 | 15715622,Female,44,139000,1 385 | 15707634,Male,49,28000,1 386 | 15806901,Female,57,33000,1 387 | 15775335,Male,56,60000,1 388 | 15724150,Female,49,39000,1 389 | 15627220,Male,39,71000,0 390 | 15672330,Male,47,34000,1 391 | 
15668521,Female,48,35000,1 392 | 15807837,Male,48,33000,1 393 | 15592570,Male,47,23000,1 394 | 15748589,Female,45,45000,1 395 | 15635893,Male,60,42000,1 396 | 15757632,Female,39,59000,0 397 | 15691863,Female,46,41000,1 398 | 15706071,Male,51,23000,1 399 | 15654296,Female,50,20000,1 400 | 15755018,Male,36,33000,0 401 | 15594041,Female,49,36000,1 -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 18 - Naive Bayes/Naive_Bayes/classification_template.R: -------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature Scaling 19 | training_set[-3] = scale(training_set[-3]) 20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting classifier to the Training set 23 | # Create your classifier here 24 | 25 | # Predicting the Test set results 26 | y_pred = predict(classifier, newdata = test_set[-3]) 27 | 28 | # Making the Confusion Matrix 29 | cm = table(test_set[, 3], y_pred) 30 | 31 | # Visualising the Training set results 32 | library(ElemStatLearn) 33 | set = training_set 34 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 35 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 36 | grid_set = expand.grid(X1, X2) 37 | colnames(grid_set) = c('Age', 'EstimatedSalary') 38 | y_grid = predict(classifier, newdata = grid_set) 39 | plot(set[, -3], 40 | main = 'Classifier (Training set)', 41 | xlab = 'Age', ylab = 'Estimated Salary', 42 | xlim = range(X1), ylim = range(X2)) 43 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 44 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 45 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 46 | 47 | # Visualising the Test set results 48 | library(ElemStatLearn) 49 | set = test_set 50 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 51 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 52 | grid_set = expand.grid(X1, X2) 53 | colnames(grid_set) = c('Age', 'EstimatedSalary') 54 | y_grid = predict(classifier, newdata = grid_set) 55 | plot(set[, -3], main = 'Classifier (Test set)', 56 | xlab = 'Age', ylab = 'Estimated Salary', 57 | xlim = range(X1), ylim = range(X2)) 58 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 59 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 60 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 18 - Naive Bayes/Naive_Bayes/classification_template.py: -------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = 
pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler() 20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting classifier to the Training set 24 | # Create your classifier here 25 | 26 | # Predicting the Test set results 27 | y_pred = classifier.predict(X_test) 28 | 29 | # Making the Confusion Matrix 30 | from sklearn.metrics import confusion_matrix 31 | cm = confusion_matrix(y_test, y_pred) 32 | 33 | # Visualising the Training set results 34 | from matplotlib.colors import ListedColormap 35 | X_set, y_set = X_train, y_train 36 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 37 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 38 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 39 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 40 | plt.xlim(X1.min(), X1.max()) 41 | plt.ylim(X2.min(), X2.max()) 42 | for i, j in enumerate(np.unique(y_set)): 43 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 44 | c = ListedColormap(('red', 'green'))(i), label = j) 45 | plt.title('Classifier (Training set)') 46 | plt.xlabel('Age') 47 | plt.ylabel('Estimated Salary') 48 | plt.legend() 49 | plt.show() 50 | 51 | # Visualising the Test set results 52 | from matplotlib.colors import ListedColormap 53 | X_set, y_set = X_test, y_test 54 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 55 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 56 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 57 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 58 | plt.xlim(X1.min(), X1.max()) 59 | plt.ylim(X2.min(), X2.max()) 60 | for i, j in enumerate(np.unique(y_set)): 61 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 62 | c = ListedColormap(('red', 'green'))(i), label = j) 63 | plt.title('Classifier (Test set)') 64 | plt.xlabel('Age') 65 | plt.ylabel('Estimated Salary') 66 | plt.legend() 67 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 18 - Naive Bayes/Naive_Bayes/naive_bayes.R: -------------------------------------------------------------------------------- 1 | # Naive Bayes 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature Scaling 19 | training_set[-3] = scale(training_set[-3]) 20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting Naive Bayes to the Training set 23 | # install.packages('e1071')
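# Note: naiveBayes() from e1071 is, for numeric predictors, a Gaussian Naive
# Bayes. It estimates the prior P(Purchased) from class frequencies plus one
# Gaussian per class for each scaled feature, then predicts the class that
# maximises P(class) * P(Age | class) * P(EstimatedSalary | class), i.e. it
# assumes the two features are independent given the class.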
24 | library(e1071) 25 | classifier = naiveBayes(x = training_set[-3], 26 | y = training_set$Purchased) 27 | 28 | # Predicting the Test set results 29 | y_pred = predict(classifier, newdata = test_set[-3]) 30 | 31 | # Making the Confusion Matrix 32 | cm = table(test_set[, 3], y_pred) 33 | 34 | # Visualising the Training set results 35 | library(ElemStatLearn) 36 | set = training_set 37 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 38 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 39 | grid_set = expand.grid(X1, X2) 40 | colnames(grid_set) = c('Age', 'EstimatedSalary') 41 | y_grid = predict(classifier, newdata = grid_set) 42 | plot(set[, -3], 43 | main = 'Naive Bayes (Training set)', 44 | xlab = 'Age', ylab = 'Estimated Salary', 45 | xlim = range(X1), ylim = range(X2)) 46 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 47 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 48 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 49 | 50 | # Visualising the Test set results 51 | library(ElemStatLearn) 52 | set = test_set 53 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 54 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 55 | grid_set = expand.grid(X1, X2) 56 | colnames(grid_set) = c('Age', 'EstimatedSalary') 57 | y_grid = predict(classifier, newdata = grid_set) 58 | plot(set[, -3], main = 'Naive Bayes (Test set)', 59 | xlab = 'Age', ylab = 'Estimated Salary', 60 | xlim = range(X1), ylim = range(X2)) 61 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 62 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 63 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 18 - Naive Bayes/Naive_Bayes/naive_bayes.py: -------------------------------------------------------------------------------- 1 | # Naive Bayes 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler() 20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting Naive Bayes to the Training set 24 | from sklearn.naive_bayes import GaussianNB 25 | classifier = GaussianNB() 26 | classifier.fit(X_train, y_train) 27 | 28 | # Predicting the Test set results 29 | y_pred = classifier.predict(X_test) 30 | 31 | # Making the Confusion Matrix 32 | from sklearn.metrics import confusion_matrix 33 | cm = confusion_matrix(y_test, y_pred) 34 | 35 | # Visualising the Training set results 36 | from matplotlib.colors import ListedColormap 37 | X_set, y_set = X_train, y_train 38 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 39 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 40 | plt.contourf(X1, X2, 
classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 41 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 42 | plt.xlim(X1.min(), X1.max()) 43 | plt.ylim(X2.min(), X2.max()) 44 | for i, j in enumerate(np.unique(y_set)): 45 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 46 | c = ListedColormap(('red', 'green'))(i), label = j) 47 | plt.title('Naive Bayes (Training set)') 48 | plt.xlabel('Age') 49 | plt.ylabel('Estimated Salary') 50 | plt.legend() 51 | plt.show() 52 | 53 | # Visualising the Test set results 54 | from matplotlib.colors import ListedColormap 55 | X_set, y_set = X_test, y_test 56 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 57 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 58 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 59 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 60 | plt.xlim(X1.min(), X1.max()) 61 | plt.ylim(X2.min(), X2.max()) 62 | for i, j in enumerate(np.unique(y_set)): 63 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 64 | c = ListedColormap(('red', 'green'))(i), label = j) 65 | plt.title('Naive Bayes (Test set)') 66 | plt.xlabel('Age') 67 | plt.ylabel('Estimated Salary') 68 | plt.legend() 69 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 19 - Decision Tree Classification/Decision_Tree_Classification/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 3 - Classification/Section 19 - Decision Tree Classification/Decision_Tree_Classification/.DS_Store -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 19 - Decision Tree Classification/Decision_Tree_Classification/classification_template.R: -------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature Scaling 19 | training_set[-3] = scale(training_set[-3]) 20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting classifier to the Training set 23 | # Create your classifier here 24 | 25 | # Predicting the Test set results 26 | y_pred = predict(classifier, newdata = test_set[-3]) 27 | 28 | # Making the Confusion Matrix 29 | cm = table(test_set[, 3], y_pred) 30 | 31 | # Visualising the Training set results 32 | library(ElemStatLearn) 33 | set = training_set 34 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 35 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 36 | grid_set = expand.grid(X1, X2) 37 | colnames(grid_set) = c('Age', 'EstimatedSalary') 38 | y_grid = predict(classifier, newdata = grid_set) 39 | plot(set[, -3], 40 | main 
= 'Classifier (Training set)', 41 | xlab = 'Age', ylab = 'Estimated Salary', 42 | xlim = range(X1), ylim = range(X2)) 43 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 44 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 45 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 46 | 47 | # Visualising the Test set results 48 | library(ElemStatLearn) 49 | set = test_set 50 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 51 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 52 | grid_set = expand.grid(X1, X2) 53 | colnames(grid_set) = c('Age', 'EstimatedSalary') 54 | y_grid = predict(classifier, newdata = grid_set) 55 | plot(set[, -3], main = 'Classifier (Test set)', 56 | xlab = 'Age', ylab = 'Estimated Salary', 57 | xlim = range(X1), ylim = range(X2)) 58 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 59 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 60 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 19 - Decision Tree Classification/Decision_Tree_Classification/classification_template.py: -------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler() 20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting classifier to the Training set 24 | # Create your classifier here 25 | 26 | # Predicting the Test set results 27 | y_pred = classifier.predict(X_test) 28 | 29 | # Making the Confusion Matrix 30 | from sklearn.metrics import confusion_matrix 31 | cm = confusion_matrix(y_test, y_pred) 32 | 33 | # Visualising the Training set results 34 | from matplotlib.colors import ListedColormap 35 | X_set, y_set = X_train, y_train 36 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 37 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 38 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 39 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 40 | plt.xlim(X1.min(), X1.max()) 41 | plt.ylim(X2.min(), X2.max()) 42 | for i, j in enumerate(np.unique(y_set)): 43 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 44 | c = ListedColormap(('red', 'green'))(i), label = j) 45 | plt.title('Classifier (Training set)') 46 | plt.xlabel('Age') 47 | plt.ylabel('Estimated Salary') 48 | plt.legend() 49 | plt.show() 50 | 51 | # Visualising the Test set results 52 | from matplotlib.colors import ListedColormap 53 | X_set, y_set = X_test, y_test 54 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 
55 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 56 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 57 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 58 | plt.xlim(X1.min(), X1.max()) 59 | plt.ylim(X2.min(), X2.max()) 60 | for i, j in enumerate(np.unique(y_set)): 61 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 62 | c = ListedColormap(('red', 'green'))(i), label = j) 63 | plt.title('Classifier (Test set)') 64 | plt.xlabel('Age') 65 | plt.ylabel('Estimated Salary') 66 | plt.legend() 67 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 19 - Decision Tree Classification/Decision_Tree_Classification/decision_tree_classification.R: -------------------------------------------------------------------------------- 1 | # Decision Tree Classification 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature Scaling 19 | training_set[-3] = scale(training_set[-3]) 20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting Decision Tree Classification to the Training set 23 | # install.packages('rpart') 24 | library(rpart) 25 | classifier = rpart(formula = Purchased ~ ., 26 | data = training_set) 27 | 28 | # Predicting the Test set results 29 | y_pred = predict(classifier, newdata = test_set[-3], type = 'class') 30 | 31 | # Making the Confusion Matrix 32 | cm = table(test_set[, 3], y_pred) 33 | 34 | # Visualising the Training set results 35 | library(ElemStatLearn) 36 | set = training_set 37 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 38 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 39 | grid_set = expand.grid(X1, X2) 40 | colnames(grid_set) = c('Age', 'EstimatedSalary') 41 | y_grid = predict(classifier, newdata = grid_set, type = 'class') 42 | plot(set[, -3], 43 | main = 'Decision Tree Classification (Training set)', 44 | xlab = 'Age', ylab = 'Estimated Salary', 45 | xlim = range(X1), ylim = range(X2)) 46 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 47 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 48 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 49 | 50 | # Visualising the Test set results 51 | library(ElemStatLearn) 52 | set = test_set 53 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 54 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 55 | grid_set = expand.grid(X1, X2) 56 | colnames(grid_set) = c('Age', 'EstimatedSalary') 57 | y_grid = predict(classifier, newdata = grid_set, type = 'class') 58 | plot(set[, -3], main = 'Decision Tree Classification (Test set)', 59 | xlab = 'Age', ylab = 'Estimated Salary', 60 | xlim = range(X1), ylim = range(X2)) 61 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 62 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 63 | points(set, pch = 21, bg = 
ifelse(set[, 3] == 1, 'green4', 'red3')) 64 | 65 | # Plotting the tree 66 | plot(classifier) 67 | text(classifier) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 19 - Decision Tree Classification/Decision_Tree_Classification/decision_tree_classification.py: -------------------------------------------------------------------------------- 1 | # Decision Tree Classification 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler() 20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting Decision Tree Classification to the Training set 24 | from sklearn.tree import DecisionTreeClassifier 25 | classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0) 26 | classifier.fit(X_train, y_train) 27 | 28 | # Predicting the Test set results 29 | y_pred = classifier.predict(X_test) 30 | 31 | # Making the Confusion Matrix 32 | from sklearn.metrics import confusion_matrix 33 | cm = confusion_matrix(y_test, y_pred) 34 | 35 | # Visualising the Training set results 36 | from matplotlib.colors import ListedColormap 37 | X_set, y_set = X_train, y_train 38 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 39 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 40 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 41 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 42 | plt.xlim(X1.min(), X1.max()) 43 | plt.ylim(X2.min(), X2.max()) 44 | for i, j in enumerate(np.unique(y_set)): 45 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 46 | c = ListedColormap(('red', 'green'))(i), label = j) 47 | plt.title('Decision Tree Classification (Training set)') 48 | plt.xlabel('Age') 49 | plt.ylabel('Estimated Salary') 50 | plt.legend() 51 | plt.show() 52 | 53 | # Visualising the Test set results 54 | from matplotlib.colors import ListedColormap 55 | X_set, y_set = X_test, y_test 56 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 57 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 58 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 59 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 60 | plt.xlim(X1.min(), X1.max()) 61 | plt.ylim(X2.min(), X2.max()) 62 | for i, j in enumerate(np.unique(y_set)): 63 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 64 | c = ListedColormap(('red', 'green'))(i), label = j) 65 | plt.title('Decision Tree Classification (Test set)') 66 | plt.xlabel('Age') 67 | plt.ylabel('Estimated Salary') 68 | plt.legend() 69 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 20 - Random Forest 
Classification/Random_Forest_Classification/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 3 - Classification/Section 20 - Random Forest Classification/Random_Forest_Classification/.DS_Store -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 20 - Random Forest Classification/Random_Forest_Classification/classification_template.R: -------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature Scaling 19 | training_set[-3] = scale(training_set[-3]) 20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting classifier to the Training set 23 | # Create your classifier here 24 | 25 | # Predicting the Test set results 26 | y_pred = predict(classifier, newdata = test_set[-3]) 27 | 28 | # Making the Confusion Matrix 29 | cm = table(test_set[, 3], y_pred) 30 | 31 | # Visualising the Training set results 32 | library(ElemStatLearn) 33 | set = training_set 34 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 35 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 36 | grid_set = expand.grid(X1, X2) 37 | colnames(grid_set) = c('Age', 'EstimatedSalary') 38 | y_grid = predict(classifier, newdata = grid_set) 39 | plot(set[, -3], 40 | main = 'Classifier (Training set)', 41 | xlab = 'Age', ylab = 'Estimated Salary', 42 | xlim = range(X1), ylim = range(X2)) 43 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 44 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 45 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 46 | 47 | # Visualising the Test set results 48 | library(ElemStatLearn) 49 | set = test_set 50 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 51 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 52 | grid_set = expand.grid(X1, X2) 53 | colnames(grid_set) = c('Age', 'EstimatedSalary') 54 | y_grid = predict(classifier, newdata = grid_set) 55 | plot(set[, -3], main = 'Classifier (Test set)', 56 | xlab = 'Age', ylab = 'Estimated Salary', 57 | xlim = range(X1), ylim = range(X2)) 58 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 59 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 60 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 20 - Random Forest Classification/Random_Forest_Classification/classification_template.py: -------------------------------------------------------------------------------- 1 | # Classification template 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import 
matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler() 20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting classifier to the Training set 24 | # Create your classifier here 25 | 26 | # Predicting the Test set results 27 | y_pred = classifier.predict(X_test) 28 | 29 | # Making the Confusion Matrix 30 | from sklearn.metrics import confusion_matrix 31 | cm = confusion_matrix(y_test, y_pred) 32 | 33 | # Visualising the Training set results 34 | from matplotlib.colors import ListedColormap 35 | X_set, y_set = X_train, y_train 36 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 37 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 38 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 39 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 40 | plt.xlim(X1.min(), X1.max()) 41 | plt.ylim(X2.min(), X2.max()) 42 | for i, j in enumerate(np.unique(y_set)): 43 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 44 | c = ListedColormap(('red', 'green'))(i), label = j) 45 | plt.title('Classifier (Training set)') 46 | plt.xlabel('Age') 47 | plt.ylabel('Estimated Salary') 48 | plt.legend() 49 | plt.show() 50 | 51 | # Visualising the Test set results 52 | from matplotlib.colors import ListedColormap 53 | X_set, y_set = X_test, y_test 54 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 55 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 56 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 57 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 58 | plt.xlim(X1.min(), X1.max()) 59 | plt.ylim(X2.min(), X2.max()) 60 | for i, j in enumerate(np.unique(y_set)): 61 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 62 | c = ListedColormap(('red', 'green'))(i), label = j) 63 | plt.title('Classifier (Test set)') 64 | plt.xlabel('Age') 65 | plt.ylabel('Estimated Salary') 66 | plt.legend() 67 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 20 - Random Forest Classification/Random_Forest_Classification/random_forest_classification.R: -------------------------------------------------------------------------------- 1 | # Random Forest Classification 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Social_Network_Ads.csv') 5 | dataset = dataset[3:5] 6 | 7 | # Encoding the target feature as factor 8 | dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1)) 9 | 10 | # Splitting the dataset into the Training set and Test set 11 | # install.packages('caTools') 12 | library(caTools) 13 | set.seed(123) 14 | split = sample.split(dataset$Purchased, SplitRatio = 0.75) 15 | training_set = subset(dataset, split == TRUE) 16 | test_set = subset(dataset, split == FALSE) 17 | 18 | # Feature 
Scaling 19 | training_set[-3] = scale(training_set[-3]) 20 | test_set[-3] = scale(test_set[-3]) 21 | 22 | # Fitting Random Forest Classification to the Training set 23 | # install.packages('randomForest') 24 | library(randomForest) 25 | set.seed(123) 26 | classifier = randomForest(x = training_set[-3], 27 | y = training_set$Purchased, 28 | ntree = 500) 29 | 30 | # Predicting the Test set results 31 | y_pred = predict(classifier, newdata = test_set[-3]) 32 | 33 | # Making the Confusion Matrix 34 | cm = table(test_set[, 3], y_pred) 35 | 36 | # Visualising the Training set results 37 | library(ElemStatLearn) 38 | set = training_set 39 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 40 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 41 | grid_set = expand.grid(X1, X2) 42 | colnames(grid_set) = c('Age', 'EstimatedSalary') 43 | y_grid = predict(classifier, grid_set) 44 | plot(set[, -3], 45 | main = 'Random Forest Classification (Training set)', 46 | xlab = 'Age', ylab = 'Estimated Salary', 47 | xlim = range(X1), ylim = range(X2)) 48 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 49 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 50 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 51 | 52 | # Visualising the Test set results 53 | library(ElemStatLearn) 54 | set = test_set 55 | X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01) 56 | X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01) 57 | grid_set = expand.grid(X1, X2) 58 | colnames(grid_set) = c('Age', 'EstimatedSalary') 59 | y_grid = predict(classifier, grid_set) 60 | plot(set[, -3], main = 'Random Forest Classification (Test set)', 61 | xlab = 'Age', ylab = 'Estimated Salary', 62 | xlim = range(X1), ylim = range(X2)) 63 | contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE) 64 | points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato')) 65 | points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3')) 66 | 67 | # Choosing the number of trees 68 | plot(classifier) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 3 - Classification/Section 20 - Random Forest Classification/Random_Forest_Classification/random_forest_classification.py: -------------------------------------------------------------------------------- 1 | # Random Forest Classification 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Social_Network_Ads.csv') 10 | X = dataset.iloc[:, [2, 3]].values 11 | y = dataset.iloc[:, 4].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0) 16 | 17 | # Feature Scaling 18 | from sklearn.preprocessing import StandardScaler 19 | sc = StandardScaler() 20 | X_train = sc.fit_transform(X_train) 21 | X_test = sc.transform(X_test) 22 | 23 | # Fitting Random Forest Classification to the Training set 24 | from sklearn.ensemble import RandomForestClassifier 25 | classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0) 26 | classifier.fit(X_train, y_train) 27 | 28 | # Predicting the Test set results 29 | y_pred = classifier.predict(X_test) 30 | 31 | # Making the Confusion 
Matrix 32 | from sklearn.metrics import confusion_matrix 33 | cm = confusion_matrix(y_test, y_pred) 34 | 35 | # Visualising the Training set results 36 | from matplotlib.colors import ListedColormap 37 | X_set, y_set = X_train, y_train 38 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 39 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 40 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 41 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 42 | plt.xlim(X1.min(), X1.max()) 43 | plt.ylim(X2.min(), X2.max()) 44 | for i, j in enumerate(np.unique(y_set)): 45 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 46 | c = ListedColormap(('red', 'green'))(i), label = j) 47 | plt.title('Random Forest Classification (Training set)') 48 | plt.xlabel('Age') 49 | plt.ylabel('Estimated Salary') 50 | plt.legend() 51 | plt.show() 52 | 53 | # Visualising the Test set results 54 | from matplotlib.colors import ListedColormap 55 | X_set, y_set = X_test, y_test 56 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 57 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 58 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 59 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 60 | plt.xlim(X1.min(), X1.max()) 61 | plt.ylim(X2.min(), X2.max()) 62 | for i, j in enumerate(np.unique(y_set)): 63 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 64 | c = ListedColormap(('red', 'green'))(i), label = j) 65 | plt.title('Random Forest Classification (Test set)') 66 | plt.xlabel('Age') 67 | plt.ylabel('Estimated Salary') 68 | plt.legend() 69 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 4 - Clustering/Section 24 - K-Means Clustering/K_Means/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 4 - Clustering/Section 24 - K-Means Clustering/K_Means/.DS_Store -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 4 - Clustering/Section 24 - K-Means Clustering/K_Means/Mall_Customers.csv: -------------------------------------------------------------------------------- 1 | CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100) 2 | 0001,Male,19,15,39 3 | 0002,Male,21,15,81 4 | 0003,Female,20,16,6 5 | 0004,Female,23,16,77 6 | 0005,Female,31,17,40 7 | 0006,Female,22,17,76 8 | 0007,Female,35,18,6 9 | 0008,Female,23,18,94 10 | 0009,Male,64,19,3 11 | 0010,Female,30,19,72 12 | 0011,Male,67,19,14 13 | 0012,Female,35,19,99 14 | 0013,Female,58,20,15 15 | 0014,Female,24,20,77 16 | 0015,Male,37,20,13 17 | 0016,Male,22,20,79 18 | 0017,Female,35,21,35 19 | 0018,Male,20,21,66 20 | 0019,Male,52,23,29 21 | 0020,Female,35,23,98 22 | 0021,Male,35,24,35 23 | 0022,Male,25,24,73 24 | 0023,Female,46,25,5 25 | 0024,Male,31,25,73 26 | 0025,Female,54,28,14 27 | 0026,Male,29,28,82 28 | 0027,Female,45,28,32 29 | 0028,Male,35,28,61 30 | 0029,Female,40,29,31 31 | 0030,Female,23,29,87 32 | 0031,Male,60,30,4 33 | 0032,Female,21,30,73 34 | 0033,Male,53,33,4 35 | 0034,Male,18,33,92 36 | 0035,Female,49,33,14 37 | 0036,Female,21,33,81 38 
| 0037,Female,42,34,17 39 | 0038,Female,30,34,73 40 | 0039,Female,36,37,26 41 | 0040,Female,20,37,75 42 | 0041,Female,65,38,35 43 | 0042,Male,24,38,92 44 | 0043,Male,48,39,36 45 | 0044,Female,31,39,61 46 | 0045,Female,49,39,28 47 | 0046,Female,24,39,65 48 | 0047,Female,50,40,55 49 | 0048,Female,27,40,47 50 | 0049,Female,29,40,42 51 | 0050,Female,31,40,42 52 | 0051,Female,49,42,52 53 | 0052,Male,33,42,60 54 | 0053,Female,31,43,54 55 | 0054,Male,59,43,60 56 | 0055,Female,50,43,45 57 | 0056,Male,47,43,41 58 | 0057,Female,51,44,50 59 | 0058,Male,69,44,46 60 | 0059,Female,27,46,51 61 | 0060,Male,53,46,46 62 | 0061,Male,70,46,56 63 | 0062,Male,19,46,55 64 | 0063,Female,67,47,52 65 | 0064,Female,54,47,59 66 | 0065,Male,63,48,51 67 | 0066,Male,18,48,59 68 | 0067,Female,43,48,50 69 | 0068,Female,68,48,48 70 | 0069,Male,19,48,59 71 | 0070,Female,32,48,47 72 | 0071,Male,70,49,55 73 | 0072,Female,47,49,42 74 | 0073,Female,60,50,49 75 | 0074,Female,60,50,56 76 | 0075,Male,59,54,47 77 | 0076,Male,26,54,54 78 | 0077,Female,45,54,53 79 | 0078,Male,40,54,48 80 | 0079,Female,23,54,52 81 | 0080,Female,49,54,42 82 | 0081,Male,57,54,51 83 | 0082,Male,38,54,55 84 | 0083,Male,67,54,41 85 | 0084,Female,46,54,44 86 | 0085,Female,21,54,57 87 | 0086,Male,48,54,46 88 | 0087,Female,55,57,58 89 | 0088,Female,22,57,55 90 | 0089,Female,34,58,60 91 | 0090,Female,50,58,46 92 | 0091,Female,68,59,55 93 | 0092,Male,18,59,41 94 | 0093,Male,48,60,49 95 | 0094,Female,40,60,40 96 | 0095,Female,32,60,42 97 | 0096,Male,24,60,52 98 | 0097,Female,47,60,47 99 | 0098,Female,27,60,50 100 | 0099,Male,48,61,42 101 | 0100,Male,20,61,49 102 | 0101,Female,23,62,41 103 | 0102,Female,49,62,48 104 | 0103,Male,67,62,59 105 | 0104,Male,26,62,55 106 | 0105,Male,49,62,56 107 | 0106,Female,21,62,42 108 | 0107,Female,66,63,50 109 | 0108,Male,54,63,46 110 | 0109,Male,68,63,43 111 | 0110,Male,66,63,48 112 | 0111,Male,65,63,52 113 | 0112,Female,19,63,54 114 | 0113,Female,38,64,42 115 | 0114,Male,19,64,46 116 | 0115,Female,18,65,48 117 | 0116,Female,19,65,50 118 | 0117,Female,63,65,43 119 | 0118,Female,49,65,59 120 | 0119,Female,51,67,43 121 | 0120,Female,50,67,57 122 | 0121,Male,27,67,56 123 | 0122,Female,38,67,40 124 | 0123,Female,40,69,58 125 | 0124,Male,39,69,91 126 | 0125,Female,23,70,29 127 | 0126,Female,31,70,77 128 | 0127,Male,43,71,35 129 | 0128,Male,40,71,95 130 | 0129,Male,59,71,11 131 | 0130,Male,38,71,75 132 | 0131,Male,47,71,9 133 | 0132,Male,39,71,75 134 | 0133,Female,25,72,34 135 | 0134,Female,31,72,71 136 | 0135,Male,20,73,5 137 | 0136,Female,29,73,88 138 | 0137,Female,44,73,7 139 | 0138,Male,32,73,73 140 | 0139,Male,19,74,10 141 | 0140,Female,35,74,72 142 | 0141,Female,57,75,5 143 | 0142,Male,32,75,93 144 | 0143,Female,28,76,40 145 | 0144,Female,32,76,87 146 | 0145,Male,25,77,12 147 | 0146,Male,28,77,97 148 | 0147,Male,48,77,36 149 | 0148,Female,32,77,74 150 | 0149,Female,34,78,22 151 | 0150,Male,34,78,90 152 | 0151,Male,43,78,17 153 | 0152,Male,39,78,88 154 | 0153,Female,44,78,20 155 | 0154,Female,38,78,76 156 | 0155,Female,47,78,16 157 | 0156,Female,27,78,89 158 | 0157,Male,37,78,1 159 | 0158,Female,30,78,78 160 | 0159,Male,34,78,1 161 | 0160,Female,30,78,73 162 | 0161,Female,56,79,35 163 | 0162,Female,29,79,83 164 | 0163,Male,19,81,5 165 | 0164,Female,31,81,93 166 | 0165,Male,50,85,26 167 | 0166,Female,36,85,75 168 | 0167,Male,42,86,20 169 | 0168,Female,33,86,95 170 | 0169,Female,36,87,27 171 | 0170,Male,32,87,63 172 | 0171,Male,40,87,13 173 | 0172,Male,28,87,75 174 | 0173,Male,36,87,10 175 | 0174,Male,36,87,92 176 | 
0175,Female,52,88,13 177 | 0176,Female,30,88,86 178 | 0177,Male,58,88,15 179 | 0178,Male,27,88,69 180 | 0179,Male,59,93,14 181 | 0180,Male,35,93,90 182 | 0181,Female,37,97,32 183 | 0182,Female,32,97,86 184 | 0183,Male,46,98,15 185 | 0184,Female,29,98,88 186 | 0185,Female,41,99,39 187 | 0186,Male,30,99,97 188 | 0187,Female,54,101,24 189 | 0188,Male,28,101,68 190 | 0189,Female,41,103,17 191 | 0190,Female,36,103,85 192 | 0191,Female,34,103,23 193 | 0192,Female,32,103,69 194 | 0193,Male,33,113,8 195 | 0194,Female,38,113,91 196 | 0195,Female,47,120,16 197 | 0196,Female,35,120,79 198 | 0197,Female,45,126,28 199 | 0198,Male,32,126,74 200 | 0199,Male,32,137,18 201 | 0200,Male,30,137,83 -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 4 - Clustering/Section 24 - K-Means Clustering/K_Means/data_preprocessing_template.R: -------------------------------------------------------------------------------- 1 | # Data Preprocessing Template 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Data.csv') 5 | 6 | # Splitting the dataset into the Training set and Test set 7 | # install.packages('caTools') 8 | library(caTools) 9 | set.seed(123) 10 | split = sample.split(dataset$DependentVariable, SplitRatio = 0.8) 11 | training_set = subset(dataset, split == TRUE) 12 | test_set = subset(dataset, split == FALSE) 13 | 14 | # Feature Scaling 15 | # training_set = scale(training_set) 16 | # test_set = scale(test_set) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 4 - Clustering/Section 24 - K-Means Clustering/K_Means/data_preprocessing_template.py: -------------------------------------------------------------------------------- 1 | # Data Preprocessing Template 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Mall_Customers.csv') 10 | X = dataset.iloc[:, [3, 4]].values 11 | 12 | from sklearn.cluster import KMeans 13 | wcss = [] 14 | for i in range(1, 11): 15 | kmeans = KMeans(n_clusters = i, init = 'k-means++', max_iter = 300, n_init = 10, random_state = 0) 16 | kmeans.fit(X) 17 | wcss.append(kmeans.inertia_) 18 | plt.plot(range(1, 11), wcss) 19 | plt.title('The Elbow Method') 20 | plt.xlabel('Number of Clusters') 21 | plt.ylabel('WCSS') 22 | plt.show() 23 | 24 | kmeans = KMeans(n_clusters = 5, init = 'k-means++', max_iter = 300, n_init = 10, random_state = 0) 25 | y_kmeans = kmeans.fit_predict(X) 26 | 27 | 28 | plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], s = 100, c = 'red', label = 'Careful') 29 | plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], s = 100, c = 'blue', label = 'Standard') 30 | plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], s = 100, c = 'green', label = 'Target') 31 | plt.scatter(X[y_kmeans == 3, 0], X[y_kmeans == 3, 1], s = 100, c = 'cyan', label = 'Careless') 32 | plt.scatter(X[y_kmeans == 4, 0], X[y_kmeans == 4, 1], s = 100, c = 'magenta', label = 'Sensible') 33 | plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 300, c = 'yellow', label = 'Centroids') 34 | plt.title('Clusters of clients') 35 | plt.xlabel('Annual Income (k$)') 36 | plt.ylabel('Spending Score (1-100)') 37 | plt.legend() 38 | plt.show() 39 | 40 | # Feature Scaling 41 | """from sklearn.preprocessing import StandardScaler 42 | sc_X = StandardScaler() 43 | X_train = sc_X.fit_transform(X_train) 44 | X_test = sc_X.transform(X_test) 45 | sc_y = StandardScaler() 46 | y_train = sc_y.fit_transform(y_train)""" 47 | 
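# Optional sanity check on the elbow method's choice of k = 5 (a small sketch
# assuming the same X and y_kmeans computed above): the mean silhouette score
# is close to +1 when clusters are compact and well separated, and near 0 when
# they overlap, so it gives a second opinion on the chosen k.
from sklearn.metrics import silhouette_score
print('Silhouette score for k = 5: %.3f' % silhouette_score(X, y_kmeans))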
-------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 4 - Clustering/Section 24 - K-Means Clustering/K_Means/kmeans.R: -------------------------------------------------------------------------------- 1 | # K-Means Clustering 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Mall_Customers.csv') 5 | dataset = dataset[4:5] 6 | 7 | # Splitting the dataset into the Training set and Test set 8 | # install.packages('caTools') 9 | # library(caTools) 10 | # set.seed(123) 11 | # split = sample.split(dataset$DependentVariable, SplitRatio = 0.8) 12 | # training_set = subset(dataset, split == TRUE) 13 | # test_set = subset(dataset, split == FALSE) 14 | 15 | # Feature Scaling 16 | # training_set = scale(training_set) 17 | # test_set = scale(test_set) 18 | 19 | # Using the elbow method to find the optimal number of clusters 20 | set.seed(6) 21 | wcss = vector() 22 | for (i in 1:10) wcss[i] = sum(kmeans(dataset, i)$withinss) 23 | plot(1:10, 24 | wcss, 25 | type = 'b', 26 | main = paste('The Elbow Method'), 27 | xlab = 'Number of clusters', 28 | ylab = 'WCSS') 29 | 30 | # Fitting K-Means to the dataset 31 | set.seed(29) 32 | kmeans = kmeans(x = dataset, centers = 5) 33 | y_kmeans = kmeans$cluster 34 | 35 | # Visualising the clusters 36 | library(cluster) 37 | clusplot(dataset, 38 | y_kmeans, 39 | lines = 0, 40 | shade = TRUE, 41 | color = TRUE, 42 | labels = 2, 43 | plotchar = FALSE, 44 | span = TRUE, 45 | main = paste('Clusters of customers'), 46 | xlab = 'Annual Income', 47 | ylab = 'Spending Score') -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 4 - Clustering/Section 24 - K-Means Clustering/K_Means/kmeans.py: -------------------------------------------------------------------------------- 1 | # K-Means Clustering 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Mall_Customers.csv') 10 | X = dataset.iloc[:, [3, 4]].values 11 | # y = dataset.iloc[:, 3].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | """from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)""" 16 | 17 | # Feature Scaling 18 | """from sklearn.preprocessing import StandardScaler 19 | sc_X = StandardScaler() 20 | X_train = sc_X.fit_transform(X_train) 21 | X_test = sc_X.transform(X_test) 22 | sc_y = StandardScaler() 23 | y_train = sc_y.fit_transform(y_train)""" 24 | 25 | # Using the elbow method to find the optimal number of clusters 26 | from sklearn.cluster import KMeans 27 | wcss = [] 28 | for i in range(1, 11): 29 | kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42) 30 | kmeans.fit(X) 31 | wcss.append(kmeans.inertia_) 32 | plt.plot(range(1, 11), wcss) 33 | plt.title('The Elbow Method') 34 | plt.xlabel('Number of clusters') 35 | plt.ylabel('WCSS') 36 | plt.show() 37 | 38 | # Fitting K-Means to the dataset 39 | kmeans = KMeans(n_clusters = 5, init = 'k-means++', random_state = 42) 40 | y_kmeans = kmeans.fit_predict(X) 41 | 42 | # Visualising the clusters 43 | plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], s = 100, c = 'red', label = 'Cluster 1') 44 | plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], s = 100, c = 'blue', label = 'Cluster 2') 45 | plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], s = 100, c = 'green', label
= 'Cluster 3') 46 | plt.scatter(X[y_kmeans == 3, 0], X[y_kmeans == 3, 1], s = 100, c = 'cyan', label = 'Cluster 4') 47 | plt.scatter(X[y_kmeans == 4, 0], X[y_kmeans == 4, 1], s = 100, c = 'magenta', label = 'Cluster 5') 48 | plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 300, c = 'yellow', label = 'Centroids') 49 | plt.title('Clusters of customers') 50 | plt.xlabel('Annual Income (k$)') 51 | plt.ylabel('Spending Score (1-100)') 52 | plt.legend() 53 | plt.show() -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 4 - Clustering/Section 25 - Hierarchical Clustering/Hierarchical_Clustering/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/Machine Learning A-Z/Part 4 - Clustering/Section 25 - Hierarchical Clustering/Hierarchical_Clustering/.DS_Store -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 4 - Clustering/Section 25 - Hierarchical Clustering/Hierarchical_Clustering/Mall_Customers.csv: -------------------------------------------------------------------------------- 1 | CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100) 2 | 0001,Male,19,15,39 3 | 0002,Male,21,15,81 4 | 0003,Female,20,16,6 5 | 0004,Female,23,16,77 6 | 0005,Female,31,17,40 7 | 0006,Female,22,17,76 8 | 0007,Female,35,18,6 9 | 0008,Female,23,18,94 10 | 0009,Male,64,19,3 11 | 0010,Female,30,19,72 12 | 0011,Male,67,19,14 13 | 0012,Female,35,19,99 14 | 0013,Female,58,20,15 15 | 0014,Female,24,20,77 16 | 0015,Male,37,20,13 17 | 0016,Male,22,20,79 18 | 0017,Female,35,21,35 19 | 0018,Male,20,21,66 20 | 0019,Male,52,23,29 21 | 0020,Female,35,23,98 22 | 0021,Male,35,24,35 23 | 0022,Male,25,24,73 24 | 0023,Female,46,25,5 25 | 0024,Male,31,25,73 26 | 0025,Female,54,28,14 27 | 0026,Male,29,28,82 28 | 0027,Female,45,28,32 29 | 0028,Male,35,28,61 30 | 0029,Female,40,29,31 31 | 0030,Female,23,29,87 32 | 0031,Male,60,30,4 33 | 0032,Female,21,30,73 34 | 0033,Male,53,33,4 35 | 0034,Male,18,33,92 36 | 0035,Female,49,33,14 37 | 0036,Female,21,33,81 38 | 0037,Female,42,34,17 39 | 0038,Female,30,34,73 40 | 0039,Female,36,37,26 41 | 0040,Female,20,37,75 42 | 0041,Female,65,38,35 43 | 0042,Male,24,38,92 44 | 0043,Male,48,39,36 45 | 0044,Female,31,39,61 46 | 0045,Female,49,39,28 47 | 0046,Female,24,39,65 48 | 0047,Female,50,40,55 49 | 0048,Female,27,40,47 50 | 0049,Female,29,40,42 51 | 0050,Female,31,40,42 52 | 0051,Female,49,42,52 53 | 0052,Male,33,42,60 54 | 0053,Female,31,43,54 55 | 0054,Male,59,43,60 56 | 0055,Female,50,43,45 57 | 0056,Male,47,43,41 58 | 0057,Female,51,44,50 59 | 0058,Male,69,44,46 60 | 0059,Female,27,46,51 61 | 0060,Male,53,46,46 62 | 0061,Male,70,46,56 63 | 0062,Male,19,46,55 64 | 0063,Female,67,47,52 65 | 0064,Female,54,47,59 66 | 0065,Male,63,48,51 67 | 0066,Male,18,48,59 68 | 0067,Female,43,48,50 69 | 0068,Female,68,48,48 70 | 0069,Male,19,48,59 71 | 0070,Female,32,48,47 72 | 0071,Male,70,49,55 73 | 0072,Female,47,49,42 74 | 0073,Female,60,50,49 75 | 0074,Female,60,50,56 76 | 0075,Male,59,54,47 77 | 0076,Male,26,54,54 78 | 0077,Female,45,54,53 79 | 0078,Male,40,54,48 80 | 0079,Female,23,54,52 81 | 0080,Female,49,54,42 82 | 0081,Male,57,54,51 83 | 0082,Male,38,54,55 84 | 0083,Male,67,54,41 85 | 0084,Female,46,54,44 86 | 0085,Female,21,54,57 87 | 0086,Male,48,54,46 88 | 0087,Female,55,57,58 89 | 
0088,Female,22,57,55 90 | 0089,Female,34,58,60 91 | 0090,Female,50,58,46 92 | 0091,Female,68,59,55 93 | 0092,Male,18,59,41 94 | 0093,Male,48,60,49 95 | 0094,Female,40,60,40 96 | 0095,Female,32,60,42 97 | 0096,Male,24,60,52 98 | 0097,Female,47,60,47 99 | 0098,Female,27,60,50 100 | 0099,Male,48,61,42 101 | 0100,Male,20,61,49 102 | 0101,Female,23,62,41 103 | 0102,Female,49,62,48 104 | 0103,Male,67,62,59 105 | 0104,Male,26,62,55 106 | 0105,Male,49,62,56 107 | 0106,Female,21,62,42 108 | 0107,Female,66,63,50 109 | 0108,Male,54,63,46 110 | 0109,Male,68,63,43 111 | 0110,Male,66,63,48 112 | 0111,Male,65,63,52 113 | 0112,Female,19,63,54 114 | 0113,Female,38,64,42 115 | 0114,Male,19,64,46 116 | 0115,Female,18,65,48 117 | 0116,Female,19,65,50 118 | 0117,Female,63,65,43 119 | 0118,Female,49,65,59 120 | 0119,Female,51,67,43 121 | 0120,Female,50,67,57 122 | 0121,Male,27,67,56 123 | 0122,Female,38,67,40 124 | 0123,Female,40,69,58 125 | 0124,Male,39,69,91 126 | 0125,Female,23,70,29 127 | 0126,Female,31,70,77 128 | 0127,Male,43,71,35 129 | 0128,Male,40,71,95 130 | 0129,Male,59,71,11 131 | 0130,Male,38,71,75 132 | 0131,Male,47,71,9 133 | 0132,Male,39,71,75 134 | 0133,Female,25,72,34 135 | 0134,Female,31,72,71 136 | 0135,Male,20,73,5 137 | 0136,Female,29,73,88 138 | 0137,Female,44,73,7 139 | 0138,Male,32,73,73 140 | 0139,Male,19,74,10 141 | 0140,Female,35,74,72 142 | 0141,Female,57,75,5 143 | 0142,Male,32,75,93 144 | 0143,Female,28,76,40 145 | 0144,Female,32,76,87 146 | 0145,Male,25,77,12 147 | 0146,Male,28,77,97 148 | 0147,Male,48,77,36 149 | 0148,Female,32,77,74 150 | 0149,Female,34,78,22 151 | 0150,Male,34,78,90 152 | 0151,Male,43,78,17 153 | 0152,Male,39,78,88 154 | 0153,Female,44,78,20 155 | 0154,Female,38,78,76 156 | 0155,Female,47,78,16 157 | 0156,Female,27,78,89 158 | 0157,Male,37,78,1 159 | 0158,Female,30,78,78 160 | 0159,Male,34,78,1 161 | 0160,Female,30,78,73 162 | 0161,Female,56,79,35 163 | 0162,Female,29,79,83 164 | 0163,Male,19,81,5 165 | 0164,Female,31,81,93 166 | 0165,Male,50,85,26 167 | 0166,Female,36,85,75 168 | 0167,Male,42,86,20 169 | 0168,Female,33,86,95 170 | 0169,Female,36,87,27 171 | 0170,Male,32,87,63 172 | 0171,Male,40,87,13 173 | 0172,Male,28,87,75 174 | 0173,Male,36,87,10 175 | 0174,Male,36,87,92 176 | 0175,Female,52,88,13 177 | 0176,Female,30,88,86 178 | 0177,Male,58,88,15 179 | 0178,Male,27,88,69 180 | 0179,Male,59,93,14 181 | 0180,Male,35,93,90 182 | 0181,Female,37,97,32 183 | 0182,Female,32,97,86 184 | 0183,Male,46,98,15 185 | 0184,Female,29,98,88 186 | 0185,Female,41,99,39 187 | 0186,Male,30,99,97 188 | 0187,Female,54,101,24 189 | 0188,Male,28,101,68 190 | 0189,Female,41,103,17 191 | 0190,Female,36,103,85 192 | 0191,Female,34,103,23 193 | 0192,Female,32,103,69 194 | 0193,Male,33,113,8 195 | 0194,Female,38,113,91 196 | 0195,Female,47,120,16 197 | 0196,Female,35,120,79 198 | 0197,Female,45,126,28 199 | 0198,Male,32,126,74 200 | 0199,Male,32,137,18 201 | 0200,Male,30,137,83 -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 4 - Clustering/Section 25 - Hierarchical Clustering/Hierarchical_Clustering/data_preprocessing_template.R: -------------------------------------------------------------------------------- 1 | # Data Preprocessing Template 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Data.csv') 5 | 6 | # Splitting the dataset into the Training set and Test set 7 | # install.packages('caTools') 8 | library(caTools) 9 | set.seed(123) 10 | split = sample.split(dataset$DependentVariable, SplitRatio = 0.8) 11 
| training_set = subset(dataset, split == TRUE) 12 | test_set = subset(dataset, split == FALSE) 13 | 14 | # Feature Scaling 15 | # training_set = scale(training_set) 16 | # test_set = scale(test_set) -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 4 - Clustering/Section 25 - Hierarchical Clustering/Hierarchical_Clustering/data_preprocessing_template.py: -------------------------------------------------------------------------------- 1 | # Data Preprocessing Template 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Data.csv') 10 | X = dataset.iloc[:, :-1].values 11 | y = dataset.iloc[:, 3].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0) 16 | 17 | # Feature Scaling 18 | """from sklearn.preprocessing import StandardScaler 19 | sc_X = StandardScaler() 20 | X_train = sc_X.fit_transform(X_train) 21 | X_test = sc_X.transform(X_test) 22 | sc_y = StandardScaler() 23 | y_train = sc_y.fit_transform(y_train)""" -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 4 - Clustering/Section 25 - Hierarchical Clustering/Hierarchical_Clustering/hc.R: -------------------------------------------------------------------------------- 1 | # Hierarchical Clustering 2 | 3 | # Importing the dataset 4 | dataset = read.csv('Mall_Customers.csv') 5 | dataset = dataset[4:5] 6 | 7 | # Splitting the dataset into the Training set and Test set 8 | # install.packages('caTools') 9 | # library(caTools) 10 | # set.seed(123) 11 | # split = sample.split(dataset$DependentVariable, SplitRatio = 0.8) 12 | # training_set = subset(dataset, split == TRUE) 13 | # test_set = subset(dataset, split == FALSE) 14 | 15 | # Feature Scaling 16 | # training_set = scale(training_set) 17 | # test_set = scale(test_set) 18 | 19 | # Using the dendrogram to find the optimal number of clusters 20 | dendrogram = hclust(d = dist(dataset, method = 'euclidean'), method = 'ward.D') 21 | plot(dendrogram, 22 | main = paste('Dendrogram'), 23 | xlab = 'Customers', 24 | ylab = 'Euclidean distances') 25 | 26 | # Fitting Hierarchical Clustering to the dataset 27 | hc = hclust(d = dist(dataset, method = 'euclidean'), method = 'ward.D') 28 | y_hc = cutree(hc, 5) 29 | 30 | # Visualising the clusters 31 | library(cluster) 32 | clusplot(dataset, 33 | y_hc, 34 | lines = 0, 35 | shade = TRUE, 36 | color = TRUE, 37 | labels = 2, 38 | plotchar = FALSE, 39 | span = TRUE, 40 | main = paste('Clusters of customers'), 41 | xlab = 'Annual Income', 42 | ylab = 'Spending Score') -------------------------------------------------------------------------------- /udemy/Machine Learning A-Z/Part 4 - Clustering/Section 25 - Hierarchical Clustering/Hierarchical_Clustering/hc.py: -------------------------------------------------------------------------------- 1 | # Hierarchical Clustering 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Importing the dataset 9 | dataset = pd.read_csv('Mall_Customers.csv') 10 | X = dataset.iloc[:, [3, 4]].values 11 | # y = dataset.iloc[:, 3].values 12 | 13 | # Splitting the dataset into the Training set and Test set 14 | """from sklearn.model_selection import train_test_split 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)""" 16 | 17 | # Feature Scaling 18 | """from sklearn.preprocessing import StandardScaler 19 | sc_X = StandardScaler() 20 | X_train = sc_X.fit_transform(X_train) 21 | X_test = sc_X.transform(X_test) 22 | sc_y = StandardScaler() 23 | y_train = sc_y.fit_transform(y_train)"""
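# Note: agglomerative clustering builds clusters bottom-up: every customer
# starts as its own cluster and the two closest clusters are merged repeatedly.
# With 'ward' linkage each merge is chosen to minimise the increase in total
# within-cluster variance. The dendrogram below records these merges; a common
# heuristic is to cut it across the tallest vertical stretch crossed by no
# horizontal merge line, which for this dataset suggests 5 clusters.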
24 | 25 | # Using the dendrogram to find the optimal number of clusters 26 | import scipy.cluster.hierarchy as sch 27 | dendrogram = sch.dendrogram(sch.linkage(X, method = 'ward')) 28 | plt.title('Dendrogram') 29 | plt.xlabel('Customers') 30 | plt.ylabel('Euclidean distances') 31 | plt.show() 32 | 33 | # Fitting Hierarchical Clustering to the dataset 34 | from sklearn.cluster import AgglomerativeClustering 35 | hc = AgglomerativeClustering(n_clusters = 5, affinity = 'euclidean', linkage = 'ward') 36 | y_hc = hc.fit_predict(X) 37 | 38 | # Visualising the clusters 39 | plt.scatter(X[y_hc == 0, 0], X[y_hc == 0, 1], s = 100, c = 'red', label = 'Cluster 1') 40 | plt.scatter(X[y_hc == 1, 0], X[y_hc == 1, 1], s = 100, c = 'blue', label = 'Cluster 2') 41 | plt.scatter(X[y_hc == 2, 0], X[y_hc == 2, 1], s = 100, c = 'green', label = 'Cluster 3') 42 | plt.scatter(X[y_hc == 3, 0], X[y_hc == 3, 1], s = 100, c = 'cyan', label = 'Cluster 4') 43 | plt.scatter(X[y_hc == 4, 0], X[y_hc == 4, 1], s = 100, c = 'magenta', label = 'Cluster 5') 44 | plt.title('Clusters of customers') 45 | plt.xlabel('Annual Income (k$)') 46 | plt.ylabel('Spending Score (1-100)') 47 | plt.legend() 48 | plt.show() -------------------------------------------------------------------------------- /udemy/QnA.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apurva-tech/Machine-learning/680135fe94bcfe6f4da354172a4799c0aa0eb1e3/udemy/QnA.pdf --------------------------------------------------------------------------------