├── README.md
└── breast_cancer_analysis.py

/README.md:
--------------------------------------------------------------------------------
# vishabh
Breast Cancer data analysis
http://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+%28diagnostic%29
--------------------------------------------------------------------------------

/breast_cancer_analysis.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 29 01:04:23 2018

@author: 1022316
"""

# Classification of cancer diagnosis
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset (raw string so the backslashes in the Windows path are not treated as escapes)
dataset = pd.read_csv(r'C:\Machine learning\cancer_data\cancer.csv')
# Assumes columns 1-30 of the CSV hold the 30 features and column 31 holds the diagnosis label
X = dataset.iloc[:, 1:31].values
Y = dataset.iloc[:, 31].values

dataset.head()

print("Cancer data set dimensions : {}".format(dataset.shape))

dataset.groupby('diagnosis').size()

# Visualization of data
dataset.groupby('diagnosis').hist(figsize=(12, 12))

# Checking for missing values
dataset.isnull().sum()
dataset.isna().sum()

dataframe = pd.DataFrame(Y)
# Encoding categorical data values (diagnosis M/B -> 1/0)
from sklearn.preprocessing import LabelEncoder
labelencoder_Y = LabelEncoder()
Y = labelencoder_Y.fit_transform(Y)

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Fitting the Logistic Regression algorithm to the Training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, Y_train)
# 95.8% accuracy

# Fitting the K-NN algorithm
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
classifier.fit(X_train, Y_train)
# 95.1% accuracy

# Fitting SVM (linear kernel)
from sklearn.svm import SVC
classifier = SVC(kernel='linear', random_state=0)
classifier.fit(X_train, Y_train)
# 97.2% accuracy

# Fitting kernel SVM (RBF kernel)
classifier = SVC(kernel='rbf', random_state=0)
classifier.fit(X_train, Y_train)
# 96.5% accuracy

# Fitting Naive Bayes
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, Y_train)
# 91.6% accuracy

# Fitting the Decision Tree algorithm
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
classifier.fit(X_train, Y_train)
# 95.8% accuracy

# Fitting the Random Forest classification algorithm
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
classifier.fit(X_train, Y_train)
# 98.6% accuracy

# Predicting the Test set results (classifier at this point is the last fitted model, the random forest)
Y_pred = classifier.predict(X_test)

# Creating the confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_test, Y_pred)
c = cm[0, 0] + cm[1, 1]  # number of correct predictions on the test set
print(c)
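
# The per-model accuracy figures noted above imply each classifier was scored
# separately, but only the last fitted model reaches the confusion matrix.
# The block below is an optional, illustrative sketch (not part of the original
# script) showing one way to evaluate every model on the same held-out test set
# with sklearn's accuracy_score; the `models` dict is a hypothetical helper and
# the printed numbers will depend on the CSV used.
from sklearn.metrics import accuracy_score

models = {
    'Logistic Regression': LogisticRegression(random_state=0),
    'K-NN': KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2),
    'SVM (linear)': SVC(kernel='linear', random_state=0),
    'SVM (rbf)': SVC(kernel='rbf', random_state=0),
    'Naive Bayes': GaussianNB(),
    'Decision Tree': DecisionTreeClassifier(criterion='entropy', random_state=0),
    'Random Forest': RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0),
}
for name, model in models.items():
    model.fit(X_train, Y_train)                       # train on the scaled training split
    acc = accuracy_score(Y_test, model.predict(X_test))  # score on the scaled test split
    print("{}: {:.3f}".format(name, acc))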

--------------------------------------------------------------------------------