├── INTERNSHALA ├── 2 Understanding the Statistics for Data science │ ├── chi_square.csv │ ├── Data for paired t test.xlsx │ ├── Data for 2 sample test.csv │ ├── Data for paired t test.csv │ ├── onesample.csv │ ├── mean.csv │ ├── mode.csv │ ├── Histogram.csv │ ├── Median.csv │ ├── variance.csv │ ├── Frequency Table.csv │ ├── Spread of Data.csv │ ├── Standard Deviation.csv │ ├── mean_robust.csv │ ├── 12 Chi square.ipynb │ ├── 11 Paired T-test.ipynb │ ├── 9 T-test( one sample ).ipynb │ ├── 7 Frequency table.ipynb │ ├── 4 Spread of Data.ipynb │ ├── 5 Variance.ipynb │ ├── 3 Median.ipynb │ ├── 6 Standard Deviation.ipynb │ ├── 10 2 sample T-test.ipynb │ ├── 2 Mean.ipynb │ ├── 8 Histogram.ipynb │ └── 1 Mode.ipynb ├── 1 Python Basics │ ├── data.xlsx │ ├── 2 Python for DataScience.ipynb │ ├── 5 Dictionaries.ipynb │ ├── 3 loop.ipynb │ ├── 4 List.ipynb │ ├── 1 Python Basics.ipynb │ └── Python_Coding_Challenge_Solution.ipynb ├── Final Project │ ├── Problem Statement.pdf │ └── solution_checker.xlsx ├── Module and Final Test │ ├── final test pdf.pdf │ ├── stat answers.pdf │ └── predictive answers.pdf └── 3 Predictive modeling and ML │ ├── 3 Decesion Tree │ ├── Decision tree simplified.odt │ └── Decision Tree.ipynb │ ├── 2 Logistic Regression │ ├── How to decide the threshold .odt │ └── logistic Regression.ipynb │ ├── 6 Transforming variables test.ipynb │ ├── 1 Reading the data into Python.ipynb │ ├── 2 Variable Identification.ipynb │ └── 4 K means │ └── K-Means.ipynb └── README.md /INTERNSHALA/2 Understanding the Statistics for Data science/chi_square.csv: -------------------------------------------------------------------------------- 1 | Event,Observed,Expected Successful,41,33 Unsuccessful,59,67 -------------------------------------------------------------------------------- /INTERNSHALA/1 Python Basics/data.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sarimurrab/Summer-Training-Internshala/HEAD/INTERNSHALA/1 Python Basics/data.xlsx -------------------------------------------------------------------------------- /INTERNSHALA/Final Project/Problem Statement.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sarimurrab/Summer-Training-Internshala/HEAD/INTERNSHALA/Final Project/Problem Statement.pdf -------------------------------------------------------------------------------- /INTERNSHALA/Final Project/solution_checker.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sarimurrab/Summer-Training-Internshala/HEAD/INTERNSHALA/Final Project/solution_checker.xlsx -------------------------------------------------------------------------------- /INTERNSHALA/Module and Final Test/final test pdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sarimurrab/Summer-Training-Internshala/HEAD/INTERNSHALA/Module and Final Test/final test pdf.pdf -------------------------------------------------------------------------------- /INTERNSHALA/Module and Final Test/stat answers.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sarimurrab/Summer-Training-Internshala/HEAD/INTERNSHALA/Module and Final Test/stat answers.pdf -------------------------------------------------------------------------------- /INTERNSHALA/Module and Final Test/predictive answers.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sarimurrab/Summer-Training-Internshala/HEAD/INTERNSHALA/Module and Final Test/predictive answers.pdf -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/Data for paired t 
test.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sarimurrab/Summer-Training-Internshala/HEAD/INTERNSHALA/2 Understanding the Statistics for Data science/Data for paired t test.xlsx -------------------------------------------------------------------------------- /INTERNSHALA/3 Predictive modeling and ML/3 Decesion Tree/Decision tree simplified.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sarimurrab/Summer-Training-Internshala/HEAD/INTERNSHALA/3 Predictive modeling and ML/3 Decesion Tree/Decision tree simplified.odt -------------------------------------------------------------------------------- /INTERNSHALA/3 Predictive modeling and ML/2 Logistic Regression/How to decide the threshold .odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sarimurrab/Summer-Training-Internshala/HEAD/INTERNSHALA/3 Predictive modeling and ML/2 Logistic Regression/How to decide the threshold .odt -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/Data for 2 sample test.csv: -------------------------------------------------------------------------------- 1 | Hauz Khas,Defence Colony 2 | 180,220 3 | 100,200 4 | 120,240 5 | 220,180 6 | 160,160 7 | 100,260 8 | 140,280 9 | 260,300 10 | 240,240 11 | 260,220 12 | 180,260 13 | 160,160 14 | 200,180 15 | 120,220 -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/Data for paired t test.csv: -------------------------------------------------------------------------------- 1 | Errors using typewriter,Errors using a computer 2 | 8,8 3 | 13,8 4 | 10,4 5 | 7,9 6 | 13,10 7 | 10,10 8 | 12,4 9 | 9,5 10 | 6,7 11 | 5,9 12 | 9,12 13 | 8,7 14 | 12,6 15 | 12,9 16 | 8,7 
17 | 7,9 18 | 12,6 19 | 13,7 20 | 15,4 21 | 10,7 22 | 7,5 23 | 13,6 24 | 9,6 25 | 10,6 26 | 7,6 27 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/onesample.csv: -------------------------------------------------------------------------------- 1 | Insect Length,,,, 2 | 6.55,,,, 3 | 6.67,,,, 4 | 6.56,,,, 5 | 6.63,,,, 6 | 6.39,,,, 7 | 6.33,,,, 8 | 6.28,,,, 9 | 6.75,,,, 10 | 6.86,,,, 11 | 6.90,,,, 12 | 6.90,,,, 13 | 6.90,,,, 14 | 6.30,,,, 15 | 6.90,,,, 16 | 6.22,,,, 17 | 6.40,,,, 18 | 6.42,,,, 19 | 6.43,,,, 20 | 6.54,,,, 21 | 6.78,,,, 22 | 5.50,,,, 23 | 6.92,,,, 24 | 6.66,,,, 25 | 5.20,,,, 26 | 6.11,,,, 27 | 6.27,,,, 28 | 6.82,,,, 29 | 6.60,,,, 30 | 6.49,,,, -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 1. INTRODUCTION TO DATA SCIENCE 2 | 3 | Get an overview of data science. Understand its different applications and get insights into how data science is disrupting industries. 4 | 5 | # 2. PYTHON FOR DATA SCIENCE 6 | 7 | Learn the basics of Python - how to read a CSV file, understand operators, variables, dictionaries, functions, and data structures in data science. 8 | 9 | # 3. UNDERSTANDING THE STATISTICS FOR DATA SCIENCE 10 | 11 | Learn about data distribution, probability, data statistics and types of testing. Understand the concepts of Descriptive stats and Inferential stats. 12 | 13 | # 4. PREDICTIVE MODELING AND BASICS OF MACHINE LEARNING 14 | 15 | Learn about predictive models & their stages, data extraction & exploration, univariate & bivariate analysis, model building, and linear regression. 16 | 17 | # FINAL PROJECT 18 | # Problem Statement 19 | Your client is a retail banking institution. Term deposits are a major source 20 | of income for a bank. 21 | A term deposit is a cash investment held at a financial institution. 
Your 22 | money is invested for an agreed rate of interest over a fixed amount of 23 | time, or term. 24 | The bank has various outreach plans to sell term deposits to their 25 | customers such as email marketing, advertisements, telephonic marketing 26 | and digital marketing. 27 | Telephonic marketing campaigns still remain one of the most effective ways 28 | to reach out to people. However, they require huge investment as large call 29 | centers are hired to actually execute these campaigns. Hence, it is crucial 30 | to identify the customers most likely to convert beforehand so that they can 31 | be specifically targeted via call. 32 | You are provided with the client data such as: age of the client, their job 33 | type, their marital status, etc. Along with the client data, you are also 34 | provided with the information of the call such as the duration of the call, day 35 | and month of the call, etc. Given this information, your task is to predict if 36 | the client will subscribe to a term deposit. 
37 | -------------------------------------------------------------------------------- /INTERNSHALA/1 Python Basics/2 Python for DataScience.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "'sarim32'" 12 | ] 13 | }, 14 | "execution_count": 3, 15 | "metadata": {}, 16 | "output_type": "execute_result" 17 | } 18 | ], 19 | "source": [ 20 | "\"sarim\"+ str(32)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 6, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "ename": "NameError", 30 | "evalue": "name 'factor' is not defined", 31 | "output_type": "error", 32 | "traceback": [ 33 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 34 | "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", 35 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;34m\"sarim\"\u001b[0m\u001b[1;33m+\u001b[0m \u001b[0mfactor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m32\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 36 | "\u001b[1;31mNameError\u001b[0m: name 'factor' is not defined" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "\"sarim\"+ factor(32)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [] 50 | } 51 | ], 52 | "metadata": { 53 | "kernelspec": { 54 | "display_name": "Python 3", 55 | "language": "python", 56 | "name": "python3" 57 | }, 58 | "language_info": { 59 | "codemirror_mode": { 60 | "name": "ipython", 61 | "version": 3 62 | }, 63 | "file_extension": ".py", 64 | "mimetype": "text/x-python", 65 | "name": "python", 66 | "nbconvert_exporter": "python", 67 | "pygments_lexer": "ipython3", 68 | "version": "3.7.1" 69 | } 70 
| }, 71 | "nbformat": 4, 72 | "nbformat_minor": 2 73 | } 74 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/mean.csv: -------------------------------------------------------------------------------- 1 | Student,Overall Marks,Gender,Subject 2 | Anish,97,Male,English 3 | Rishabh,83,Male,English 4 | Ajay,78,Male,Physics 5 | Abhinav,86,Male,Chemistry 6 | Anurag,91,Male,English 7 | Ajeet,15,Male,Physics 8 | Varun,83,Male,Chemistry 9 | Rajeev,64,Male,Chemistry 10 | sanjay,66,Male,Chemistry 11 | NISHA,58,Female,Physics 12 | SURESH,62,Male,Physics 13 | Aniket,62,Male,English 14 | manu,20,Male,Physics 15 | ALOK,85,Male,English 16 | Pranav,49,Male,Chemistry 17 | Deep,92,Male,English 18 | Prashant,21,Male,Physics 19 | Kunal,15,Male,Physics 20 | Soham,46,Male,Chemistry 21 | mayur,100,Male,Chemistry 22 | shaan,18,Male,Chemistry 23 | shivangi,70,Female,Physics 24 | Sam,82,Female,English 25 | Dhruv,88,Male,English 26 | tushar,88,Male,English 27 | Neelam,100,Female,Chemistry 28 | yash,86,Male,Physics 29 | Nitin,60,Male,English 30 | Arun,88,Male,Chemistry 31 | Sumit,19,Male,Physics 32 | manish,81,Male,English 33 | SHAIL,74,Male,Chemistry 34 | Parth,94,Male,Physics 35 | kumar,72,Male,Physics 36 | shyam,77,Male,Chemistry 37 | vikas,75,Male,Chemistry 38 | Manoj,70,Male,English 39 | Siddharth,18,Male,Chemistry 40 | Mayank,51,Male,Physics 41 | swati,81,Female,Physics 42 | anamika,23,Female,English 43 | pawan,77,Male,Physics 44 | krithika,5,Female,Physics 45 | Deepak,100,Male,Chemistry 46 | Jatin,85,Male,Chemistry 47 | PRATEEK,80,Male,Chemistry 48 | Deepro,84,Male,Physics 49 | atul,63,Male,English 50 | Neeraj,67,Male,English 51 | SUNNY,81,Male,English 52 | Shashank,83,Male,Chemistry 53 | Nishant,89,Male,Physics 54 | Aryan,60,Male,English 55 | riya,47,Female,Chemistry 56 | Ashish,82,Male,Physics 57 | shivam,79,Male,English 58 | GIRISH,88,Male,Chemistry 59 | Sunil,50,Male,Physics 60 | 
Raju,88,Male,Physics 61 | gokul,40,Male,Chemistry 62 | rakesh,84,Male,Chemistry 63 | krish,83,Male,English 64 | Aditya,87,Male,Chemistry 65 | Ira,48,Female,Physics 66 | Arjun,47,Male,Physics 67 | ROHIT,88,Male,English 68 | Harish,51,Male,Physics 69 | Ramanan,45,Male,Physics 70 | MOHIT,94,Male,Chemistry 71 | Raj,88,Male,Chemistry 72 | Abhishek,91,Male,Chemistry 73 | simran,0,Female,Physics 74 | Angel,99,Female,English 75 | juvina,78,Female,English 76 | ajith,63,Male,English 77 | Tanya,98,Female,Chemistry 78 | Vaibhav,98,Male,Physics 79 | Sneha,98,Female,English 80 | prince,64,Male,Chemistry 81 | jay,81,Male,Physics 82 | Avinash,76,Male,English 83 | sanchit,80,Male,Chemistry 84 | Rohan,97,Male,Physics 85 | dinesh,18,Male,Physics 86 | Lily,90,Female,Chemistry 87 | diksha,71,Female,Chemistry 88 | vishal,67,Male,English 89 | Rutuja,73,Female,Chemistry 90 | akash,93,Male,Physics 91 | mahesh,78,Male,Physics 92 | vedant,74,Male,English 93 | Ram,99,Male,Physics 94 | Dawn,73,Female,Physics 95 | ankur,17,Male,Chemistry 96 | Smriti,0,Female,Chemistry 97 | Aaditya,77,Male,Chemistry 98 | vivek,55,Male,Physics 99 | Tisha,7,Female,English 100 | Raghav,73,Male,English 101 | Ishita,82,Female,Chemistry 102 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/mode.csv: -------------------------------------------------------------------------------- 1 | Student,Overall Marks,Gender,Subject 2 | Anish,97,Male,English 3 | Rishabh,83,Male,English 4 | Ajay,78,Male,Physics 5 | Abhinav,86,Male,Chemistry 6 | Anurag,91,Male,English 7 | Ajeet,15,Male,Physics 8 | Varun,83,Male,Chemistry 9 | Rajeev,64,Male,Chemistry 10 | sanjay,66,Male,Chemistry 11 | NISHA,58,Female,Physics 12 | SURESH,62,Male,Physics 13 | Aniket,62,Male,English 14 | manu,20,Male,Physics 15 | ALOK,85,Male,English 16 | Pranav,49,Male,Chemistry 17 | Deep,92,Male,English 18 | Prashant,21,Male,Physics 19 | Kunal,15,Male,Physics 20 | 
Soham,46,Male,Chemistry 21 | mayur,100,Male,Chemistry 22 | shaan,18,Male,Chemistry 23 | shivangi,70,Female,Physics 24 | Sam,82,Female,English 25 | Dhruv,88,Male,English 26 | tushar,88,Male,English 27 | Neelam,100,Female,Chemistry 28 | yash,86,Male,Physics 29 | Nitin,60,Male,English 30 | Arun,88,Male,Chemistry 31 | Sumit,19,Male,Physics 32 | manish,81,Male,English 33 | SHAIL,74,Male,Chemistry 34 | Parth,94,Male,Physics 35 | kumar,72,Male,Physics 36 | shyam,77,Male,Chemistry 37 | vikas,75,Male,Chemistry 38 | Manoj,70,Male,English 39 | Siddharth,18,Male,Chemistry 40 | Mayank,51,Male,Physics 41 | swati,81,Female,Physics 42 | anamika,23,Female,English 43 | pawan,77,Male,Physics 44 | krithika,5,Female,Physics 45 | Deepak,100,Male,Chemistry 46 | Jatin,85,Male,Chemistry 47 | PRATEEK,80,Male,Chemistry 48 | Deepro,84,Male,Physics 49 | atul,63,Male,English 50 | Neeraj,67,Male,English 51 | SUNNY,81,Male,English 52 | Shashank,83,Male,Chemistry 53 | Nishant,89,Male,Physics 54 | Aryan,60,Male,English 55 | riya,47,Female,Chemistry 56 | Ashish,82,Male,Physics 57 | shivam,79,Male,English 58 | GIRISH,88,Male,Chemistry 59 | Sunil,50,Male,Physics 60 | Raju,88,Male,Physics 61 | gokul,40,Male,Chemistry 62 | rakesh,84,Male,Chemistry 63 | krish,83,Male,English 64 | Aditya,87,Male,Chemistry 65 | Ira,48,Female,Physics 66 | Arjun,47,Male,Physics 67 | ROHIT,88,Male,English 68 | Harish,51,Male,Physics 69 | Ramanan,45,Male,Physics 70 | MOHIT,94,Male,Chemistry 71 | Raj,88,Male,Chemistry 72 | Abhishek,91,Male,Chemistry 73 | simran,0,Female,Physics 74 | Angel,99,Female,English 75 | juvina,78,Female,English 76 | ajith,63,Male,English 77 | Tanya,98,Female,Chemistry 78 | Vaibhav,98,Male,Physics 79 | Sneha,98,Female,English 80 | prince,64,Male,Chemistry 81 | jay,81,Male,Physics 82 | Avinash,76,Male,English 83 | sanchit,80,Male,Chemistry 84 | Rohan,97,Male,Physics 85 | dinesh,18,Male,Physics 86 | Lily,90,Female,Chemistry 87 | diksha,71,Female,Chemistry 88 | vishal,67,Male,English 89 | 
Rutuja,73,Female,Chemistry 90 | akash,93,Male,Physics 91 | mahesh,78,Male,Physics 92 | vedant,74,Male,English 93 | Ram,99,Male,Physics 94 | Dawn,73,Female,Physics 95 | ankur,17,Male,Chemistry 96 | Smriti,0,Female,Chemistry 97 | Aaditya,77,Male,Chemistry 98 | vivek,55,Male,Physics 99 | Tisha,7,Female,English 100 | Raghav,73,Male,English 101 | Ishita,82,Female,Chemistry 102 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/Histogram.csv: -------------------------------------------------------------------------------- 1 | Student,Overall Marks,Gender,Subject 2 | Anish,97,Male,English 3 | Rishabh,83,Male,English 4 | Ajay,78,Male,Physics 5 | Abhinav,86,Male,Chemistry 6 | Anurag,91,Male,English 7 | Ajeet,15,Male,Physics 8 | Varun,83,Male,Chemistry 9 | Rajeev,64,Male,Chemistry 10 | sanjay,66,Male,Chemistry 11 | NISHA,58,Female,Physics 12 | SURESH,62,Male,Physics 13 | Aniket,62,Male,English 14 | manu,20,Male,Physics 15 | ALOK,85,Male,English 16 | Pranav,49,Male,Chemistry 17 | Deep,92,Male,English 18 | Prashant,21,Male,Physics 19 | Kunal,15,Male,Physics 20 | Soham,46,Male,Chemistry 21 | mayur,100,Male,Chemistry 22 | shaan,18,Male,Chemistry 23 | shivangi,70,Female,Physics 24 | Sam,82,Female,English 25 | Dhruv,88,Male,English 26 | tushar,88,Male,English 27 | Neelam,100,Female,Chemistry 28 | yash,86,Male,Physics 29 | Nitin,60,Male,English 30 | Arun,88,Male,Chemistry 31 | Sumit,19,Male,Physics 32 | manish,81,Male,English 33 | SHAIL,74,Male,Chemistry 34 | Parth,94,Male,Physics 35 | kumar,72,Male,Physics 36 | shyam,77,Male,Chemistry 37 | vikas,75,Male,Chemistry 38 | Manoj,70,Male,English 39 | Siddharth,18,Male,Chemistry 40 | Mayank,51,Male,Physics 41 | swati,81,Female,Physics 42 | anamika,23,Female,English 43 | pawan,77,Male,Physics 44 | krithika,5,Female,Physics 45 | Deepak,100,Male,Chemistry 46 | Jatin,85,Male,Chemistry 47 | PRATEEK,80,Male,Chemistry 48 | Deepro,84,Male,Physics 49 | 
atul,63,Male,English 50 | Neeraj,67,Male,English 51 | SUNNY,81,Male,English 52 | Shashank,83,Male,Chemistry 53 | Nishant,89,Male,Physics 54 | Aryan,60,Male,English 55 | riya,47,Female,Chemistry 56 | Ashish,82,Male,Physics 57 | shivam,79,Male,English 58 | GIRISH,88,Male,Chemistry 59 | Sunil,50,Male,Physics 60 | Raju,88,Male,Physics 61 | gokul,40,Male,Chemistry 62 | rakesh,84,Male,Chemistry 63 | krish,83,Male,English 64 | Aditya,87,Male,Chemistry 65 | Ira,48,Female,Physics 66 | Arjun,47,Male,Physics 67 | ROHIT,88,Male,English 68 | Harish,51,Male,Physics 69 | Ramanan,45,Male,Physics 70 | MOHIT,94,Male,Chemistry 71 | Raj,88,Male,Chemistry 72 | Abhishek,91,Male,Chemistry 73 | simran,0,Female,Physics 74 | Angel,99,Female,English 75 | juvina,78,Female,English 76 | ajith,63,Male,English 77 | Tanya,98,Female,Chemistry 78 | Vaibhav,98,Male,Physics 79 | Sneha,98,Female,English 80 | prince,64,Male,Chemistry 81 | jay,81,Male,Physics 82 | Avinash,76,Male,English 83 | sanchit,80,Male,Chemistry 84 | Rohan,97,Male,Physics 85 | dinesh,18,Male,Physics 86 | Lily,90,Female,Chemistry 87 | diksha,71,Female,Chemistry 88 | vishal,67,Male,English 89 | Rutuja,73,Female,Chemistry 90 | akash,93,Male,Physics 91 | mahesh,78,Male,Physics 92 | vedant,74,Male,English 93 | Ram,99,Male,Physics 94 | Dawn,73,Female,Physics 95 | ankur,17,Male,Chemistry 96 | Smriti,0,Female,Chemistry 97 | Aaditya,77,Male,Chemistry 98 | vivek,55,Male,Physics 99 | Tisha,7,Female,English 100 | Raghav,73,Male,English 101 | Ishita,82,Female,Chemistry 102 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/Median.csv: -------------------------------------------------------------------------------- 1 | Student,Overall Marks,Gender,Subject 2 | Anish,9700,Male,English 3 | Rishabh,83,Male,English 4 | Ajay,78,Male,Physics 5 | Abhinav,86,Male,Chemistry 6 | Anurag,91,Male,English 7 | Ajeet,15,Male,Physics 8 | Varun,83,Male,Chemistry 9 | 
Rajeev,64,Male,Chemistry 10 | sanjay,66,Male,Chemistry 11 | NISHA,58,Female,Physics 12 | SURESH,62,Male,Physics 13 | Aniket,62,Male,English 14 | manu,20,Male,Physics 15 | ALOK,85,Male,English 16 | Pranav,49,Male,Chemistry 17 | Deep,92,Male,English 18 | Prashant,21,Male,Physics 19 | Kunal,15,Male,Physics 20 | Soham,46,Male,Chemistry 21 | mayur,100,Male,Chemistry 22 | shaan,18,Male,Chemistry 23 | shivangi,70,Female,Physics 24 | Sam,82,Female,English 25 | Dhruv,88,Male,English 26 | tushar,88,Male,English 27 | Neelam,100,Female,Chemistry 28 | yash,86,Male,Physics 29 | Nitin,60,Male,English 30 | Arun,88,Male,Chemistry 31 | Sumit,19,Male,Physics 32 | manish,81,Male,English 33 | SHAIL,74,Male,Chemistry 34 | Parth,94,Male,Physics 35 | kumar,72,Male,Physics 36 | shyam,77,Male,Chemistry 37 | vikas,75,Male,Chemistry 38 | Manoj,70,Male,English 39 | Siddharth,18,Male,Chemistry 40 | Mayank,51,Male,Physics 41 | swati,81,Female,Physics 42 | anamika,23,Female,English 43 | pawan,77,Male,Physics 44 | krithika,5,Female,Physics 45 | Deepak,100,Male,Chemistry 46 | Jatin,85,Male,Chemistry 47 | PRATEEK,80,Male,Chemistry 48 | Deepro,84,Male,Physics 49 | atul,63,Male,English 50 | Neeraj,67,Male,English 51 | SUNNY,81,Male,English 52 | Shashank,83,Male,Chemistry 53 | Nishant,89,Male,Physics 54 | Aryan,60,Male,English 55 | riya,47,Female,Chemistry 56 | Ashish,82,Male,Physics 57 | shivam,79,Male,English 58 | GIRISH,88,Male,Chemistry 59 | Sunil,50,Male,Physics 60 | Raju,88,Male,Physics 61 | gokul,40,Male,Chemistry 62 | rakesh,84,Male,Chemistry 63 | krish,83,Male,English 64 | Aditya,87,Male,Chemistry 65 | Ira,48,Female,Physics 66 | Arjun,47,Male,Physics 67 | ROHIT,88,Male,English 68 | Harish,51,Male,Physics 69 | Ramanan,45,Male,Physics 70 | MOHIT,94,Male,Chemistry 71 | Raj,88,Male,Chemistry 72 | Abhishek,91,Male,Chemistry 73 | simran,0,Female,Physics 74 | Angel,99,Female,English 75 | juvina,78,Female,English 76 | ajith,63,Male,English 77 | Tanya,98,Female,Chemistry 78 | Vaibhav,98,Male,Physics 79 
| Sneha,98,Female,English 80 | prince,64,Male,Chemistry 81 | jay,81,Male,Physics 82 | Avinash,76,Male,English 83 | sanchit,80,Male,Chemistry 84 | Rohan,97,Male,Physics 85 | dinesh,18,Male,Physics 86 | Lily,90,Female,Chemistry 87 | diksha,71,Female,Chemistry 88 | vishal,67,Male,English 89 | Rutuja,73,Female,Chemistry 90 | akash,93,Male,Physics 91 | mahesh,78,Male,Physics 92 | vedant,74,Male,English 93 | Ram,99,Male,Physics 94 | Dawn,73,Female,Physics 95 | ankur,17,Male,Chemistry 96 | Smriti,0,Female,Chemistry 97 | Aaditya,77,Male,Chemistry 98 | vivek,55,Male,Physics 99 | Tisha,7,Female,English 100 | Raghav,73,Male,English 101 | Ishita,82,Female,Chemistry 102 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/variance.csv: -------------------------------------------------------------------------------- 1 | Student,Overall Marks,Gender,Subject 2 | Anish,9700,Male,English 3 | Rishabh,83,Male,English 4 | Ajay,78,Male,Physics 5 | Abhinav,86,Male,Chemistry 6 | Anurag,91,Male,English 7 | Ajeet,15,Male,Physics 8 | Varun,83,Male,Chemistry 9 | Rajeev,64,Male,Chemistry 10 | sanjay,66,Male,Chemistry 11 | NISHA,58,Female,Physics 12 | SURESH,62,Male,Physics 13 | Aniket,62,Male,English 14 | manu,20,Male,Physics 15 | ALOK,85,Male,English 16 | Pranav,49,Male,Chemistry 17 | Deep,92,Male,English 18 | Prashant,21,Male,Physics 19 | Kunal,15,Male,Physics 20 | Soham,46,Male,Chemistry 21 | mayur,100,Male,Chemistry 22 | shaan,18,Male,Chemistry 23 | shivangi,70,Female,Physics 24 | Sam,82,Female,English 25 | Dhruv,88,Male,English 26 | tushar,88,Male,English 27 | Neelam,100,Female,Chemistry 28 | yash,86,Male,Physics 29 | Nitin,60,Male,English 30 | Arun,88,Male,Chemistry 31 | Sumit,19,Male,Physics 32 | manish,81,Male,English 33 | SHAIL,74,Male,Chemistry 34 | Parth,94,Male,Physics 35 | kumar,72,Male,Physics 36 | shyam,77,Male,Chemistry 37 | vikas,75,Male,Chemistry 38 | Manoj,70,Male,English 39 | 
Siddharth,18,Male,Chemistry 40 | Mayank,51,Male,Physics 41 | swati,81,Female,Physics 42 | anamika,23,Female,English 43 | pawan,77,Male,Physics 44 | krithika,5,Female,Physics 45 | Deepak,100,Male,Chemistry 46 | Jatin,85,Male,Chemistry 47 | PRATEEK,80,Male,Chemistry 48 | Deepro,84,Male,Physics 49 | atul,63,Male,English 50 | Neeraj,67,Male,English 51 | SUNNY,81,Male,English 52 | Shashank,83,Male,Chemistry 53 | Nishant,89,Male,Physics 54 | Aryan,60,Male,English 55 | riya,47,Female,Chemistry 56 | Ashish,82,Male,Physics 57 | shivam,79,Male,English 58 | GIRISH,88,Male,Chemistry 59 | Sunil,50,Male,Physics 60 | Raju,88,Male,Physics 61 | gokul,40,Male,Chemistry 62 | rakesh,84,Male,Chemistry 63 | krish,83,Male,English 64 | Aditya,87,Male,Chemistry 65 | Ira,48,Female,Physics 66 | Arjun,47,Male,Physics 67 | ROHIT,88,Male,English 68 | Harish,51,Male,Physics 69 | Ramanan,45,Male,Physics 70 | MOHIT,94,Male,Chemistry 71 | Raj,88,Male,Chemistry 72 | Abhishek,91,Male,Chemistry 73 | simran,0,Female,Physics 74 | Angel,99,Female,English 75 | juvina,78,Female,English 76 | ajith,63,Male,English 77 | Tanya,98,Female,Chemistry 78 | Vaibhav,98,Male,Physics 79 | Sneha,98,Female,English 80 | prince,64,Male,Chemistry 81 | jay,81,Male,Physics 82 | Avinash,76,Male,English 83 | sanchit,80,Male,Chemistry 84 | Rohan,97,Male,Physics 85 | dinesh,18,Male,Physics 86 | Lily,90,Female,Chemistry 87 | diksha,71,Female,Chemistry 88 | vishal,67,Male,English 89 | Rutuja,73,Female,Chemistry 90 | akash,93,Male,Physics 91 | mahesh,78,Male,Physics 92 | vedant,74,Male,English 93 | Ram,99,Male,Physics 94 | Dawn,73,Female,Physics 95 | ankur,17,Male,Chemistry 96 | Smriti,0,Female,Chemistry 97 | Aaditya,77,Male,Chemistry 98 | vivek,55,Male,Physics 99 | Tisha,7,Female,English 100 | Raghav,73,Male,English 101 | Ishita,82,Female,Chemistry 102 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/Frequency Table.csv: 
-------------------------------------------------------------------------------- 1 | Student,Overall Marks,Gender,Subject 2 | Anish,97,Male,English 3 | Rishabh,83,Male,English 4 | Ajay,78,Male,Physics 5 | Abhinav,86,Male,Chemistry 6 | Anurag,91,Male,English 7 | Ajeet,15,Male,Physics 8 | Varun,83,Male,Chemistry 9 | Rajeev,64,Male,Chemistry 10 | sanjay,66,Male,Chemistry 11 | NISHA,58,Female,Physics 12 | SURESH,62,Male,Physics 13 | Aniket,62,Male,English 14 | manu,20,Male,Physics 15 | ALOK,85,Male,English 16 | Pranav,49,Male,Chemistry 17 | Deep,92,Male,English 18 | Prashant,21,Male,Physics 19 | Kunal,15,Male,Physics 20 | Soham,46,Male,Chemistry 21 | mayur,100,Male,Chemistry 22 | shaan,18,Male,Chemistry 23 | shivangi,70,Female,Physics 24 | Sam,82,Female,English 25 | Dhruv,88,Male,English 26 | tushar,88,Male,English 27 | Neelam,100,Female,Chemistry 28 | yash,86,Male,Physics 29 | Nitin,60,Male,English 30 | Arun,88,Male,Chemistry 31 | Sumit,19,Male,Physics 32 | manish,81,Male,English 33 | SHAIL,74,Male,Chemistry 34 | Parth,94,Male,Physics 35 | kumar,72,Male,Physics 36 | shyam,77,Male,Chemistry 37 | vikas,75,Male,Chemistry 38 | Manoj,70,Male,English 39 | Siddharth,18,Male,Chemistry 40 | Mayank,51,Male,Physics 41 | swati,81,Female,Physics 42 | anamika,23,Female,English 43 | pawan,77,Male,Physics 44 | krithika,5,Female,Physics 45 | Deepak,100,Male,Chemistry 46 | Jatin,85,Male,Chemistry 47 | PRATEEK,80,Male,Chemistry 48 | Deepro,84,Male,Physics 49 | atul,63,Male,English 50 | Neeraj,67,Male,English 51 | SUNNY,81,Male,English 52 | Shashank,83,Male,Chemistry 53 | Nishant,89,Male,Physics 54 | Aryan,60,Male,English 55 | riya,47,Female,Chemistry 56 | Ashish,82,Male,Physics 57 | shivam,79,Male,English 58 | GIRISH,88,Male,Chemistry 59 | Sunil,50,Male,Physics 60 | Raju,88,Male,Physics 61 | gokul,40,Male,Chemistry 62 | rakesh,84,Male,Chemistry 63 | krish,83,Male,English 64 | Aditya,87,Male,Chemistry 65 | Ira,48,Female,Physics 66 | Arjun,47,Male,Physics 67 | ROHIT,88,Male,English 68 | 
Harish,51,Male,Physics 69 | Ramanan,45,Male,Physics 70 | MOHIT,94,Male,Chemistry 71 | Raj,88,Male,Chemistry 72 | Abhishek,91,Male,Chemistry 73 | simran,0,Female,Physics 74 | Angel,99,Female,English 75 | juvina,78,Female,English 76 | ajith,63,Male,English 77 | Tanya,98,Female,Chemistry 78 | Vaibhav,98,Male,Physics 79 | Sneha,98,Female,English 80 | prince,64,Male,Chemistry 81 | jay,81,Male,Physics 82 | Avinash,76,Male,English 83 | sanchit,80,Male,Chemistry 84 | Rohan,97,Male,Physics 85 | dinesh,18,Male,Physics 86 | Lily,90,Female,Chemistry 87 | diksha,71,Female,Chemistry 88 | vishal,67,Male,English 89 | Rutuja,73,Female,Chemistry 90 | akash,93,Male,Physics 91 | mahesh,78,Male,Physics 92 | vedant,74,Male,English 93 | Ram,99,Male,Physics 94 | Dawn,73,Female,Physics 95 | ankur,17,Male,Chemistry 96 | Smriti,0,Female,Chemistry 97 | Aaditya,77,Male,Chemistry 98 | vivek,55,Male,Physics 99 | Tisha,7,Female,English 100 | Raghav,73,Male,English 101 | Ishita,82,Female,Chemistry 102 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/Spread of Data.csv: -------------------------------------------------------------------------------- 1 | Student,Overall Marks,Gender,Subject 2 | Anish,9700,Male,English 3 | Rishabh,83,Male,English 4 | Ajay,78,Male,Physics 5 | Abhinav,86,Male,Chemistry 6 | Anurag,91,Male,English 7 | Ajeet,15,Male,Physics 8 | Varun,83,Male,Chemistry 9 | Rajeev,64,Male,Chemistry 10 | sanjay,66,Male,Chemistry 11 | NISHA,58,Female,Physics 12 | SURESH,62,Male,Physics 13 | Aniket,62,Male,English 14 | manu,20,Male,Physics 15 | ALOK,85,Male,English 16 | Pranav,49,Male,Chemistry 17 | Deep,92,Male,English 18 | Prashant,21,Male,Physics 19 | Kunal,15,Male,Physics 20 | Soham,46,Male,Chemistry 21 | mayur,100,Male,Chemistry 22 | shaan,18,Male,Chemistry 23 | shivangi,70,Female,Physics 24 | Sam,82,Female,English 25 | Dhruv,88,Male,English 26 | tushar,88,Male,English 27 | 
Neelam,100,Female,Chemistry 28 | yash,86,Male,Physics 29 | Nitin,60,Male,English 30 | Arun,88,Male,Chemistry 31 | Sumit,19,Male,Physics 32 | manish,81,Male,English 33 | SHAIL,74,Male,Chemistry 34 | Parth,94,Male,Physics 35 | kumar,72,Male,Physics 36 | shyam,77,Male,Chemistry 37 | vikas,75,Male,Chemistry 38 | Manoj,70,Male,English 39 | Siddharth,18,Male,Chemistry 40 | Mayank,51,Male,Physics 41 | swati,81,Female,Physics 42 | anamika,23,Female,English 43 | pawan,77,Male,Physics 44 | krithika,5,Female,Physics 45 | Deepak,100,Male,Chemistry 46 | Jatin,85,Male,Chemistry 47 | PRATEEK,80,Male,Chemistry 48 | Deepro,84,Male,Physics 49 | atul,63,Male,English 50 | Neeraj,67,Male,English 51 | SUNNY,81,Male,English 52 | Shashank,83,Male,Chemistry 53 | Nishant,89,Male,Physics 54 | Aryan,60,Male,English 55 | riya,47,Female,Chemistry 56 | Ashish,82,Male,Physics 57 | shivam,79,Male,English 58 | GIRISH,88,Male,Chemistry 59 | Sunil,50,Male,Physics 60 | Raju,88,Male,Physics 61 | gokul,40,Male,Chemistry 62 | rakesh,84,Male,Chemistry 63 | krish,83,Male,English 64 | Aditya,87,Male,Chemistry 65 | Ira,48,Female,Physics 66 | Arjun,47,Male,Physics 67 | ROHIT,88,Male,English 68 | Harish,51,Male,Physics 69 | Ramanan,45,Male,Physics 70 | MOHIT,94,Male,Chemistry 71 | Raj,88,Male,Chemistry 72 | Abhishek,91,Male,Chemistry 73 | simran,0,Female,Physics 74 | Angel,99,Female,English 75 | juvina,78,Female,English 76 | ajith,63,Male,English 77 | Tanya,98,Female,Chemistry 78 | Vaibhav,98,Male,Physics 79 | Sneha,98,Female,English 80 | prince,64,Male,Chemistry 81 | jay,81,Male,Physics 82 | Avinash,76,Male,English 83 | sanchit,80,Male,Chemistry 84 | Rohan,97,Male,Physics 85 | dinesh,18,Male,Physics 86 | Lily,90,Female,Chemistry 87 | diksha,71,Female,Chemistry 88 | vishal,67,Male,English 89 | Rutuja,73,Female,Chemistry 90 | akash,93,Male,Physics 91 | mahesh,78,Male,Physics 92 | vedant,74,Male,English 93 | Ram,99,Male,Physics 94 | Dawn,73,Female,Physics 95 | ankur,17,Male,Chemistry 96 | 
Smriti,0,Female,Chemistry 97 | Aaditya,77,Male,Chemistry 98 | vivek,55,Male,Physics 99 | Tisha,7,Female,English 100 | Raghav,73,Male,English 101 | Ishita,82,Female,Chemistry 102 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/Standard Deviation.csv: -------------------------------------------------------------------------------- 1 | Student,Overall Marks,Gender,Subject 2 | Anish,9700,Male,English 3 | Rishabh,83,Male,English 4 | Ajay,78,Male,Physics 5 | Abhinav,86,Male,Chemistry 6 | Anurag,91,Male,English 7 | Ajeet,15,Male,Physics 8 | Varun,83,Male,Chemistry 9 | Rajeev,64,Male,Chemistry 10 | sanjay,66,Male,Chemistry 11 | NISHA,58,Female,Physics 12 | SURESH,62,Male,Physics 13 | Aniket,62,Male,English 14 | manu,20,Male,Physics 15 | ALOK,85,Male,English 16 | Pranav,49,Male,Chemistry 17 | Deep,92,Male,English 18 | Prashant,21,Male,Physics 19 | Kunal,15,Male,Physics 20 | Soham,46,Male,Chemistry 21 | mayur,100,Male,Chemistry 22 | shaan,18,Male,Chemistry 23 | shivangi,70,Female,Physics 24 | Sam,82,Female,English 25 | Dhruv,88,Male,English 26 | tushar,88,Male,English 27 | Neelam,100,Female,Chemistry 28 | yash,86,Male,Physics 29 | Nitin,60,Male,English 30 | Arun,88,Male,Chemistry 31 | Sumit,19,Male,Physics 32 | manish,81,Male,English 33 | SHAIL,74,Male,Chemistry 34 | Parth,94,Male,Physics 35 | kumar,72,Male,Physics 36 | shyam,77,Male,Chemistry 37 | vikas,75,Male,Chemistry 38 | Manoj,70,Male,English 39 | Siddharth,18,Male,Chemistry 40 | Mayank,51,Male,Physics 41 | swati,81,Female,Physics 42 | anamika,23,Female,English 43 | pawan,77,Male,Physics 44 | krithika,5,Female,Physics 45 | Deepak,100,Male,Chemistry 46 | Jatin,85,Male,Chemistry 47 | PRATEEK,80,Male,Chemistry 48 | Deepro,84,Male,Physics 49 | atul,63,Male,English 50 | Neeraj,67,Male,English 51 | SUNNY,81,Male,English 52 | Shashank,83,Male,Chemistry 53 | Nishant,89,Male,Physics 54 | Aryan,60,Male,English 55 | 
riya,47,Female,Chemistry 56 | Ashish,82,Male,Physics 57 | shivam,79,Male,English 58 | GIRISH,88,Male,Chemistry 59 | Sunil,50,Male,Physics 60 | Raju,88,Male,Physics 61 | gokul,40,Male,Chemistry 62 | rakesh,84,Male,Chemistry 63 | krish,83,Male,English 64 | Aditya,87,Male,Chemistry 65 | Ira,48,Female,Physics 66 | Arjun,47,Male,Physics 67 | ROHIT,88,Male,English 68 | Harish,51,Male,Physics 69 | Ramanan,45,Male,Physics 70 | MOHIT,94,Male,Chemistry 71 | Raj,88,Male,Chemistry 72 | Abhishek,91,Male,Chemistry 73 | simran,0,Female,Physics 74 | Angel,99,Female,English 75 | juvina,78,Female,English 76 | ajith,63,Male,English 77 | Tanya,98,Female,Chemistry 78 | Vaibhav,98,Male,Physics 79 | Sneha,98,Female,English 80 | prince,64,Male,Chemistry 81 | jay,81,Male,Physics 82 | Avinash,76,Male,English 83 | sanchit,80,Male,Chemistry 84 | Rohan,97,Male,Physics 85 | dinesh,18,Male,Physics 86 | Lily,90,Female,Chemistry 87 | diksha,71,Female,Chemistry 88 | vishal,67,Male,English 89 | Rutuja,73,Female,Chemistry 90 | akash,93,Male,Physics 91 | mahesh,78,Male,Physics 92 | vedant,74,Male,English 93 | Ram,99,Male,Physics 94 | Dawn,73,Female,Physics 95 | ankur,17,Male,Chemistry 96 | Smriti,0,Female,Chemistry 97 | Aaditya,77,Male,Chemistry 98 | vivek,55,Male,Physics 99 | Tisha,7,Female,English 100 | Raghav,73,Male,English 101 | Ishita,82,Female,Chemistry 102 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/mean_robust.csv: -------------------------------------------------------------------------------- 1 | Student,Overall Marks,Gender,Subject 2 | Anish,97,Male,English 3 | Rishabh,83,Male,English 4 | Ajay,78,Male,Physics 5 | Abhinav,86,Male,Chemistry 6 | Anurag,91,Male,English 7 | Ajeet,15,Male,Physics 8 | Varun,83,Male,Chemistry 9 | Rajeev,64,Male,Chemistry 10 | sanjay,66,Male,Chemistry 11 | NISHA,58,Female,Physics 12 | SURESH,62,Male,Physics 13 | Aniket,62,Male,English 14 | manu,20,Male,Physics 15 | 
ALOK,85,Male,English 16 | Pranav,49,Male,Chemistry 17 | Deep,92,Male,English 18 | Prashant,21,Male,Physics 19 | Kunal,15,Male,Physics 20 | Soham,46,Male,Chemistry 21 | mayur,100,Male,Chemistry 22 | shaan,18,Male,Chemistry 23 | shivangi,70,Female,Physics 24 | Sam,82,Female,English 25 | Dhruv,88,Male,English 26 | tushar,88,Male,English 27 | Neelam,100,Female,Chemistry 28 | yash,86,Male,Physics 29 | Nitin,60,Male,English 30 | Arun,88,Male,Chemistry 31 | Sumit,19,Male,Physics 32 | manish,81,Male,English 33 | SHAIL,74,Male,Chemistry 34 | Parth,94,Male,Physics 35 | kumar,72,Male,Physics 36 | shyam,77,Male,Chemistry 37 | vikas,75,Male,Chemistry 38 | Manoj,70,Male,English 39 | Siddharth,18,Male,Chemistry 40 | Mayank,51,Male,Physics 41 | swati,81,Female,Physics 42 | anamika,23,Female,English 43 | pawan,77,Male,Physics 44 | krithika,5,Female,Physics 45 | Deepak,100,Male,Chemistry 46 | Jatin,85,Male,Chemistry 47 | PRATEEK,80,Male,Chemistry 48 | Deepro,84,Male,Physics 49 | atul,63,Male,English 50 | Neeraj,67,Male,English 51 | SUNNY,81,Male,English 52 | Shashank,83,Male,Chemistry 53 | Nishant,89,Male,Physics 54 | Aryan,60,Male,English 55 | riya,47,Female,Chemistry 56 | Ashish,82,Male,Physics 57 | shivam,79,Male,English 58 | GIRISH,88,Male,Chemistry 59 | Sunil,50,Male,Physics 60 | Raju,88,Male,Physics 61 | gokul,40,Male,Chemistry 62 | rakesh,84,Male,Chemistry 63 | krish,83,Male,English 64 | Aditya,87,Male,Chemistry 65 | Ira,48,Female,Physics 66 | Arjun,47,Male,Physics 67 | ROHIT,88,Male,English 68 | Harish,51,Male,Physics 69 | Ramanan,45,Male,Physics 70 | MOHIT,94,Male,Chemistry 71 | Raj,88,Male,Chemistry 72 | Abhishek,91,Male,Chemistry 73 | simran,0,Female,Physics 74 | Angel,99,Female,English 75 | juvina,78,Female,English 76 | ajith,63,Male,English 77 | Tanya,98,Female,Chemistry 78 | Vaibhav,98,Male,Physics 79 | Sneha,98,Female,English 80 | prince,64,Male,Chemistry 81 | jay,81,Male,Physics 82 | Avinash,76,Male,English 83 | sanchit,80,Male,Chemistry 84 | Rohan,97,Male,Physics 85 
| dinesh,18,Male,Physics 86 | Lily,90,Female,Chemistry 87 | diksha,71,Female,Chemistry 88 | vishal,67,Male,English 89 | Rutuja,73,Female,Chemistry 90 | akash,93,Male,Physics 91 | mahesh,78,Male,Physics 92 | vedant,74,Male,English 93 | Ram,99,Male,Physics 94 | Dawn,73,Female,Physics 95 | ankur,17,Male,Chemistry 96 | Smriti,0,Female,Chemistry 97 | Aaditya,77,Male,Chemistry 98 | vivek,55,Male,Physics 99 | Tisha,7,Female,English 100 | Raghav,73,Male,English 101 | Ishita,82,Female,Chemistry 102 | ,10000,, 103 | -------------------------------------------------------------------------------- /INTERNSHALA/1 Python Basics/5 Dictionaries.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "a = {'sarim': 2, 'rihan':[30,40]}" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "{'sarim': 2, 'rihan': [30, 40]}" 21 | ] 22 | }, 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "a" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 4, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "2" 41 | ] 42 | }, 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "a['sarim']" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 8, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "a.update({'a':2,'c':3})" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 9, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/plain": [ 69 | "{'sarim': 2, 'rihan': [30, 40], 'a': 2, 'c': 3}" 70 | ] 71 | }, 72 | "execution_count": 9, 73 | "metadata": {}, 74 | 
"output_type": "execute_result" 75 | } 76 | ], 77 | "source": [ 78 | "a" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 10, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "a['Ankita']=[10,2,3,4,5,6]" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 11, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/plain": [ 98 | "{'sarim': 2, 'rihan': [30, 40], 'a': 2, 'c': 3, 'Ankita': [10, 2, 3, 4, 5, 6]}" 99 | ] 100 | }, 101 | "execution_count": 11, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | } 105 | ], 106 | "source": [ 107 | "a" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "g=dict()\n", 117 | "[1]" 118 | ] 119 | } 120 | ], 121 | "metadata": { 122 | "kernelspec": { 123 | "display_name": "Python 3", 124 | "language": "python", 125 | "name": "python3" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 3 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | "pygments_lexer": "ipython3", 137 | "version": "3.7.1" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 2 142 | } 143 | -------------------------------------------------------------------------------- /INTERNSHALA/3 Predictive modeling and ML/6 Transforming variables test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#importing libraries\n", 12 | "\n", 13 | "import pandas as pd\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "%matplotlib inline " 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | 
"collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "#reading the dataset into pandas\n", 27 | "\n", 28 | "df=pd.read_csv(\"data.csv\")" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "#first few rows of the dataset\n", 40 | "\n", 41 | "df.head()" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": true 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "df['Age'].plot.hist()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": true 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "np.power(df['Age'],1/3).plot.hist()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "bins=[0,15,80]\n", 75 | "\n", 76 | "group=['children', 'Adult']" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": true 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "df['type']=pd.cut(df['Age'],bins,label=group)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "collapsed": true 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "df['type'].value_counts()" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "collapsed": true 106 | }, 107 | "outputs": [], 108 | "source": [] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": true 115 | }, 116 | "outputs": [], 117 | "source": [] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": { 123 | "collapsed": true 124 | }, 125 | "outputs": [], 126 | "source": [] 127 | } 128 | ], 129 | "metadata": { 130 | 
"kernelspec": { 131 | "display_name": "Python 3", 132 | "language": "python", 133 | "name": "python3" 134 | }, 135 | "language_info": { 136 | "codemirror_mode": { 137 | "name": "ipython", 138 | "version": 3 139 | }, 140 | "file_extension": ".py", 141 | "mimetype": "text/x-python", 142 | "name": "python", 143 | "nbconvert_exporter": "python", 144 | "pygments_lexer": "ipython3", 145 | "version": "3.7.1" 146 | } 147 | }, 148 | "nbformat": 4, 149 | "nbformat_minor": 2 150 | } 151 | -------------------------------------------------------------------------------- /INTERNSHALA/1 Python Basics/3 loop.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "ename": "TypeError", 10 | "evalue": "'float' object cannot be interpreted as an integer", 11 | "output_type": "error", 12 | "traceback": [ 13 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 14 | "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", 15 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m#only integer value\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m20.0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"sarim\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 16 | "\u001b[1;31mTypeError\u001b[0m: 'float' object cannot be interpreted as an integer" 17 | ] 18 | } 19 | ], 20 | "source": [ 21 | "#only integer value\n", 22 | "for i in range(20.0):\n", 23 | " 
print(\"sarim\")" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 4, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "for i in '':\n", 33 | " print(\"sarim\")" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "# ranging" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 6, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "10\n", 53 | "11\n", 54 | "12\n", 55 | "13\n", 56 | "14\n", 57 | "15\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "for i in range (10,16):\n", 63 | " print(i)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "# printing odd numbers using a step (increment) of two" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 9, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "11\n", 83 | "13\n", 84 | "15\n", 85 | "17\n", 86 | "19\n", 87 | "21\n", 88 | "23\n", 89 | "25\n", 90 | "27\n", 91 | "29\n" 92 | ] 93 | } 94 | ], 95 | "source": [ 96 | "for i in range(11,31,2):\n", 97 | " print(i)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [] 106 | } 107 | ], 108 | "metadata": { 109 | "kernelspec": { 110 | "display_name": "Python 3", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.7.1" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 2 129 | } 130 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the 
Statistics for Data science/12 Chi square.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#Importing libraries\n", 10 | "import pandas as pd\n", 11 | "import numpy as np\n", 12 | "import scipy.stats as stats\n", 13 | "from scipy.stats import chisquare" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "#Importing data\n", 23 | "data = pd.read_csv(\"chi_square.csv\")" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/html": [ 34 | "
\n", 35 | "\n", 48 | "\n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | "
EventObservedExpected
0Successful4133
1Unsuccessful5967
\n", 72 | "
" 73 | ], 74 | "text/plain": [ 75 | " Event Observed Expected\n", 76 | "0 Successful 41 33\n", 77 | "1 Unsuccessful 59 67" 78 | ] 79 | }, 80 | "execution_count": 3, 81 | "metadata": {}, 82 | "output_type": "execute_result" 83 | } 84 | ], 85 | "source": [ 86 | "#Printing\n", 87 | "data" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 4, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "#Calculating the chi-square statistic (stored in t_statistic) and p-value of the chi-square test\n", 97 | "t_statistic, p_value = chisquare(f_obs= data['Observed'],f_exp=data['Expected'])" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 5, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "0.08887585044058065" 109 | ] 110 | }, 111 | "execution_count": 5, 112 | "metadata": {}, 113 | "output_type": "execute_result" 114 | } 115 | ], 116 | "source": [ 117 | "p_value" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "Since p > 0.05, we fail to reject the null hypothesis, i.e. the observed frequencies do not differ significantly from the expected frequencies." 
125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 6, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "2.8946178199909545" 136 | ] 137 | }, 138 | "execution_count": 6, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "t_statistic" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": true 152 | }, 153 | "outputs": [], 154 | "source": [] 155 | } 156 | ], 157 | "metadata": { 158 | "kernelspec": { 159 | "display_name": "Python 3", 160 | "language": "python", 161 | "name": "python3" 162 | }, 163 | "language_info": { 164 | "codemirror_mode": { 165 | "name": "ipython", 166 | "version": 3 167 | }, 168 | "file_extension": ".py", 169 | "mimetype": "text/x-python", 170 | "name": "python", 171 | "nbconvert_exporter": "python", 172 | "pygments_lexer": "ipython3", 173 | "version": "3.7.1" 174 | } 175 | }, 176 | "nbformat": 4, 177 | "nbformat_minor": 2 178 | } 179 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/11 Paired T-test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#Importing libraries\n", 10 | "import pandas as pd\n", 11 | "import scipy.stats as stats\n", 12 | "from scipy.stats import ttest_rel" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "#Loading data\n", 22 | "data = pd.read_csv(\"Data for paired t test.csv\")" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/html": [ 33 | "
\n", 34 | "\n", 47 | "\n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | "
Errors using typewriterErrors using a computer
088
1138
2104
379
41310
\n", 83 | "
" 84 | ], 85 | "text/plain": [ 86 | " Errors using typewriter Errors using a computer\n", 87 | "0 8 8\n", 88 | "1 13 8\n", 89 | "2 10 4\n", 90 | "3 7 9\n", 91 | "4 13 10" 92 | ] 93 | }, 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [ 100 | "#Printing first 5 rows\n", 101 | "data.head()" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 4, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "# Calculating t and p-value using scipy library\n", 111 | "t_statistic, _ = stats.ttest_rel(data['Errors using typewriter'],data['Errors using a computer'])" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 5, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/plain": [ 122 | "3.6842075835369266" 123 | ] 124 | }, 125 | "execution_count": 5, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "#Printing t-statistic\n", 132 | "t_statistic" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "T critical from the table at 0.05 significance level and degree of freedom 24 is 1.711" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [] 148 | } 149 | ], 150 | "metadata": { 151 | "kernelspec": { 152 | "display_name": "Python 3", 153 | "language": "python", 154 | "name": "python3" 155 | }, 156 | "language_info": { 157 | "codemirror_mode": { 158 | "name": "ipython", 159 | "version": 3 160 | }, 161 | "file_extension": ".py", 162 | "mimetype": "text/x-python", 163 | "name": "python", 164 | "nbconvert_exporter": "python", 165 | "pygments_lexer": "ipython3", 166 | "version": "3.6.5" 167 | } 168 | }, 169 | "nbformat": 4, 170 | "nbformat_minor": 2 171 | } 172 | -------------------------------------------------------------------------------- 
/INTERNSHALA/2 Understanding the Statistics for Data science/9 T-test( one sample ).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "colab": {}, 8 | "colab_type": "code", 9 | "id": "G4h-zs_qbPVh" 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "#Importing libraries\n", 14 | "import pandas as pd\n", 15 | "import scipy.stats as stats\n", 16 | "from scipy.stats import ttest_1samp" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": { 23 | "colab": {}, 24 | "colab_type": "code", 25 | "id": "VWvE52FEbPVl" 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "#Loading data\n", 30 | "data = pd.read_csv(\"onesample.csv\")\n", 31 | "data = data['Insect Length']" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": { 38 | "colab": { 39 | "base_uri": "https://localhost:8080/", 40 | "height": 119 41 | }, 42 | "colab_type": "code", 43 | "executionInfo": { 44 | "elapsed": 1052, 45 | "status": "ok", 46 | "timestamp": 1549886841242, 47 | "user": { 48 | "displayName": "Sharoon Saxena", 49 | "photoUrl": "", 50 | "userId": "14774175216384036942" 51 | }, 52 | "user_tz": -330 53 | }, 54 | "id": "WsU4i6DdbPVp", 55 | "outputId": "7f903045-2cca-4708-ee79-41bc7de9ca92" 56 | }, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/plain": [ 61 | "0 6.55\n", 62 | "1 6.67\n", 63 | "2 6.56\n", 64 | "3 6.63\n", 65 | "4 6.39\n", 66 | "Name: Insect Length, dtype: float64" 67 | ] 68 | }, 69 | "execution_count": 3, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "#Printing first 5 rows\n", 76 | "data.head()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": { 82 | "colab_type": "text", 83 | "id": "U7yNu4avbPVr" 84 | }, 85 | "source": [ 86 | "Let's conduct a one sample t-test to check if the mean of the sample insects is 
similar to the mean of earlier insects. The mean of earlier insects is 6.09." 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": { 92 | "colab_type": "text", 93 | "id": "iYrNZ3fPgb8c" 94 | }, 95 | "source": [ 96 | "With 28 degrees of freedom and a significance level of 0.05, the t-critical value comes out to be 2.048\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 4, 102 | "metadata": { 103 | "colab": {}, 104 | "colab_type": "code", 105 | "id": "CaRyVz5kbPVs" 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "t_statistic, _ = ttest_1samp(data, 6.09)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 5, 115 | "metadata": { 116 | "colab": { 117 | "base_uri": "https://localhost:8080/", 118 | "height": 34 119 | }, 120 | "colab_type": "code", 121 | "executionInfo": { 122 | "elapsed": 941, 123 | "status": "ok", 124 | "timestamp": 1549886894690, 125 | "user": { 126 | "displayName": "Sharoon Saxena", 127 | "photoUrl": "", 128 | "userId": "14774175216384036942" 129 | }, 130 | "user_tz": -330 131 | }, 132 | "id": "y-tu703cdKHq", 133 | "outputId": "b794a1ae-2421-4c2c-abfb-31f0e68c59ae" 134 | }, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "5.466112820736585\n" 141 | ] 142 | } 143 | ], 144 | "source": [ 145 | "print(t_statistic)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": { 151 | "colab_type": "text", 152 | "id": "5GZcqIY0do5u" 153 | }, 154 | "source": [ 155 | "t-statistic > t-critical\n", 156 | "We reject the null Hypothesis." 
157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [] 165 | } 166 | ], 167 | "metadata": { 168 | "colab": { 169 | "collapsed_sections": [], 170 | "name": "T-test( one sample ).ipynb", 171 | "provenance": [], 172 | "version": "0.3.2" 173 | }, 174 | "kernelspec": { 175 | "display_name": "Python 3", 176 | "language": "python", 177 | "name": "python3" 178 | }, 179 | "language_info": { 180 | "codemirror_mode": { 181 | "name": "ipython", 182 | "version": 3 183 | }, 184 | "file_extension": ".py", 185 | "mimetype": "text/x-python", 186 | "name": "python", 187 | "nbconvert_exporter": "python", 188 | "pygments_lexer": "ipython3", 189 | "version": "3.7.1" 190 | } 191 | }, 192 | "nbformat": 4, 193 | "nbformat_minor": 1 194 | } 195 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/7 Frequency table.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#Importing library\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "#Loading dataset\n", 20 | "data = pd.read_csv(\"Frequency Table.csv\")" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | "
StudentOverall MarksGenderSubject
0Anish97MaleEnglish
1Rishabh83MaleEnglish
2Ajay78MalePhysics
3Abhinav86MaleChemistry
4Anurag91MaleEnglish
\n", 93 | "
" 94 | ], 95 | "text/plain": [ 96 | " Student Overall Marks Gender Subject\n", 97 | "0 Anish 97 Male English\n", 98 | "1 Rishabh 83 Male English\n", 99 | "2 Ajay 78 Male Physics\n", 100 | "3 Abhinav 86 Male Chemistry\n", 101 | "4 Anurag 91 Male English" 102 | ] 103 | }, 104 | "execution_count": 3, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "#Printing first 5 rows\n", 111 | "data.head()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "## Frequency Table for Categorical Variables" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 4, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "#Generating frequency table\n", 128 | "#freq_data = data['Subject'].value_counts()\n", 129 | "freq_data = data['Subject'].value_counts()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 5, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "Chemistry 36\n", 142 | "Physics 35\n", 143 | "English 29\n", 144 | "Name: Subject, dtype: int64\n" 145 | ] 146 | } 147 | ], 148 | "source": [ 149 | "print(freq_data)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [] 165 | } 166 | ], 167 | "metadata": { 168 | "kernelspec": { 169 | "display_name": "Python 3", 170 | "language": "python", 171 | "name": "python3" 172 | }, 173 | "language_info": { 174 | "codemirror_mode": { 175 | "name": "ipython", 176 | "version": 3 177 | }, 178 | "file_extension": ".py", 179 | "mimetype": "text/x-python", 180 | "name": "python", 181 | "nbconvert_exporter": "python", 182 | "pygments_lexer": "ipython3", 183 | "version": "3.7.1" 184 | } 185 
| }, 186 | "nbformat": 4, 187 | "nbformat_minor": 2 188 | } 189 | -------------------------------------------------------------------------------- /INTERNSHALA/1 Python Basics/4 List.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 40, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "list = [1,'sarim', 5, 'chaudhary',7,8]" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 41, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "[1, 'sarim', 5, 'chaudhary', 7, 8]\n" 22 | ] 23 | } 24 | ], 25 | "source": [ 26 | "print(list)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 42, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "1\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "print(list[0])" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 43, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "[1, 'sarim', 5, 'chaudhary']\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "# use colon instead of comma\n", 61 | "x= list[0:4]\n", 62 | "print(x)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 44, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "7" 74 | ] 75 | }, 76 | "execution_count": 44, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "list[-2]" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 45, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "list.append([2,4,'mane'])" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 46, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | 
"[1, 'sarim', 5, 'chaudhary', 7, 8, [2, 4, 'mane']]" 103 | ] 104 | }, 105 | "execution_count": 46, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "list" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 47, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/plain": [ 122 | "[2, 4, 'mane']" 123 | ] 124 | }, 125 | "execution_count": 47, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "list[-1]" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 48, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "list.extend([5,5,5,5])" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 49, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "text/plain": [ 151 | "[1, 'sarim', 5, 'chaudhary', 7, 8, [2, 4, 'mane'], 5, 5, 5, 5]" 152 | ] 153 | }, 154 | "execution_count": 49, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "list" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 50, 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "name": "stdout", 170 | "output_type": "stream", 171 | "text": [ 172 | "1\n", 173 | "sarim\n", 174 | "5\n", 175 | "chaudhary\n", 176 | "7\n", 177 | "8\n", 178 | "[2, 4, 'mane']\n", 179 | "5\n", 180 | "5\n", 181 | "5\n", 182 | "5\n" 183 | ] 184 | } 185 | ], 186 | "source": [ 187 | "for i in list:\n", 188 | " print(i)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 51, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "del list[-5:-1]" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 52, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "data": { 207 | "text/plain": [ 208 | "[1, 'sarim', 5, 'chaudhary', 7, 8, 5]" 209 | ] 210 | }, 211 | "execution_count": 
52, 212 | "metadata": {}, 213 | "output_type": "execute_result" 214 | } 215 | ], 216 | "source": [ 217 | "list" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 53, 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "name": "stdout", 227 | "output_type": "stream", 228 | "text": [ 229 | "1\n", 230 | "sarim\n", 231 | "5\n", 232 | "chaudhary\n", 233 | "7\n", 234 | "8\n", 235 | "5\n" 236 | ] 237 | } 238 | ], 239 | "source": [ 240 | "for i in list:\n", 241 | " print(i)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [] 250 | } 251 | ], 252 | "metadata": { 253 | "kernelspec": { 254 | "display_name": "Python 3", 255 | "language": "python", 256 | "name": "python3" 257 | }, 258 | "language_info": { 259 | "codemirror_mode": { 260 | "name": "ipython", 261 | "version": 3 262 | }, 263 | "file_extension": ".py", 264 | "mimetype": "text/x-python", 265 | "name": "python", 266 | "nbconvert_exporter": "python", 267 | "pygments_lexer": "ipython3", 268 | "version": "3.7.1" 269 | } 270 | }, 271 | "nbformat": 4, 272 | "nbformat_minor": 2 273 | } 274 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/4 Spread of Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#Importing library\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "#Loading dataset\n", 20 | "data = pd.read_csv('Spread of Data.csv')" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | "
StudentOverall MarksGenderSubject
0Anish9700MaleEnglish
1Rishabh83MaleEnglish
2Ajay78MalePhysics
3Abhinav86MaleChemistry
4Anurag91MaleEnglish
\n", 93 | "
" 94 | ], 95 | "text/plain": [ 96 | " Student Overall Marks Gender Subject\n", 97 | "0 Anish 9700 Male English\n", 98 | "1 Rishabh 83 Male English\n", 99 | "2 Ajay 78 Male Physics\n", 100 | "3 Abhinav 86 Male Chemistry\n", 101 | "4 Anurag 91 Male English" 102 | ] 103 | }, 104 | "execution_count": 3, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "#Print first 5 rows\n", 111 | "data.head()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 4, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "max_data = data['Overall Marks'].max()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 5, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "min_data = data['Overall Marks'].min()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 6, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "range_data = max_data - min_data" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 7, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "name": "stdout", 148 | "output_type": "stream", 149 | "text": [ 150 | "9700\n" 151 | ] 152 | } 153 | ], 154 | "source": [ 155 | "print(range_data)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 8, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "# calculating IQR requires calculating 1st and 3rd quartiles.\n", 165 | "\n", 166 | "Q1 = data['Overall Marks'].quantile(0.25)\n", 167 | "Q3 = data['Overall Marks'].quantile(0.75)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 9, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "30.0\n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "IQR = Q3 - Q1\n", 185 | "print(IQR)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 
null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [] 201 | } 202 | ], 203 | "metadata": { 204 | "kernelspec": { 205 | "display_name": "Python 3", 206 | "language": "python", 207 | "name": "python3" 208 | }, 209 | "language_info": { 210 | "codemirror_mode": { 211 | "name": "ipython", 212 | "version": 3 213 | }, 214 | "file_extension": ".py", 215 | "mimetype": "text/x-python", 216 | "name": "python", 217 | "nbconvert_exporter": "python", 218 | "pygments_lexer": "ipython3", 219 | "version": "3.7.1" 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 2 224 | } 225 | -------------------------------------------------------------------------------- /INTERNSHALA/1 Python Basics/1 Python Basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "and" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 12, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "text/plain": [ 18 | "5" 19 | ] 20 | }, 21 | "execution_count": 12, 22 | "metadata": {}, 23 | "output_type": "execute_result" 24 | } 25 | ], 26 | "source": [ 27 | "1 and 5" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 13, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "5\n" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "print(5 and 5)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "or" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 14, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "3" 63 | ] 64 | }, 65 | "execution_count": 14, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 
| ], 70 | "source": [ 71 | "3 or 1" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 15, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "1" 83 | ] 84 | }, 85 | "execution_count": 15, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "1 or 3" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 16, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "4" 103 | ] 104 | }, 105 | "execution_count": 16, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "0 or 4" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 17, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/plain": [ 122 | "0" 123 | ] 124 | }, 125 | "execution_count": 17, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "0 or 0" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 19, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/plain": [ 142 | "11" 143 | ] 144 | }, 145 | "execution_count": 19, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "11 or 0" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "not" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 22, 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "name": "stdout", 168 | "output_type": "stream", 169 | "text": [ 170 | "False\n" 171 | ] 172 | } 173 | ], 174 | "source": [ 175 | "print(not 3)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 23, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "data": { 185 | "text/plain": [ 186 | "True" 187 | ] 188 | }, 189 | "execution_count": 23, 190 | "metadata": {}, 191 | 
"output_type": "execute_result" 192 | } 193 | ], 194 | "source": [ 195 | "not 0" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 24, 201 | "metadata": {}, 202 | "outputs": [ 203 | { 204 | "data": { 205 | "text/plain": [ 206 | "False" 207 | ] 208 | }, 209 | "execution_count": 24, 210 | "metadata": {}, 211 | "output_type": "execute_result" 212 | } 213 | ], 214 | "source": [ 215 | " not \"sarim\"" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 25, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "data": { 225 | "text/plain": [ 226 | "2.0" 227 | ] 228 | }, 229 | "execution_count": 25, 230 | "metadata": {}, 231 | "output_type": "execute_result" 232 | } 233 | ], 234 | "source": [ 235 | "4.5 // 2" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 26, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/plain": [ 246 | "2" 247 | ] 248 | }, 249 | "execution_count": 26, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "4 // 2" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 27, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "data": { 265 | "text/plain": [ 266 | "2" 267 | ] 268 | }, 269 | "execution_count": 27, 270 | "metadata": {}, 271 | "output_type": "execute_result" 272 | } 273 | ], 274 | "source": [ 275 | "5 // 2" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [] 284 | } 285 | ], 286 | "metadata": { 287 | "kernelspec": { 288 | "display_name": "Python 3", 289 | "language": "python", 290 | "name": "python3" 291 | }, 292 | "language_info": { 293 | "codemirror_mode": { 294 | "name": "ipython", 295 | "version": 3 296 | }, 297 | "file_extension": ".py", 298 | "mimetype": "text/x-python", 299 | "name": "python", 300 | "nbconvert_exporter": "python", 301 | "pygments_lexer": 
"ipython3", 302 | "version": "3.7.1" 303 | } 304 | }, 305 | "nbformat": 4, 306 | "nbformat_minor": 2 307 | } 308 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/5 Variance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Importing library\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 5, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "#Loading data\n", 20 | "data = pd.read_csv(\"variance.csv\")" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 6, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | "
StudentOverall MarksGenderSubject
0Anish9700MaleEnglish
1Rishabh83MaleEnglish
2Ajay78MalePhysics
3Abhinav86MaleChemistry
4Anurag91MaleEnglish
\n", 93 | "
" 94 | ], 95 | "text/plain": [ 96 | " Student Overall Marks Gender Subject\n", 97 | "0 Anish 9700 Male English\n", 98 | "1 Rishabh 83 Male English\n", 99 | "2 Ajay 78 Male Physics\n", 100 | "3 Abhinav 86 Male Chemistry\n", 101 | "4 Anurag 91 Male English" 102 | ] 103 | }, 104 | "execution_count": 6, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "#Printing top 5 rows\n", 111 | "data.head()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 7, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "#Steps to calculate variance\n", 121 | "\n", 122 | "# 1. Calculate Mean\n", 123 | "# 2. Calculate the distance from mean for each element\n", 124 | "# 3. Calculate Squared distance\n", 125 | "# 4. Take average of Squared distance" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 8, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "mean = data['Overall Marks'].mean()" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 9, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "difference = data['Overall Marks'] - mean" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 10, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "squared_difference = difference ** 2" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 11, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "variance = squared_difference.mean()" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 12, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "919170.8423999996\n" 181 | ] 182 | } 183 | ], 184 | "source": [ 185 | 
"print(variance)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 13, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "#Calculating variance\n", 195 | "var_data = data['Overall Marks'].var(ddof = 0)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 14, 201 | "metadata": {}, 202 | "outputs": [ 203 | { 204 | "name": "stdout", 205 | "output_type": "stream", 206 | "text": [ 207 | "919170.8423999996\n" 208 | ] 209 | } 210 | ], 211 | "source": [ 212 | "print(var_data)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [] 221 | } 222 | ], 223 | "metadata": { 224 | "kernelspec": { 225 | "display_name": "Python 3", 226 | "language": "python", 227 | "name": "python3" 228 | }, 229 | "language_info": { 230 | "codemirror_mode": { 231 | "name": "ipython", 232 | "version": 3 233 | }, 234 | "file_extension": ".py", 235 | "mimetype": "text/x-python", 236 | "name": "python", 237 | "nbconvert_exporter": "python", 238 | "pygments_lexer": "ipython3", 239 | "version": "3.7.1" 240 | } 241 | }, 242 | "nbformat": 4, 243 | "nbformat_minor": 2 244 | } 245 | -------------------------------------------------------------------------------- /INTERNSHALA/1 Python Basics/Python_Coding_Challenge_Solution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Basics of Python" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "# initialize variables 'a' and 'b' with 5 and 6 respectively\n", 19 | "a = 5\n", 20 | "b = 6\n", 21 | "\n", 22 | "# add 'a' and 'b' and assign the result into a new variable 'c'\n", 23 | "c = a+b\n", 24 | "print(c)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 
29 | "execution_count": null, 30 | "metadata": { 31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "# build a function to add 2 numbers\n", 36 | "def addition(x,y):\n", 37 | " return(x+y)\n", 38 | "\n", 39 | "# use the function 'addition' to add 'a' and 'b'\n", 40 | "addition(a,b)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "# create a list consisting of first 5 even numbers and print it\n", 52 | "my_list = [2,4,6,8,10]\n", 53 | "print(my_list)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": true 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "# access the 3rd element of the list 'my_list'\n", 65 | "my_list[2]" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "collapsed": true 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "# given below is a dictionary having 4 unique keys, i.e., 'name', 'age', 'gender', 'is_employed'\n", 77 | "my_dict = {'name':'Smith',\n", 78 | " 'age':34,\n", 79 | " 'gender': 'Male',\n", 80 | " 'is_employed': False}\n", 81 | "\n", 82 | "# print 'my_dict'\n", 83 | "print(my_dict)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": { 90 | "collapsed": true 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "# access name 'my_dict'\n", 95 | "my_dict['name']" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": true 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "# update 'is_employed' key to True\n", 107 | "my_dict.update({'is_employed':True})\n", 108 | "\n", 109 | "# print the updated dictionary\n", 110 | "print(my_dict)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "collapsed": true 118 | }, 119 | 
"outputs": [], 120 | "source": [ 121 | "# use a for loop to print only even numbers from the first 20 numbers, i.e. 1-20\n", 122 | "for i in range(1,21):\n", 123 | " if i % 2 == 0:\n", 124 | " print(i)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "### Please download the file \"data_python.csv\"." 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "collapsed": true 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "# load required libraries\n", 143 | "import pandas as pd\n", 144 | "import numpy as np" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": true 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "## read data_python.csv using pandas\n", 156 | "## start code\n", 157 | "mydata = pd.read_csv(\"data_python.csv\")\n", 158 | "## end code" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": { 165 | "collapsed": true 166 | }, 167 | "outputs": [], 168 | "source": [ 169 | "## print the number of rows and number of columns of mydata\n", 170 | "## start code\n", 171 | "mydata.shape\n", 172 | "## end code" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "collapsed": true 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "## assign a variable 'target' with the 'Loan_Status' feature from mydata dataframe\n", 184 | "## start code\n", 185 | "target = mydata['Loan_Status']\n", 186 | "## end code" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": true 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "## print the datatype of ApplicantIncome feature\n", 198 | "## start code\n", 199 | "print(mydata['ApplicantIncome'].dtype)\n", 200 | "## end code" 201 | ] 202 | }, 203 | { 204 | "cell_type": 
"code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": true 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "## conditional statement - print 'Yes' if the 21st element of 'Education' feature is 'Graduate' else print 'No'\n", 212 | "if(mydata['Education'][20] == 'Graduate'):\n", 213 | " ## start code\n", 214 | " print('Yes')\n", 215 | " ## end code\n", 216 | "else:\n", 217 | " ## start code\n", 218 | " print('No')\n", 219 | " ## end code" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": { 226 | "collapsed": true 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "## print 31st to 35th rows of mydata\n", 231 | "## start code\n", 232 | "mydata.iloc[30:35]\n", 233 | "## end code" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": true 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "## print first 5 rows of 2nd and 3rd column only\n", 245 | "## start code\n", 246 | "mydata.iloc[:5,1:3]\n", 247 | "## end code" 248 | ] 249 | } 250 | ], 251 | "metadata": { 252 | "kernelspec": { 253 | "display_name": "Python 3", 254 | "language": "python", 255 | "name": "python3" 256 | }, 257 | "language_info": { 258 | "codemirror_mode": { 259 | "name": "ipython", 260 | "version": 3 261 | }, 262 | "file_extension": ".py", 263 | "mimetype": "text/x-python", 264 | "name": "python", 265 | "nbconvert_exporter": "python", 266 | "pygments_lexer": "ipython3", 267 | "version": "3.6.0" 268 | } 269 | }, 270 | "nbformat": 4, 271 | "nbformat_minor": 2 272 | } 273 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/3 Median.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# median is best method to find the central 
tendency" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as p" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 5, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/html": [ 27 | "
\n", 28 | "\n", 41 | "\n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | "
StudentOverall MarksGenderSubject
0Anish9700MaleEnglish
1Rishabh83MaleEnglish
2Ajay78MalePhysics
3Abhinav86MaleChemistry
4Anurag91MaleEnglish
5Ajeet15MalePhysics
\n", 96 | "
" 97 | ], 98 | "text/plain": [ 99 | " Student Overall Marks Gender Subject\n", 100 | "0 Anish 9700 Male English\n", 101 | "1 Rishabh 83 Male English\n", 102 | "2 Ajay 78 Male Physics\n", 103 | "3 Abhinav 86 Male Chemistry\n", 104 | "4 Anurag 91 Male English\n", 105 | "5 Ajeet 15 Male Physics" 106 | ] 107 | }, 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "a= p.read_csv(\"Median.csv\")\n", 115 | "a.head(6)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 8, 121 | "metadata": {}, 122 | "outputs": [ 123 | { 124 | "name": "stdout", 125 | "output_type": "stream", 126 | "text": [ 127 | "77.0\n" 128 | ] 129 | } 130 | ], 131 | "source": [ 132 | "median= a['Overall Marks'].median()\n", 133 | "print(median)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 9, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "q1= a['Overall Marks'].quantile(.25)\n", 143 | "q2= a['Overall Marks'].quantile(.5)\n", 144 | "q3= a['Overall Marks'].quantile(.75)\n", 145 | "q4= a['Overall Marks'].quantile(1)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 11, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "57.25\n", 158 | "77.0\n", 159 | "87.25\n", 160 | "9700.0\n" 161 | ] 162 | } 163 | ], 164 | "source": [ 165 | "print(q1)\n", 166 | "print(q2)\n", 167 | "print(q3)\n", 168 | "print(q4)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 14, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "ename": "AttributeError", 178 | "evalue": "'list' object has no attribute 'median'", 179 | "output_type": "error", 180 | "traceback": [ 181 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 182 | "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", 
183 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mlist\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;36m20\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m30\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m40\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m50\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m60\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m70\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mlist\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmedian\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 184 | "\u001b[1;31mAttributeError\u001b[0m: 'list' object has no attribute 'median'" 185 | ] 186 | } 187 | ], 188 | "source": [ 189 | "list = [20,30,40,50,60,70]\n", 190 | "list.median()" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [] 199 | } 200 | ], 201 | "metadata": { 202 | "kernelspec": { 203 | "display_name": "Python 3", 204 | "language": "python", 205 | "name": "python3" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 3 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": "ipython3", 217 | "version": "3.7.1" 218 | } 219 | }, 220 | "nbformat": 4, 221 | "nbformat_minor": 2 222 | } 223 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/6 Standard Deviation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Importing library\n", 10 | "import pandas as pd" 11 | ] 12 
| }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "#Loading data\n", 20 | "data = pd.read_csv(\"Standard Deviation.csv\")" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | "
StudentOverall MarksGenderSubject
0Anish9700MaleEnglish
1Rishabh83MaleEnglish
2Ajay78MalePhysics
3Abhinav86MaleChemistry
4Anurag91MaleEnglish
\n", 93 | "
" 94 | ], 95 | "text/plain": [ 96 | " Student Overall Marks Gender Subject\n", 97 | "0 Anish 9700 Male English\n", 98 | "1 Rishabh 83 Male English\n", 99 | "2 Ajay 78 Male Physics\n", 100 | "3 Abhinav 86 Male Chemistry\n", 101 | "4 Anurag 91 Male English" 102 | ] 103 | }, 104 | "execution_count": 3, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "#Printing top 5 rows\n", 111 | "data.head()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 4, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "#Steps to calculate variance\n", 121 | "\n", 122 | "# 1. Calculate Mean\n", 123 | "# 2. Calculate the distance from mean for each element\n", 124 | "# 3. Calculate Squared distance\n", 125 | "# 4. Take average of Squared distance" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 5, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "mean = data['Overall Marks'].mean()" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 6, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "difference = data['Overall Marks'] - mean" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 7, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "squared_difference = difference ** 2" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 8, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "variance = squared_difference.mean()" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 9, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | "919170.8423999996\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "print(variance)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 10, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 
187 | "data": { 188 | "text/plain": [ 189 | "958.7339789534944" 190 | ] 191 | }, 192 | "execution_count": 10, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": [ 198 | "standard_deviation = variance ** (1/2)\n", 199 | "standard_deviation" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 11, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "data": { 209 | "text/plain": [ 210 | "919170.8423999996" 211 | ] 212 | }, 213 | "execution_count": 11, 214 | "metadata": {}, 215 | "output_type": "execute_result" 216 | } 217 | ], 218 | "source": [ 219 | "#Calculating variance\n", 220 | "var_data = data['Overall Marks'].var(ddof = 0)\n", 221 | "var_data" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 12, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/plain": [ 232 | "958.7339789534944" 233 | ] 234 | }, 235 | "execution_count": 12, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "std = data['Overall Marks'].std(ddof = 0)\n", 242 | "std" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [] 251 | } 252 | ], 253 | "metadata": { 254 | "kernelspec": { 255 | "display_name": "Python 3", 256 | "language": "python", 257 | "name": "python3" 258 | }, 259 | "language_info": { 260 | "codemirror_mode": { 261 | "name": "ipython", 262 | "version": 3 263 | }, 264 | "file_extension": ".py", 265 | "mimetype": "text/x-python", 266 | "name": "python", 267 | "nbconvert_exporter": "python", 268 | "pygments_lexer": "ipython3", 269 | "version": "3.7.1" 270 | } 271 | }, 272 | "nbformat": 4, 273 | "nbformat_minor": 2 274 | } 275 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/10 2 sample T-test.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "colab": {}, 8 | "colab_type": "code", 9 | "id": "lucs3prAS3ih" 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "#Importing libraries\n", 14 | "import pandas as pd\n", 15 | "import scipy.stats as stats\n", 16 | "from scipy.stats import ttest_ind" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": { 23 | "colab": { 24 | "base_uri": "https://localhost:8080/", 25 | "height": 483 26 | }, 27 | "colab_type": "code", 28 | "executionInfo": { 29 | "elapsed": 1023, 30 | "status": "ok", 31 | "timestamp": 1549888816639, 32 | "user": { 33 | "displayName": "Sharoon Saxena", 34 | "photoUrl": "", 35 | "userId": "14774175216384036942" 36 | }, 37 | "user_tz": -330 38 | }, 39 | "id": "ywqZh_UTS3il", 40 | "outputId": "6cb1b982-1b85-4e8b-e303-5eacc11e7b77" 41 | }, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/html": [ 46 | "
\n", 47 | "\n", 60 | "\n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | "
Hauz KhasDefence Colony
0180220
1100200
2120240
3220180
4160160
5100260
6140280
7260300
8240240
9260220
10180260
11160160
12200180
13120220
\n", 141 | "
" 142 | ], 143 | "text/plain": [ 144 | " Hauz Khas Defence Colony\n", 145 | "0 180 220\n", 146 | "1 100 200\n", 147 | "2 120 240\n", 148 | "3 220 180\n", 149 | "4 160 160\n", 150 | "5 100 260\n", 151 | "6 140 280\n", 152 | "7 260 300\n", 153 | "8 240 240\n", 154 | "9 260 220\n", 155 | "10 180 260\n", 156 | "11 160 160\n", 157 | "12 200 180\n", 158 | "13 120 220" 159 | ] 160 | }, 161 | "execution_count": 2, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "#Loading datasets\n", 168 | "data = pd.read_csv('Data for 2 sample test.csv')\n", 169 | "data" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": { 175 | "colab_type": "text", 176 | "id": "jSArf9i-WrsQ" 177 | }, 178 | "source": [ 179 | "on referring to the t-table, t-critical value came out to be 2.056" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 3, 185 | "metadata": { 186 | "colab": {}, 187 | "colab_type": "code", 188 | "id": "mhQygKAfS3ir" 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "#Calculating t-statistic and p-value using 2 sample t-test\n", 193 | "t_statistic, _ = ttest_ind(data['Defence Colony'],data['Hauz Khas'])" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 4, 199 | "metadata": { 200 | "colab": { 201 | "base_uri": "https://localhost:8080/", 202 | "height": 34 203 | }, 204 | "colab_type": "code", 205 | "executionInfo": { 206 | "elapsed": 933, 207 | "status": "ok", 208 | "timestamp": 1549888817432, 209 | "user": { 210 | "displayName": "Sharoon Saxena", 211 | "photoUrl": "", 212 | "userId": "14774175216384036942" 213 | }, 214 | "user_tz": -330 215 | }, 216 | "id": "KWIgVSf1S3iw", 217 | "outputId": "004ad594-a537-42c8-fcbf-27e5c23c1856" 218 | }, 219 | "outputs": [ 220 | { 221 | "data": { 222 | "text/plain": [ 223 | "2.569593732633792" 224 | ] 225 | }, 226 | "execution_count": 4, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | 
"source": [ 232 | "t_statistic" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": { 238 | "colab_type": "text", 239 | "id": "4KhMSRbuaAOP" 240 | }, 241 | "source": [ 242 | "t-statistic> t-critical\n", 243 | "therefore we reject the null hypothesis." 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [] 252 | } 253 | ], 254 | "metadata": { 255 | "colab": { 256 | "collapsed_sections": [], 257 | "name": "2 sample T-test.ipynb", 258 | "provenance": [], 259 | "version": "0.3.2" 260 | }, 261 | "kernelspec": { 262 | "display_name": "Python 3", 263 | "language": "python", 264 | "name": "python3" 265 | }, 266 | "language_info": { 267 | "codemirror_mode": { 268 | "name": "ipython", 269 | "version": 3 270 | }, 271 | "file_extension": ".py", 272 | "mimetype": "text/x-python", 273 | "name": "python", 274 | "nbconvert_exporter": "python", 275 | "pygments_lexer": "ipython3", 276 | "version": "3.7.1" 277 | } 278 | }, 279 | "nbformat": 4, 280 | "nbformat_minor": 1 281 | } 282 | -------------------------------------------------------------------------------- /INTERNSHALA/3 Predictive modeling and ML/1 Reading the data into Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#importing pandas\n", 10 | "\n", 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# reading the csv file\n", 21 | "\n", 22 | "df = pd.read_csv(\"data.csv\")" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/plain": [ 33 | "(891, 12)" 34 | ] 35 | }, 36 | "execution_count": 3, 37 | "metadata": {}, 38 | 
"output_type": "execute_result" 39 | } 40 | ], 41 | "source": [ 42 | "#seeing the dimension of the file\n", 43 | "\n", 44 | "df.shape" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 4, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',\n", 56 | " 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],\n", 57 | " dtype='object')" 58 | ] 59 | }, 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [ 66 | "#seeing the names of varibales in the dataset\n", 67 | "\n", 68 | "df.columns" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 5, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/html": [ 79 | "
\n", 80 | "\n", 93 | "\n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", 189 | "
" 190 | ], 191 | "text/plain": [ 192 | " PassengerId Survived Pclass \\\n", 193 | "0 1 0 3 \n", 194 | "1 2 1 1 \n", 195 | "2 3 1 3 \n", 196 | "3 4 1 1 \n", 197 | "4 5 0 3 \n", 198 | "\n", 199 | " Name Sex Age SibSp \\\n", 200 | "0 Braund, Mr. Owen Harris male 22.0 1 \n", 201 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", 202 | "2 Heikkinen, Miss. Laina female 26.0 0 \n", 203 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", 204 | "4 Allen, Mr. William Henry male 35.0 0 \n", 205 | "\n", 206 | " Parch Ticket Fare Cabin Embarked \n", 207 | "0 0 A/5 21171 7.2500 NaN S \n", 208 | "1 0 PC 17599 71.2833 C85 C \n", 209 | "2 0 STON/O2. 3101282 7.9250 NaN S \n", 210 | "3 0 113803 53.1000 C123 S \n", 211 | "4 0 373450 8.0500 NaN S " 212 | ] 213 | }, 214 | "execution_count": 5, 215 | "metadata": {}, 216 | "output_type": "execute_result" 217 | } 218 | ], 219 | "source": [ 220 | "#seeing the top 5 rows \n", 221 | "\n", 222 | "df.head()" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "collapsed": true 230 | }, 231 | "outputs": [], 232 | "source": [] 233 | } 234 | ], 235 | "metadata": { 236 | "kernelspec": { 237 | "display_name": "Python 3", 238 | "language": "python", 239 | "name": "python3" 240 | }, 241 | "language_info": { 242 | "codemirror_mode": { 243 | "name": "ipython", 244 | "version": 3 245 | }, 246 | "file_extension": ".py", 247 | "mimetype": "text/x-python", 248 | "name": "python", 249 | "nbconvert_exporter": "python", 250 | "pygments_lexer": "ipython3", 251 | "version": "3.7.1" 252 | } 253 | }, 254 | "nbformat": 4, 255 | "nbformat_minor": 2 256 | } 257 | -------------------------------------------------------------------------------- /INTERNSHALA/3 Predictive modeling and ML/2 Variable Identification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 
6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# importing pandas\n", 10 | "\n", 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 3, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# reading the dataset\n", 21 | "\n", 22 | "file=pd.read_csv(\"data.csv\")" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 5, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/plain": [ 33 | "(891, 12)" 34 | ] 35 | }, 36 | "execution_count": 5, 37 | "metadata": {}, 38 | "output_type": "execute_result" 39 | } 40 | ], 41 | "source": [ 42 | "# dimensions of the dataset\n", 43 | "\n", 44 | "file.shape" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 6, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/html": [ 55 | "
\n", 56 | "\n", 69 | "\n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", 165 | "
" 166 | ], 167 | "text/plain": [ 168 | " PassengerId Survived Pclass \\\n", 169 | "0 1 0 3 \n", 170 | "1 2 1 1 \n", 171 | "2 3 1 3 \n", 172 | "3 4 1 1 \n", 173 | "4 5 0 3 \n", 174 | "\n", 175 | " Name Sex Age SibSp \\\n", 176 | "0 Braund, Mr. Owen Harris male 22.0 1 \n", 177 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", 178 | "2 Heikkinen, Miss. Laina female 26.0 0 \n", 179 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", 180 | "4 Allen, Mr. William Henry male 35.0 0 \n", 181 | "\n", 182 | " Parch Ticket Fare Cabin Embarked \n", 183 | "0 0 A/5 21171 7.2500 NaN S \n", 184 | "1 0 PC 17599 71.2833 C85 C \n", 185 | "2 0 STON/O2. 3101282 7.9250 NaN S \n", 186 | "3 0 113803 53.1000 C123 S \n", 187 | "4 0 373450 8.0500 NaN S " 188 | ] 189 | }, 190 | "execution_count": 6, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "# head of the dataset\n", 197 | "\n", 198 | "file.head()" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 7, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "data": { 208 | "text/plain": [ 209 | "PassengerId int64\n", 210 | "Survived int64\n", 211 | "Pclass int64\n", 212 | "Name object\n", 213 | "Sex object\n", 214 | "Age float64\n", 215 | "SibSp int64\n", 216 | "Parch int64\n", 217 | "Ticket object\n", 218 | "Fare float64\n", 219 | "Cabin object\n", 220 | "Embarked object\n", 221 | "dtype: object" 222 | ] 223 | }, 224 | "execution_count": 7, 225 | "metadata": {}, 226 | "output_type": "execute_result" 227 | } 228 | ], 229 | "source": [ 230 | "# identifying categorical and continuous variables \n", 231 | "file.dtypes\n", 232 | "\n" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [] 241 | } 242 | ], 243 | "metadata": { 244 | "kernelspec": { 245 | "display_name": "Python 3", 246 | "language": "python", 247 | "name": "python3" 248 | 
}, 249 | "language_info": { 250 | "codemirror_mode": { 251 | "name": "ipython", 252 | "version": 3 253 | }, 254 | "file_extension": ".py", 255 | "mimetype": "text/x-python", 256 | "name": "python", 257 | "nbconvert_exporter": "python", 258 | "pygments_lexer": "ipython3", 259 | "version": "3.7.1" 260 | } 261 | }, 262 | "nbformat": 4, 263 | "nbformat_minor": 2 264 | } 265 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/2 Mean.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "a= pd.read_csv('mean.csv')" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 15, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | "
StudentOverall MarksGenderSubject
0Anish97MaleEnglish
1Rishabh83MaleEnglish
2Ajay78MalePhysics
3Abhinav86MaleChemistry
4Anurag91MaleEnglish
\n", 91 | "
" 92 | ], 93 | "text/plain": [ 94 | " Student Overall Marks Gender Subject\n", 95 | "0 Anish 97 Male English\n", 96 | "1 Rishabh 83 Male English\n", 97 | "2 Ajay 78 Male Physics\n", 98 | "3 Abhinav 86 Male Chemistry\n", 99 | "4 Anurag 91 Male English" 100 | ] 101 | }, 102 | "execution_count": 15, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "a.head()\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 9, 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "data": { 118 | "text/plain": [ 119 | "68.21" 120 | ] 121 | }, 122 | "execution_count": 9, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "mean= a['Overall Marks'].mean()\n", 129 | "mean" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 11, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "b=pd.read_csv('mean_robust.csv')" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 12, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "text/html": [ 149 | "
\n", 150 | "\n", 163 | "\n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | "
StudentOverall MarksGenderSubject
96vivek55MalePhysics
97Tisha7FemaleEnglish
98Raghav73MaleEnglish
99Ishita82FemaleChemistry
100NaN10000NaNNaN
\n", 211 | "
" 212 | ], 213 | "text/plain": [ 214 | " Student Overall Marks Gender Subject\n", 215 | "96 vivek 55 Male Physics\n", 216 | "97 Tisha 7 Female English\n", 217 | "98 Raghav 73 Male English\n", 218 | "99 Ishita 82 Female Chemistry\n", 219 | "100 NaN 10000 NaN NaN" 220 | ] 221 | }, 222 | "execution_count": 12, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "b.tail()" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 16, 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "data": { 238 | "text/plain": [ 239 | "166.54455445544554" 240 | ] 241 | }, 242 | "execution_count": 16, 243 | "metadata": {}, 244 | "output_type": "execute_result" 245 | } 246 | ], 247 | "source": [ 248 | "b['Overall Marks'].mean() #mean not robust" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 18, 254 | "metadata": {}, 255 | "outputs": [ 256 | { 257 | "data": { 258 | "text/plain": [ 259 | "0 88\n", 260 | "dtype: int64" 261 | ] 262 | }, 263 | "execution_count": 18, 264 | "metadata": {}, 265 | "output_type": "execute_result" 266 | } 267 | ], 268 | "source": [ 269 | "b['Overall Marks'].mode() #mode is robust" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [] 285 | } 286 | ], 287 | "metadata": { 288 | "kernelspec": { 289 | "display_name": "Python 3", 290 | "language": "python", 291 | "name": "python3" 292 | }, 293 | "language_info": { 294 | "codemirror_mode": { 295 | "name": "ipython", 296 | "version": 3 297 | }, 298 | "file_extension": ".py", 299 | "mimetype": "text/x-python", 300 | "name": "python", 301 | "nbconvert_exporter": "python", 302 | "pygments_lexer": "ipython3", 303 | "version": "3.7.1" 304 | } 305 | }, 306 | "nbformat": 4, 307 | "nbformat_minor": 2 
308 | } 309 | -------------------------------------------------------------------------------- /INTERNSHALA/3 Predictive modeling and ML/2 Logistic Regression/logistic Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#importing required libraries\n", 10 | "\n", 11 | "import pandas as pd\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "%matplotlib inline\n", 14 | "import numpy as np" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 3, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "data=pd.read_csv(\"titanic.csv\")" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 4, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/html": [ 34 | "
\n", 35 | "\n", 48 | "\n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", 144 | "
" 145 | ], 146 | "text/plain": [ 147 | " PassengerId Survived Pclass \\\n", 148 | "0 1 0 3 \n", 149 | "1 2 1 1 \n", 150 | "2 3 1 3 \n", 151 | "3 4 1 1 \n", 152 | "4 5 0 3 \n", 153 | "\n", 154 | " Name Sex Age SibSp \\\n", 155 | "0 Braund, Mr. Owen Harris male 22.0 1 \n", 156 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", 157 | "2 Heikkinen, Miss. Laina female 26.0 0 \n", 158 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", 159 | "4 Allen, Mr. William Henry male 35.0 0 \n", 160 | "\n", 161 | " Parch Ticket Fare Cabin Embarked \n", 162 | "0 0 A/5 21171 7.2500 NaN S \n", 163 | "1 0 PC 17599 71.2833 C85 C \n", 164 | "2 0 STON/O2. 3101282 7.9250 NaN S \n", 165 | "3 0 113803 53.1000 C123 S \n", 166 | "4 0 373450 8.0500 NaN S " 167 | ] 168 | }, 169 | "execution_count": 4, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "data.head()" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 5, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "data": { 185 | "text/plain": [ 186 | "0 549\n", 187 | "1 342\n", 188 | "Name: Survived, dtype: int64" 189 | ] 190 | }, 191 | "execution_count": 5, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "data['Survived'].value_counts()" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 6, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "data=pd.get_dummies(data)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 7, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "data.fillna(0,inplace=True)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 9, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "data": { 225 | "text/plain": [ 226 | "(891, 1731)" 227 | ] 228 | }, 229 | "execution_count": 9, 230 | "metadata": {}, 231 | "output_type": "execute_result" 
232 | } 233 | ], 234 | "source": [ 235 | "data.shape" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 10, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "train=data[0:699]" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 11, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "test=data[700:890]" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 17, 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [ 262 | "x_train=train.drop('Survived', axis=1)\n", 263 | "y_train = train[\"Survived\"]" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 25, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "x_test=test.drop('Survived', axis=1)\n", 273 | "true_p = test['Survived']" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 26, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "from sklearn.linear_model import LogisticRegression" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 27, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "logreg=LogisticRegression()" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 28, 297 | "metadata": { 298 | "scrolled": true 299 | }, 300 | "outputs": [ 301 | { 302 | "name": "stderr", 303 | "output_type": "stream", 304 | "text": [ 305 | "C:\\Users\\SARIM\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. 
Specify a solver to silence this warning.\n", 306 | " FutureWarning)\n" 307 | ] 308 | }, 309 | { 310 | "data": { 311 | "text/plain": [ 312 | "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", 313 | " intercept_scaling=1, max_iter=100, multi_class='warn',\n", 314 | " n_jobs=None, penalty='l2', random_state=None, solver='warn',\n", 315 | " tol=0.0001, verbose=0, warm_start=False)" 316 | ] 317 | }, 318 | "execution_count": 28, 319 | "metadata": {}, 320 | "output_type": "execute_result" 321 | } 322 | ], 323 | "source": [ 324 | "logreg.fit(x_train,y_train)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 29, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "pred=logreg.predict(x_test)" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 30, 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "data": { 343 | "text/plain": [ 344 | "array([1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0,\n", 345 | " 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,\n", 346 | " 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1,\n", 347 | " 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0,\n", 348 | " 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1,\n", 349 | " 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,\n", 350 | " 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1,\n", 351 | " 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1,\n", 352 | " 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0], dtype=int64)" 353 | ] 354 | }, 355 | "execution_count": 30, 356 | "metadata": {}, 357 | "output_type": "execute_result" 358 | } 359 | ], 360 | "source": [ 361 | "pred" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 31, 367 | "metadata": {}, 368 | "outputs": [ 369 | { 370 | "data": { 371 | "text/plain": [ 372 | "0.8210526315789474" 373 | ] 374 | }, 
375 | "execution_count": 31, 376 | "metadata": {}, 377 | "output_type": "execute_result" 378 | } 379 | ], 380 | "source": [ 381 | "logreg.score(x_test,true_p)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 32, 387 | "metadata": {}, 388 | "outputs": [ 389 | { 390 | "data": { 391 | "text/plain": [ 392 | "0.9227467811158798" 393 | ] 394 | }, 395 | "execution_count": 32, 396 | "metadata": {}, 397 | "output_type": "execute_result" 398 | } 399 | ], 400 | "source": [ 401 | "logreg.score(x_train,y_train)" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "# To make the test data representative of the train data, we use \"Validation\"" 409 | ] 410 | } 411 | ], 412 | "metadata": { 413 | "kernelspec": { 414 | "display_name": "Python 3", 415 | "language": "python", 416 | "name": "python3" 417 | }, 418 | "language_info": { 419 | "codemirror_mode": { 420 | "name": "ipython", 421 | "version": 3 422 | }, 423 | "file_extension": ".py", 424 | "mimetype": "text/x-python", 425 | "name": "python", 426 | "nbconvert_exporter": "python", 427 | "pygments_lexer": "ipython3", 428 | "version": "3.7.1" 429 | } 430 | }, 431 | "nbformat": 4, 432 | "nbformat_minor": 2 433 | } 434 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/8 Histogram.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#Importing library\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "#Loading dataset\n", 20 | "histogram = pd.read_csv(\"Histogram.csv\")" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | 
"outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | "
StudentOverall MarksGenderSubject
0Anish97MaleEnglish
1Rishabh83MaleEnglish
2Ajay78MalePhysics
3Abhinav86MaleChemistry
4Anurag91MaleEnglish
\n", 93 | "
" 94 | ], 95 | "text/plain": [ 96 | " Student Overall Marks Gender Subject\n", 97 | "0 Anish 97 Male English\n", 98 | "1 Rishabh 83 Male English\n", 99 | "2 Ajay 78 Male Physics\n", 100 | "3 Abhinav 86 Male Chemistry\n", 101 | "4 Anurag 91 Male English" 102 | ] 103 | }, 104 | "execution_count": 3, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "#Printing first 5 rows\n", 111 | "histogram.head()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 6, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "#Generating histogram\n", 121 | "#import matplotlib.pyplot as plt\n", 122 | "#\n", 123 | "import matplotlib.pyplot as plt\n", 124 | "%matplotlib inline" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 7, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAADKxJREFUeJzt3W+MZYVZx/HvT8BqoQaQgSB/HNqQWmLSpZkQFGOwWEOhEZpoUmIqL0i2L0oEQ2LW+sL6DpIW1KQh2RYEDeIfoEIKqZKVhDRRdBYJLC4VrGsLrOwQbEHftMDji3s2Gbc7zJ259+5lnvl+ksm998yZPc/hLN+cPXvu3VQVkqSt70fmPYAkaToMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJo4/lhs77bTTanFx8VhuUpK2vL17975aVQvrrXdMg764uMjy8vKx3KQkbXlJ/nOc9bzkIklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0c03eKShLA4q6H57LdAzdfOZftHiueoUtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhPrBj3JOUkeS7I/ybNJbhiWfz7JS0meGr6umP24kqS1jPNP0L0J3FRVTyZ5H7A3yaPD926rqi/MbjxJ0rjWDXpVHQQODs/fSLIfOGvWg0mSNmZD19CTLAIXAk8Mi65P8nSSO5OcMuXZJEkbMHbQk5wE3A/cWFWvA7cDHwB2MDqD/+IaP7czyXKS5ZWVlSmMLEk6mrGCnuQERjG/p6oeAKiqV6rqrap6G/gycNHRfraqdlfVUlUtLSwsTGtuSdIRxrnLJcAdwP6qunXV8jNXrfZJYN/0x5MkjWucu1wuAT4NPJPkqWHZ54BrkuwACjgAfGYmE0qSxjLOXS7fAHKUbz0y/XEkSZvlO0UlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0Yd
ElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1IT6wY9yTlJHkuyP8mzSW4Ylp+a5NEkzw+Pp8x+XEnSWsY5Q38TuKmqPgRcDHw2yQXALmBPVZ0P7BleS5LmZN2gV9XBqnpyeP4GsB84C7gKuHtY7W7g6lkNKUla34auoSdZBC4EngDOqKqDMIo+cPq0h5MkjW/soCc5CbgfuLGqXt/Az+1MspxkeWVlZTMzSpLGMFbQk5zAKOb3VNUDw+JXkpw5fP9M4NDRfraqdlfVUlUtLSwsTGNmSdJRjHOXS4A7gP1Vdeuqbz0EXDs8vxZ4cPrjSZLGdfwY61wCfBp4JslTw7LPATcDf5XkOuDbwK/PZkRJ0jjWDXpVfQPIGt++bLrjSJI2y3eKSlITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaGOezXCQ1tLjr4XmPoCnzDF2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU1426KkbWOet2oeuPnKmW/DM3RJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhPrBj3JnUkOJdm3atnnk7yU5Knh64rZjilJWs84Z+h3AZcfZfltVbVj+HpkumNJkjZq3aBX1ePAa8dgFknSBCa5hn59kqeHSzKnTG0iSdKmbDbotwMfAHYAB4EvrrVikp1JlpMsr6ysbHJzkqT1bCroVfVKVb1VVW8DXwYueod1d1fVUlUtLSwsbHZOSdI6NhX0JGeuevlJYN9a60qSjo11/8WiJPcClwKnJXkR+H3g0iQ7gAIOAJ+Z4YySpDGsG/SquuYoi++YwSySpAn4TlFJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNHD/vAaTtbnHXw/MeQU14hi5JTRh0SWrCoEtSE+sGPcmdSQ4l2bdq2alJHk3y/PB4ymzHlCStZ5wz9LuAy49YtgvYU1XnA3uG15KkOVo36FX1OPDaEYuvAu4ent8NXD3luSRJG7TZa+hnVNVBgOHx9OmNJEnajJn/pWiSnUmWkyyvrKzMenOStG1tNuivJDkTYHg8tNaKVbW7qpaqamlhYWGTm5MkrWezQX8IuHZ4fi3w4HTGkSRt1ji3Ld4L/APwwSQvJrkOuBn4WJLngY8NryVJc7TuZ7lU1TVrfOuyKc8iSZqA7xSVpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6Qm1v0XiwSLux6ey3YP3HzlXLY7T/63ljbPM3RJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMTvfU/yQHgDeAt4M2qWprGUJKkjZvGZ7n8UlW9OoVfR5I0AS+5SFITkwa9gL9LsjfJzmkMJEnanEkvuVxSVS8nOR14NMlzVfX46hWG0O8EOPfccyfcnDQb8/rYXmmaJjpDr6qXh8dDwFeBi46yzu6qWqqqpYWFhUk2J0l6B5sOepITk7zv8HPgV4B90xpMkrQxk1xyOQP4apLDv86fV9XXpzKVJGnDNh30qvoW8OEpziJJmoC3LUpSEwZdkpqYxjtFjwlvK5Okd+YZuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktTElrkPfTua1733B26+ci7blTQZz9AlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJamJiYKe5PIk30zyQ
pJd0xpKkrRxmw56kuOALwEfBy4ArklywbQGkyRtzCRn6BcBL1TVt6rq+8BfAFdNZyxJ0kZNEvSzgO+sev3isEySNAfHT/CzOcqy+qGVkp3AzuHl/yT55ia3dxrw6iZ/dquayz7nlmO9xf/H47w9bLt9zi0T7fNPj7PSJEF/EThn1euzgZePXKmqdgO7J9gOAEmWq2pp0l9nK3Gftwf3eXs4Fvs8ySWXfwbOT3Jekh8FPgU8NJ2xJEkbtekz9Kp6M8n1wN8CxwF3VtWzU5tMkrQhk1xyoaoeAR6Z0izrmfiyzRbkPm8P7vP2MPN9TtUP/T2mJGkL8q3/ktTElgh6948YSHJOkseS7E/ybJIbhuWnJnk0yfPD4ynznnXakhyX5F+SfG14fV6SJ4Z9/svhL9zbSHJykvuSPDcc75/rfpyT/Pbw+3pfknuT/Fi345zkziSHkuxbteyoxzUjfzz07OkkH5nWHO/6oG+Tjxh4E7ipqj4EXAx8dtjHXcCeqjof2DO87uYGYP+q17cAtw37/N/AdXOZanb+CPh6Vf0M8GFG+972OCc5C/gtYKmqfpbRDRSfot9xvgu4/Ihlax3XjwPnD187gdunNcS7Puhsg48YqKqDVfXk8PwNRv+Tn8VoP+8eVrsbuHo+E85GkrOBK4GvDK8DfBS4b1il1T4n+QngF4E7AKrq+1X1XZofZ0Y3X/x4kuOB9wIHaXacq+px4LUjFq91XK8C/rRG/hE4OcmZ05hjKwR9W33EQJJF4ELgCeCMqjoIo+gDp89vspn4Q+B3gLeH1z8JfLeq3hxedzvW7wdWgD8ZLjN9JcmJND7OVfUS8AXg24xC/j1gL72P82FrHdeZNW0rBH2sjxjoIMlJwP3AjVX1+rznmaUknwAOVdXe1YuPsmqnY3088BHg9qq6EPhfGl1eOZrhuvFVwHnATwEnMrrkcKROx3k9M/t9vhWCPtZHDGx1SU5gFPN7quqBYfErh/8oNjwemtd8M3AJ8KtJDjC6jPZRRmfsJw9/NId+x/pF4MWqemJ4fR+jwHc+zr8M/EdVrVTVD4AHgJ+n93E+bK3jOrOmbYWgt/+IgeHa8R3A/qq6ddW3HgKuHZ5fCzx4rGeblar63ao6u6oWGR3Tv6+q3wAeA35tWK3bPv8X8J0kHxwWXQb8K42PM6NLLRcnee/w+/zwPrc9zqusdVwfAn5zuNvlYuB7hy/NTKyq3vVfwBXAvwH/DvzevOeZwf79AqM/cj0NPDV8XcHomvIe4Pnh8dR5zzqj/b8U+Nrw/P3APwEvAH8NvGfe8015X3cAy8Ox/hvglO7HGfgD4DlgH/BnwHu6HWfgXkZ/R/ADRmfg1611XBldcvnS0LNnGN0BNJU5fKeoJDWxFS65SJLGYNAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJv4PSsK/jSs3mLQAAAAASUVORK5CYII=\n", 135 | "text/plain": [ 136 | "
" 137 | ] 138 | }, 139 | "metadata": { 140 | "needs_background": "light" 141 | }, 142 | "output_type": "display_data" 143 | } 144 | ], 145 | "source": [ 146 | "#plt.hist(x='Overall Marks',data=histogram)\n", 147 | "#plt.show()\n", 148 | "plt.hist(x='Overall Marks',data=histogram)\n", 149 | "plt.show()" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 14, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAD8CAYAAABXe05zAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAADR5JREFUeJzt3WGMZeVdx/Hvz11oC6WhLbdNBcaBhKBNkwKZECqmUagKbAO+qAlEazWYeaMVTJNmia/6bk1MbU0akg3QoiJVKShhW5RQCGlit+4CIrBgKa6yhXaXVAqtiZT698U9u5muc+ee2Z0zM8/d7yeZzL13zt79n31mv7n7zLnZVBWSpHb81EYPIElaHcMtSY0x3JLUGMMtSY0x3JLUGMMtSY0x3JLUGMMtSY0x3JLUmK1DPOkZZ5xR8/PzQzy1JM2kvXv3vlxVoz7HDhLu+fl59uzZM8RTS9JMSvIffY91q0SSGmO4JakxhluSGmO4JakxhluSGjM13EnOT/L4ko9Xk9y4HsNJkv6/qZcDVtWzwAUASbYA3wbuGXguSdIEq90quRz4VlX1vt5QkrS2Vhvua4E7hxhEktRP73dOJjkZuBq4acLXF4FFgLm5uTUZTpJWMr9915Hb+3ds28BJ1tdqXnFfCTxaVd9d7otVtbOqFqpqYTTq9XZ7SdIxWE24r8NtEknacL3CneQU4JeBu4cdR5I0Ta897qr6b+CdA88iSerBd05KUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmN6hTvJ6UnuSvJMkn1JPjD0YJKk5W3tedxngfur6iNJTgZOGXAmSdIKpoY7yduADwK/DVBVrwOvDzuWJGmSPlsl5wKHgM8neSzJLUlOPfqgJItJ9iTZc+jQoTUfVJI01ifcW4GLgJur6kLgh8D2ow+qqp1VtVBVC6PRaI3HlCQd1ifcB4ADVbW7u38X45BLkjbA1HBX1XeAF5Kc3z10OfD0oFNJkibqe1XJx4E7uitKngd+Z7iRJEkr6RXuqnocWBh4FklSD75zUpIaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTG9/rPgJPuB14AfA29Ulf9xsCRtkF7h7vxSVb082CSSpF7cKpGkxvQNdwH/mGRvksUhB5IkrazvVsmlVfVikncBDyR5pqoeWXpAF/RFgLm5uTUeU5I2p/ntu47c3r9j27r8nr1ecVfVi93ng8A9wMXLHLOzqhaqamE0Gq3tlJKkI6aGO8mpSU47fBv4FeDJoQeTJC2vz1bJu4F7khw+/q+q6v5Bp5IkTTQ13FX1PPD+dZhFktSDlwNKUmMMtyQ1xnBLUmMMtyQ1xn
BLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmN6hzvJliSPJblvyIEkSStbzSvuG4B9Qw0iSeqnV7iTnAVsA24ZdhxJ0jRbex73GeCTwGmTDkiyCCwCzM3NHf9kkmbG/PZdR27v37Ft6uNa2dRX3Ek+DBysqr0rHVdVO6tqoaoWRqPRmg0oSfpJfbZKLgWuTrIf+CJwWZK/HHQqSdJEU8NdVTdV1VlVNQ9cC3y1qn5z8MkkScvyOm5JakzfH04CUFUPAw8PMokkqRdfcUtSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDVmariTvDnJN5L8S5KnknxqPQaTJC2vz//y/j/AZVX1gyQnAV9L8pWq+vrAs0mSljE13FVVwA+6uyd1HzXkUJKkyXrtcSfZkuRx4CDwQFXtHnYsSdIkfbZKqKofAxckOR24J8n7qurJpcckWQQWAebm5tZ80Fkwv33Xkdv7d2zbwEmkn7Ta782lxy91PL+2JRt9Dqu6qqSqXgEeBq5Y5ms7q2qhqhZGo9EajSdJOlqfq0pG3SttkrwF+BDwzNCDSZKW12er5D3A7Um2MA7931TVfcOOJUmapM9VJU8AF67DLJKkHnznpCQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1xnBLUmMMtyQ1Zmq4k5yd5KEk+5I8leSG9RhMkrS8rT2OeQP4RFU9muQ0YG+SB6rq6YFnkyQtY+or7qp6qaoe7W6/BuwDzhx6MEnS8la1x51kHrgQ2D3EMJKk6fpslQCQ5K3Al4Abq+rVZb6+CCwCzM3NHfNA89t3Hbm9f8e2Y36e9dTizLNoteuwnus26fda+vhKJs3X5xw26s+l77mt9vjVzrRWf0arPZ8h9XrFneQkxtG+o6ruXu6YqtpZVQtVtTAajdZyRknSEn2uKglwK7Cvqj49/EiSpJX0ecV9KfBR4LIkj3cfVw08lyRpgql73FX1NSDrMIskqQffOSlJjTHcktQYwy1JjTHcktQYwy1JjTHcktQYwy1JjTHcktQYwy1JjTHcktQYwy1JjTHcktQYwy1JjTHcktQYwy1JjTHcktQYwy1JjTHcktQYwy1JjTHcktQYwy1JjZka7iS3JTmY5Mn1GEiStLI+r7i/AFwx8BySpJ6mhruqHgG+tw6zSJJ62LpWT5RkEVgEmJubW6unXZX57buO3N6/Y9vUx9d7pj6PL7VZzmEIx3M+ff7s+hyzWn1mXo91Op5zW+2vHfr4Y3E8f6fWY771sGY/nKyqnVW1UFULo9ForZ5WknQUryqRpMYYbklqTJ/LAe8E/gk4P8mBJNcPP5YkaZKpP5ysquvWYxBJUj9ulUhSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDXGcEtSYwy3JDWmV7iTXJHk2STPJdk+9FCSpMmmhjvJFuBzwJXAe4Hrkrx36MEkScvr84r7YuC5qnq+ql4HvghcM+xYkqRJ+oT7TOCFJfcPdI9JkjZAqmrlA5JfB361qn63u/9R4OKq+vhRxy0Ci93d84Fnj3GmM4CXj/HXtspznn0n2vmC57xaP1NVoz4Hbu1xzAHg7CX3zwJePPqgqtoJ7Ow13gqS7KmqheN9npZ4zrPvRDtf8JyH1Ger5J+B85Kck+Rk4Frg3mHHkiRNMvUVd1W9keT3gX8AtgC3VdVTg08mSVpWn60SqurLwJcHnuWw495uaZ
DnPPtOtPMFz3kwU384KUnaXHzLuyQ1ZtOE+0R4W32Ss5M8lGRfkqeS3NA9/o4kDyT5Zvf57Rs961pLsiXJY0nu6+6fk2R3d85/3f3ge2YkOT3JXUme6db7A7O+zkn+sPu+fjLJnUnePGvrnOS2JAeTPLnksWXXNWN/1jXtiSQXrdUcmyLcJ9Db6t8APlFVPwdcAvxed57bgQer6jzgwe7+rLkB2Lfk/h8Df9qd838B12/IVMP5LHB/Vf0s8H7G5z6z65zkTOAPgIWqeh/jCxmuZfbW+QvAFUc9NmldrwTO6z4WgZvXaohNEW5OkLfVV9VLVfVod/s1xn+Zz2R8rrd3h90O/NrGTDiMJGcB24BbuvsBLgPu6g6ZqXNO8jbgg8CtAFX1elW9woyvM+OLHd6SZCtwCvASM7bOVfUI8L2jHp60rtcAf15jXwdOT/KetZhjs4T7hHtbfZJ54EJgN/DuqnoJxnEH3rVxkw3iM8Angf/t7r8TeKWq3ujuz9p6nwscAj7fbQ/dkuRUZnidq+rbwJ8A/8k42N8H9jLb63zYpHUdrGubJdxZ5rGZvdwlyVuBLwE3VtWrGz3PkJJ8GDhYVXuXPrzMobO03luBi4Cbq+pC4IfM0LbIcrp93WuAc4CfBk5lvFVwtFla52kG+z7fLOHu9bb6WZDkJMbRvqOq7u4e/u7hf0J1nw9u1HwDuBS4Osl+xltglzF+BX56909qmL31PgAcqKrd3f27GId8ltf5Q8C/V9WhqvoRcDfw88z2Oh82aV0H69pmCfcJ8bb6bm/3VmBfVX16yZfuBT7W3f4Y8PfrPdtQquqmqjqrquYZr+tXq+o3gIeAj3SHzdo5fwd4Icn53UOXA08zw+vMeIvkkiSndN/nh895Ztd5iUnrei/wW93VJZcA3z+8pXLcqmpTfABXAf8GfAv4o42eZ6Bz/AXG/1R6Ani8+7iK8Z7vg8A3u8/v2OhZBzr/XwTu626fC3wDeA74W+BNGz3fGp/rBcCebq3/Dnj7rK8z8CngGeBJ4C+AN83aOgN3Mt7D/xHjV9TXT1pXxlsln+ua9q+Mr7hZkzl856QkNWazbJVIknoy3JLUGMMtSY0x3JLUGMMtSY0x3JLUGMMtSY0x3JLUmP8DIxgA1ufO93IAAAAASUVORK5CYII=\n", 160 | "text/plain": [ 161 | "
" 162 | ] 163 | }, 164 | "metadata": { 165 | "needs_background": "light" 166 | }, 167 | "output_type": "display_data" 168 | } 169 | ], 170 | "source": [ 171 | "plt.hist(x='Overall Marks',data=histogram, bins=100)\n", 172 | "plt.show()" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "collapsed": true 180 | }, 181 | "outputs": [], 182 | "source": [] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "Python 3", 188 | "language": "python", 189 | "name": "python3" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 3 195 | }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython3", 201 | "version": "3.7.1" 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 2 206 | } 207 | -------------------------------------------------------------------------------- /INTERNSHALA/3 Predictive modeling and ML/3 Decesion Tree/Decision Tree.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#importing libraries \n", 10 | "\n", 11 | "import pandas as pd \n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "data=pd.read_csv('data_cleaned.csv')" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/plain": [ 32 | "(891, 25)" 33 | ] 34 | }, 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "output_type": "execute_result" 38 | } 39 | ], 40 | "source": [ 41 | "data.shape" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 4, 47 | 
"metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/html": [ 52 | "
\n", 53 | "\n", 66 | "\n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | "
SurvivedAgeFarePclass_1Pclass_2Pclass_3Sex_femaleSex_maleSibSp_0SibSp_1...Parch_0Parch_1Parch_2Parch_3Parch_4Parch_5Parch_6Embarked_CEmbarked_QEmbarked_S
0022.07.25000010101...1000000001
1138.071.28331001001...1000000100
2126.07.92500011010...1000000001
3135.053.10001001001...1000000001
4035.08.05000010110...1000000001
\n", 216 | "

5 rows × 25 columns

\n", 217 | "
" 218 | ], 219 | "text/plain": [ 220 | " Survived Age Fare Pclass_1 Pclass_2 Pclass_3 Sex_female \\\n", 221 | "0 0 22.0 7.2500 0 0 1 0 \n", 222 | "1 1 38.0 71.2833 1 0 0 1 \n", 223 | "2 1 26.0 7.9250 0 0 1 1 \n", 224 | "3 1 35.0 53.1000 1 0 0 1 \n", 225 | "4 0 35.0 8.0500 0 0 1 0 \n", 226 | "\n", 227 | " Sex_male SibSp_0 SibSp_1 ... Parch_0 Parch_1 Parch_2 Parch_3 \\\n", 228 | "0 1 0 1 ... 1 0 0 0 \n", 229 | "1 0 0 1 ... 1 0 0 0 \n", 230 | "2 0 1 0 ... 1 0 0 0 \n", 231 | "3 0 0 1 ... 1 0 0 0 \n", 232 | "4 1 1 0 ... 1 0 0 0 \n", 233 | "\n", 234 | " Parch_4 Parch_5 Parch_6 Embarked_C Embarked_Q Embarked_S \n", 235 | "0 0 0 0 0 0 1 \n", 236 | "1 0 0 0 1 0 0 \n", 237 | "2 0 0 0 0 0 1 \n", 238 | "3 0 0 0 0 0 1 \n", 239 | "4 0 0 0 0 0 1 \n", 240 | "\n", 241 | "[5 rows x 25 columns]" 242 | ] 243 | }, 244 | "execution_count": 4, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "data.head()" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 5, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "#seperating independent and dependent variables\n", 260 | "\n", 261 | "x = data.drop(['Survived'], axis=1)\n", 262 | "y = data['Survived']" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 6, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "from sklearn.model_selection import train_test_split" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 7, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "train_x,test_x,train_y,test_y = train_test_split(x,y, random_state = 101, stratify=y)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 8, 286 | "metadata": {}, 287 | "outputs": [ 288 | { 289 | "data": { 290 | "text/plain": [ 291 | "0 0.616766\n", 292 | "1 0.383234\n", 293 | "Name: Survived, dtype: float64" 294 | ] 295 | }, 296 | "execution_count": 8, 297 | "metadata": {}, 298 | 
"output_type": "execute_result" 299 | } 300 | ], 301 | "source": [ 302 | "train_y.value_counts()/len(train_y)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 9, 308 | "metadata": {}, 309 | "outputs": [ 310 | { 311 | "data": { 312 | "text/plain": [ 313 | "0 0.61435\n", 314 | "1 0.38565\n", 315 | "Name: Survived, dtype: float64" 316 | ] 317 | }, 318 | "execution_count": 9, 319 | "metadata": {}, 320 | "output_type": "execute_result" 321 | } 322 | ], 323 | "source": [ 324 | "test_y.value_counts()/len(test_y)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 10, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "#importing decision tree classifier \n", 334 | "\n", 335 | "from sklearn.tree import DecisionTreeClassifier" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 11, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "clf = DecisionTreeClassifier()" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 12, 350 | "metadata": {}, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", 356 | " max_features=None, max_leaf_nodes=None,\n", 357 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 358 | " min_samples_leaf=1, min_samples_split=2,\n", 359 | " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", 360 | " splitter='best')" 361 | ] 362 | }, 363 | "execution_count": 12, 364 | "metadata": {}, 365 | "output_type": "execute_result" 366 | } 367 | ], 368 | "source": [ 369 | "clf.fit(train_x,train_y)" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 13, 375 | "metadata": {}, 376 | "outputs": [ 377 | { 378 | "data": { 379 | "text/plain": [ 380 | "0.9880239520958084" 381 | ] 382 | }, 383 | "execution_count": 13, 384 | "metadata": {}, 385 | "output_type": "execute_result" 386 | } 387 | ], 
388 | "source": [ 389 | "clf.score(train_x, train_y)" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": 14, 395 | "metadata": {}, 396 | "outputs": [ 397 | { 398 | "data": { 399 | "text/plain": [ 400 | "0.757847533632287" 401 | ] 402 | }, 403 | "execution_count": 14, 404 | "metadata": {}, 405 | "output_type": "execute_result" 406 | } 407 | ], 408 | "source": [ 409 | "clf.score(test_x, test_y)" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 15, 415 | "metadata": {}, 416 | "outputs": [ 417 | { 418 | "data": { 419 | "text/plain": [ 420 | "array([1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1,\n", 421 | " 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0,\n", 422 | " 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,\n", 423 | " 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,\n", 424 | " 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,\n", 425 | " 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,\n", 426 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0,\n", 427 | " 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0,\n", 428 | " 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0,\n", 429 | " 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0,\n", 430 | " 1, 1, 0], dtype=int64)" 431 | ] 432 | }, 433 | "execution_count": 15, 434 | "metadata": {}, 435 | "output_type": "execute_result" 436 | } 437 | ], 438 | "source": [ 439 | "clf.predict(test_x)" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": null, 445 | "metadata": {}, 446 | "outputs": [], 447 | "source": [] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": null, 452 | "metadata": {}, 453 | "outputs": [], 454 | "source": [] 455 | } 456 | ], 457 | "metadata": { 458 | "kernelspec": { 459 | "display_name": "Python 3", 460 | "language": "python", 461 | "name": 
"python3" 462 | }, 463 | "language_info": { 464 | "codemirror_mode": { 465 | "name": "ipython", 466 | "version": 3 467 | }, 468 | "file_extension": ".py", 469 | "mimetype": "text/x-python", 470 | "name": "python", 471 | "nbconvert_exporter": "python", 472 | "pygments_lexer": "ipython3", 473 | "version": "3.7.1" 474 | } 475 | }, 476 | "nbformat": 4, 477 | "nbformat_minor": 2 478 | } 479 | -------------------------------------------------------------------------------- /INTERNSHALA/3 Predictive modeling and ML/4 K means/K-Means.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "ename": "ImportError", 10 | "evalue": "cannot import name 'pyplot' from 'matplotlib.pyplot' (C:\\Users\\SARIM\\Anaconda3\\lib\\site-packages\\matplotlib\\pyplot.py)", 11 | "output_type": "error", 12 | "traceback": [ 13 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 14 | "\u001b[1;31mImportError\u001b[0m Traceback (most recent call last)", 15 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mpyplot\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m 
\u001b[0mget_ipython\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'matplotlib'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'inline '\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcluster\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mKMeans\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 16 | "\u001b[1;31mImportError\u001b[0m: cannot import name 'pyplot' from 'matplotlib.pyplot' (C:\\Users\\SARIM\\Anaconda3\\lib\\site-packages\\matplotlib\\pyplot.py)" 17 | ] 18 | } 19 | ], 20 | "source": [ 21 | "import pandas as pd\n", 22 | "import numpy as np\n", 23 | "from matplotlib import pyplot as plt\n", 24 | "%matplotlib inline \n", 25 | "from sklearn.cluster import KMeans" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "data=pd.read_csv(\"student_evaluation.csv\")" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "data.shape" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "data.head()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "pd.isnull(data).sum()" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "data.describe()" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "kmeans = KMeans(n_clusters=2)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 
null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "kmeans.fit(data)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "pred=kmeans.predict(data)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "pred" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "pd.Series(pred).value_counts()" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "kmeans.inertia_" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "kmeans.score(data)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "SSE = []" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "for cluster in range(1,20):\n", 152 | " kmeans = KMeans(n_jobs = -1, n_clusters = cluster)\n", 153 | " kmeans.fit(data)\n", 154 | " SSE.append(kmeans.inertia_)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "frame = pd.DataFrame({'Cluster':range(1,20), 'SSE':SSE})" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "plt.figure(figsize=(12,6))\n", 173 | "plt.plot(frame['Cluster'], frame['SSE'], marker='o')" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": 
[ 182 | "from sklearn.preprocessing import StandardScaler\n", 183 | "\n", 184 | "scaler = StandardScaler()\n", 185 | "\n", 186 | "data_scaled = scaler.fit_transform(data)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "pd.DataFrame(data_scaled).describe()" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "SSE_scaled = []" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [ 213 | "for cluster in range(1,20):\n", 214 | " kmeans = KMeans(n_jobs = -1, n_clusters = cluster)\n", 215 | " kmeans.fit(data_scaled)\n", 216 | " SSE_scaled.append(kmeans.inertia_)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "frame_scaled = pd.DataFrame({'Cluster':range(1,20), 'SSE':SSE_scaled})\n", 226 | "plt.plot(frame_scaled['Cluster'], frame_scaled['SSE'], marker='o')\n", 227 | "plt.xlabel(\"Clusters\")\n", 228 | "plt.ylabel(\"SSE\")" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "kmeans = KMeans(n_jobs = -1, n_clusters = 4)\n", 238 | "kmeans.fit(data_scaled)\n", 239 | "pred = kmeans.predict(data_scaled)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "pred" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "frame = pd.DataFrame(data_scaled)" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | 
"frame['cluster'] = pred" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 1, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "ename": "NameError", 276 | "evalue": "name 'frame' is not defined", 277 | "output_type": "error", 278 | "traceback": [ 279 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 280 | "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", 281 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mframe\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mframe\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'cluster'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m==\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 282 | "\u001b[1;31mNameError\u001b[0m: name 'frame' is not defined" 283 | ] 284 | } 285 | ], 286 | "source": [ 287 | "frame.loc[frame['cluster']==2,:]" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 3, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "data=pd.read_csv(\"Final+Test+Data+Set.csv\")" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 4, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "data": { 306 | "text/html": [ 307 | "
\n", 308 | "\n", 321 | "\n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | "
Loan_IDGenderMarriedDependentsEducationSelf_EmployedApplicantIncomeCoapplicantIncomeLoanAmountLoan_Amount_TermCredit_HistoryProperty_AreaLoan_Status
0LP001002MaleNo0GraduateNo58490.0NaN360.01.0UrbanY
1LP001003MaleYes1GraduateNo45831508.0128.0360.01.0RuralN
2LP001005MaleYes0GraduateYes30000.066.0360.01.0UrbanY
3LP001006MaleYes0Not GraduateNo25832358.0120.0360.01.0UrbanY
4LP001008MaleNo0GraduateNo60000.0141.0360.01.0UrbanY
\n", 423 | "
" 424 | ], 425 | "text/plain": [ 426 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n", 427 | "0 LP001002 Male No 0 Graduate No \n", 428 | "1 LP001003 Male Yes 1 Graduate No \n", 429 | "2 LP001005 Male Yes 0 Graduate Yes \n", 430 | "3 LP001006 Male Yes 0 Not Graduate No \n", 431 | "4 LP001008 Male No 0 Graduate No \n", 432 | "\n", 433 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n", 434 | "0 5849 0.0 NaN 360.0 \n", 435 | "1 4583 1508.0 128.0 360.0 \n", 436 | "2 3000 0.0 66.0 360.0 \n", 437 | "3 2583 2358.0 120.0 360.0 \n", 438 | "4 6000 0.0 141.0 360.0 \n", 439 | "\n", 440 | " Credit_History Property_Area Loan_Status \n", 441 | "0 1.0 Urban Y \n", 442 | "1 1.0 Rural N \n", 443 | "2 1.0 Urban Y \n", 444 | "3 1.0 Urban Y \n", 445 | "4 1.0 Urban Y " 446 | ] 447 | }, 448 | "execution_count": 4, 449 | "metadata": {}, 450 | "output_type": "execute_result" 451 | } 452 | ], 453 | "source": [ 454 | "data.head()" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [] 463 | } 464 | ], 465 | "metadata": { 466 | "kernelspec": { 467 | "display_name": "Python 3", 468 | "language": "python", 469 | "name": "python3" 470 | }, 471 | "language_info": { 472 | "codemirror_mode": { 473 | "name": "ipython", 474 | "version": 3 475 | }, 476 | "file_extension": ".py", 477 | "mimetype": "text/x-python", 478 | "name": "python", 479 | "nbconvert_exporter": "python", 480 | "pygments_lexer": "ipython3", 481 | "version": "3.7.1" 482 | } 483 | }, 484 | "nbformat": 4, 485 | "nbformat_minor": 2 486 | } 487 | -------------------------------------------------------------------------------- /INTERNSHALA/2 Understanding the Statistics for Data science/1 Mode.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Measuring the central tendency" 8 
| ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 6, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/html": [ 27 | "
\n", 28 | "\n", 41 | "\n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 
198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " 
\n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | "
StudentOverall MarksGenderSubject
0Anish97MaleEnglish
1Rishabh83MaleEnglish
2Ajay78MalePhysics
3Abhinav86MaleChemistry
4Anurag91MaleEnglish
5Ajeet15MalePhysics
6Varun83MaleChemistry
7Rajeev64MaleChemistry
8sanjay66MaleChemistry
9NISHA58FemalePhysics
10SURESH62MalePhysics
11Aniket62MaleEnglish
12manu20MalePhysics
13ALOK85MaleEnglish
14Pranav49MaleChemistry
15Deep92MaleEnglish
16Prashant21MalePhysics
17Kunal15MalePhysics
18Soham46MaleChemistry
19mayur100MaleChemistry
20shaan18MaleChemistry
21shivangi70FemalePhysics
22Sam82FemaleEnglish
23Dhruv88MaleEnglish
24tushar88MaleEnglish
25Neelam100FemaleChemistry
26yash86MalePhysics
27Nitin60MaleEnglish
28Arun88MaleChemistry
29Sumit19MalePhysics
...............
70Abhishek91MaleChemistry
71simran0FemalePhysics
72Angel99FemaleEnglish
73juvina78FemaleEnglish
74ajith63MaleEnglish
75Tanya98FemaleChemistry
76Vaibhav98MalePhysics
77Sneha98FemaleEnglish
78prince64MaleChemistry
79jay81MalePhysics
80Avinash76MaleEnglish
81sanchit80MaleChemistry
82Rohan97MalePhysics
83dinesh18MalePhysics
84Lily90FemaleChemistry
85diksha71FemaleChemistry
86vishal67MaleEnglish
87Rutuja73FemaleChemistry
88akash93MalePhysics
89mahesh78MalePhysics
90vedant74MaleEnglish
91Ram99MalePhysics
92Dawn73FemalePhysics
93ankur17MaleChemistry
94Smriti0FemaleChemistry
95Aaditya77MaleChemistry
96vivek55MalePhysics
97Tisha7FemaleEnglish
98Raghav73MaleEnglish
99Ishita82FemaleChemistry
\n", 481 | "

100 rows × 4 columns

\n", 482 | "
" 483 | ], 484 | "text/plain": [ 485 | " Student Overall Marks Gender Subject\n", 486 | "0 Anish 97 Male English\n", 487 | "1 Rishabh 83 Male English\n", 488 | "2 Ajay 78 Male Physics\n", 489 | "3 Abhinav 86 Male Chemistry\n", 490 | "4 Anurag 91 Male English\n", 491 | "5 Ajeet 15 Male Physics\n", 492 | "6 Varun 83 Male Chemistry\n", 493 | "7 Rajeev 64 Male Chemistry\n", 494 | "8 sanjay 66 Male Chemistry\n", 495 | "9 NISHA 58 Female Physics\n", 496 | "10 SURESH 62 Male Physics\n", 497 | "11 Aniket 62 Male English\n", 498 | "12 manu 20 Male Physics\n", 499 | "13 ALOK 85 Male English\n", 500 | "14 Pranav 49 Male Chemistry\n", 501 | "15 Deep 92 Male English\n", 502 | "16 Prashant 21 Male Physics\n", 503 | "17 Kunal 15 Male Physics\n", 504 | "18 Soham 46 Male Chemistry\n", 505 | "19 mayur 100 Male Chemistry\n", 506 | "20 shaan 18 Male Chemistry\n", 507 | "21 shivangi 70 Female Physics\n", 508 | "22 Sam 82 Female English\n", 509 | "23 Dhruv 88 Male English\n", 510 | "24 tushar 88 Male English\n", 511 | "25 Neelam 100 Female Chemistry\n", 512 | "26 yash 86 Male Physics\n", 513 | "27 Nitin 60 Male English\n", 514 | "28 Arun 88 Male Chemistry\n", 515 | "29 Sumit 19 Male Physics\n", 516 | ".. ... ... ... 
...\n", 517 | "70 Abhishek 91 Male Chemistry\n", 518 | "71 simran 0 Female Physics\n", 519 | "72 Angel 99 Female English\n", 520 | "73 juvina 78 Female English\n", 521 | "74 ajith 63 Male English\n", 522 | "75 Tanya 98 Female Chemistry\n", 523 | "76 Vaibhav 98 Male Physics\n", 524 | "77 Sneha 98 Female English\n", 525 | "78 prince 64 Male Chemistry\n", 526 | "79 jay 81 Male Physics\n", 527 | "80 Avinash 76 Male English\n", 528 | "81 sanchit 80 Male Chemistry\n", 529 | "82 Rohan 97 Male Physics\n", 530 | "83 dinesh 18 Male Physics\n", 531 | "84 Lily 90 Female Chemistry\n", 532 | "85 diksha 71 Female Chemistry\n", 533 | "86 vishal 67 Male English\n", 534 | "87 Rutuja 73 Female Chemistry\n", 535 | "88 akash 93 Male Physics\n", 536 | "89 mahesh 78 Male Physics\n", 537 | "90 vedant 74 Male English\n", 538 | "91 Ram 99 Male Physics\n", 539 | "92 Dawn 73 Female Physics\n", 540 | "93 ankur 17 Male Chemistry\n", 541 | "94 Smriti 0 Female Chemistry\n", 542 | "95 Aaditya 77 Male Chemistry\n", 543 | "96 vivek 55 Male Physics\n", 544 | "97 Tisha 7 Female English\n", 545 | "98 Raghav 73 Male English\n", 546 | "99 Ishita 82 Female Chemistry\n", 547 | "\n", 548 | "[100 rows x 4 columns]" 549 | ] 550 | }, 551 | "execution_count": 6, 552 | "metadata": {}, 553 | "output_type": "execute_result" 554 | } 555 | ], 556 | "source": [ 557 | "a= pd.read_csv(\"mode.csv\")\n", 558 | "a" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": 8, 564 | "metadata": {}, 565 | "outputs": [ 566 | { 567 | "data": { 568 | "text/plain": [ 569 | "0 Male\n", 570 | "dtype: object" 571 | ] 572 | }, 573 | "execution_count": 8, 574 | "metadata": {}, 575 | "output_type": "execute_result" 576 | } 577 | ], 578 | "source": [ 579 | "a['Gender'].mode()" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": null, 585 | "metadata": {}, 586 | "outputs": [], 587 | "source": [] 588 | } 589 | ], 590 | "metadata": { 591 | "kernelspec": { 592 | "display_name": "Python 3", 593 | 
"language": "python", 594 | "name": "python3" 595 | }, 596 | "language_info": { 597 | "codemirror_mode": { 598 | "name": "ipython", 599 | "version": 3 600 | }, 601 | "file_extension": ".py", 602 | "mimetype": "text/x-python", 603 | "name": "python", 604 | "nbconvert_exporter": "python", 605 | "pygments_lexer": "ipython3", 606 | "version": "3.7.1" 607 | } 608 | }, 609 | "nbformat": 4, 610 | "nbformat_minor": 2 611 | } 612 | --------------------------------------------------------------------------------