├── OSmoduleTOlistDirectory.py ├── arithmeticOperations.py ├── array_numpy.py ├── array_operations.py ├── bikeRentelSystem.py ├── biodataUsingClass.py ├── calculatorUsingClass.py ├── classInstances.py ├── colored_module.py ├── conditional.py ├── constructor.py ├── copy_file.py ├── dataPreprocessing.py ├── dataProcessing.py ├── data_types.py ├── deepLearningAndModel.py ├── dictionary.py ├── dictionaryFunctions.py ├── escapeSequences.py ├── factorial.py ├── fileOperations&dataCleaning.py ├── findFUNC.py ├── functions.py ├── hollowSquare.py ├── json.py ├── letter.py ├── listComprehension.py ├── listFunctions.py ├── listIteration.py ├── listSlicing.py ├── newfile.py ├── newfile1.py ├── newfile2.py ├── newfile3.py ├── numpy.py ├── pandas.py ├── primeornot.py ├── printingPoem.py ├── project1.py ├── project2.py ├── project3onCNN.py ├── project4onNLP.py ├── project5onRecommendation.py ├── project6onImageClassification.py ├── project7onNLPandChatbot.py ├── pyjokes.py ├── pyramid.py ├── queueUsingList.py ├── randomModule.py ├── replace.py ├── rockPaperScissor.py ├── sets.py ├── slicing_concat.py ├── sorting.py ├── stackUsingList.py ├── startswith.py ├── staticMethodINclass.py ├── statistics.py ├── stringFormatting.py ├── stringManipulations.py ├── stringToList.py ├── table.py ├── textTOspeech.py ├── tuple.py ├── tuples.py ├── usingListFun.py ├── wishing.py ├── youtubeTranscriptSummarizer.py └── zipFunction.py /OSmoduleTOlistDirectory.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Select the directory whose content you want to list 4 | directory_path = '/' 5 | 6 | # Use the os module to list the directory content 7 | contents = os.listdir(directory_path) 8 | 9 | # Print the contents of the directory 10 | print(contents) 11 | -------------------------------------------------------------------------------- /arithmeticOperations.py: -------------------------------------------------------------------------------- 1 | #Write a program to perform different Arithmetic operations on numbetrs in python 2 | n1=int(input("Enter first number:")) 3 | n2=int(input("Enter second number:")) 4 | sum=n1+n2 5 | sub=n1-n2 6 | mul=n1*n2 7 | div=n1/n2 8 | mod=n1%n2 9 | fdiv=n1//n2 10 | exp=n1**n2 11 | print("Addition is:",sum) 12 | print("Substraction is:",sub) 13 | print("Multiplication is:",mul) 14 | print("Division is:",div) 15 | print("Modulo division is:",mod) 16 | print("Floor division is:",fdiv) 17 | print("Exponent is:",exp) -------------------------------------------------------------------------------- /array_numpy.py: -------------------------------------------------------------------------------- 1 | #Write a program to demonstrate arrays in numpy 2 | import numpy as np 3 | a = np.array(42) 4 | b = np.array([1, 2, 3, 4, 5]) 5 | c = np.array([[1, 2, 3], [4, 5, 6]]) 6 | d = np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]]) 7 | print("Entered array is:",a,"\nand its dimension is:",a.ndim) 8 | print("\nEntered array is:",b,"\nand its dimension is:",b.ndim) 9 | print("\nEntered array is:",c,"\nand its dimension is:",c.ndim) 10 | print("\nEntered array is:",d,"\nand its dimension is:",d.ndim) 11 | -------------------------------------------------------------------------------- /array_operations.py: -------------------------------------------------------------------------------- 1 | '''Write a program to demonstrate array indexing such as slicing, integer array indexing 2 | and Boolean array indexing along with their basic operations in NumPy.''' 
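# Illustrative aside (not part of the original script): the docstring above also mentions integer array indexing and Boolean array indexing, which the code below does not show explicitly. A minimal sketch of both, assuming the same array:
#     import numpy as np
#     a = np.arange(10, 1, -2)       # array([10, 8, 6, 4, 2])
#     print(a[[3, 1, 2]])            # integer array indexing -> [4 8 6]
#     print(a[a > 5])                # Boolean array indexing -> [10 8 6]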
3 | import numpy as np 4 | a=np.arange(10,1,-2) 5 | print("Sequential array with negative step value:",a) 6 | newarr=[a[3],a[1],a[2]] 7 | print("Elements at these indices are:",newarr) 8 | a=np.arange(20) 9 | print("Array is:",a) 10 | print("a[-8:17:1]=",a[-8:17:1]) 11 | print("a[10:]=",a[10:]) -------------------------------------------------------------------------------- /bikeRentelSystem.py: -------------------------------------------------------------------------------- 1 | class bikeShop: 2 | def __init__(self,stock): 3 | self.stock=stock 4 | def displayBike(self): 5 | print("Total Bikes",self.stock) 6 | def rentForBike(self,q): 7 | if q<=0: 8 | print("Enter a value greater than zero") 9 | elif q>self.stock: 10 | print("Enter a value less than or equal to the available stock") 11 | else: 12 | self.stock=self.stock-q 13 | print("Total Price",q*100) 14 | print("Total Bikes",self.stock) 15 | 16 | obj=bikeShop(100) 17 | while True: 18 | uc=int(input(''' 19 | 1 Display Stocks 20 | 2 Rent A Bike 21 | 3 Exit 22 | ''')) 23 | if uc==1: 24 | obj.displayBike() 25 | elif uc==2: 26 | n=int(input("Enter The Quantity:")) 27 | obj.rentForBike(n) 28 | else: 29 | break 30 | -------------------------------------------------------------------------------- /biodataUsingClass.py: -------------------------------------------------------------------------------- 1 | class Programmer: 2 | company = "Microsoft" 3 | def __init__(self, name, salary, pin): 4 | self.name = name 5 | self.salary = salary 6 | self.pin = pin 7 | 8 | 9 | p = Programmer("Harry", 1200000, 245001) 10 | print(p.name, p.salary, p.pin, p.company) 11 | r = Programmer("Rohan", 1200000, 245001) 12 | print(r.name, r.salary, r.pin, r.company) 13 | -------------------------------------------------------------------------------- /calculatorUsingClass.py: -------------------------------------------------------------------------------- 1 | class Calculator: 2 | def __init__(self, n): 3 | self.n = n 4 | 5 | def square(self): 6 | print(f"The square is {self.n*self.n}") 7 | 8 | def cube(self): 9 | print(f"The cube is {self.n*self.n*self.n}") 10 | 11 | def squareroot(self): 12 | print(f"The squareroot is {self.n**0.5}") 13 | 14 | a = Calculator(4) 15 | a.square() 16 | a.cube() 17 | a.squareroot() 18 | -------------------------------------------------------------------------------- /classInstances.py: -------------------------------------------------------------------------------- 1 | class Demo: 2 | a = 4 3 | 4 | o = Demo() 5 | print(o.a) # Prints the class attribute because instance attribute is not present 6 | o.a = 0 # Instance attribute is set 7 | print(o.a) # Prints the instance attribute because instance attribute is present 8 | print(Demo.a) # Prints the class attribute 9 | -------------------------------------------------------------------------------- /colored_module.py: -------------------------------------------------------------------------------- 1 | from termcolor import colored 2 | 3 | print(colored("H","red"),colored("e","yellow"),colored("l","green"),colored("l","cyan"),colored("o","blue"),colored(",","magenta"),colored("w","red"),colored("o","yellow"),colored("r","green"),colored("l","cyan"),colored("d","blue")) -------------------------------------------------------------------------------- /conditional.py: -------------------------------------------------------------------------------- 1 | def greet(name): 2 | return "Hello " + name 3 | print(greet("Alice")) --------------------------------------------------------------------------------
/constructor.py: -------------------------------------------------------------------------------- 1 | class Employee: 2 | language = "Python" # This is a class attribute 3 | salary = 1200000 4 | 5 | def __init__(self, name, salary, language): # dunder method which is automatically called 6 | self.name = name 7 | self.salary = salary 8 | self.language = language 9 | print("I am creating an object") 10 | 11 | 12 | def getInfo(self): 13 | print(f"The language is {self.language}. The salary is {self.salary}") 14 | 15 | @staticmethod 16 | def greet(): 17 | print("Good morning") 18 | 19 | 20 | harsh = Employee("Harsh", 1300000, "JavaScript") 21 | # harsh.name = "Harsh" 22 | print(harsh.name, harsh.salary, harsh.language) 23 | 24 | rohan = Employee() 25 | -------------------------------------------------------------------------------- /copy_file.py: -------------------------------------------------------------------------------- 1 | infile=input("Enter 1st file name:") 2 | outfile=input("Enter 2nd file name:") 3 | f1=open("firstfile.txt",'r') 4 | f2=open("secondfile.txt",'w+') 5 | content=f1.read() 6 | f2.write(content) -------------------------------------------------------------------------------- /dataPreprocessing.py: -------------------------------------------------------------------------------- 1 | #Missing Value Treatment, Data Discretization, Feature Selection using Variance & Correlation 2 | 3 | #Data Preprocessing 4 | '''Data Preprocessing involves cleaning and engineering data in a way that it can be used as input to several important data science tasks such as data visualization, machine learning, deep learning, and data analytics. 5 | Some of the most common data preparation tasks include feature scaling, handling missing values, categorial variable encoding, data discretization.''' 6 | 7 | #Feature Scaling 8 | '''A dataset can have different attributes. The attributes can have different magnitudes, variances, standard deviation, mean value etc. 9 | For instance, salary can be in thousands, whereas age is normallly a two-digit number. 10 | The difference in the scale or magnitude of attributes can actually affect statistical models. 11 | For instance, variables wirh bigger ranges dominate those with smaller ranges for linear models.''' 12 | 13 | #Standardization 14 | '''Standardization is the process of centering a variable at zero and standardizing the data variance to 1. 15 | To standardize a dataset, you simply have to subtract each data point from the mean 16 | of all the data points and divide the d 17 | result by the standard deviation of the data. 
18 | Feature Scaling is applied on numeric data only.''' 19 | 20 | import pandas as pd 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | import seaborn as sns 24 | titanic_data=sns.load_dataset("titanic") 25 | titanic_data=titanic_data["age","fare","prices"]] 26 | titanic_data.head() 27 | titanic_data.describe() 28 | from sklearn.preprocessing import StandardScaler 29 | scaler=StandardScaler() 30 | scaler.fit(titanic_data) 31 | titanic_data_scaled=scaler.transform(titanic_data) 32 | titanic_data_scaled=pd.DataFrame(titanic_data_scaled,columns=titanic_data.columns) 33 | titanic_data_scaled.head() 34 | sns.kdeplot(titanic_data_scaled["age"]) 35 | 36 | #Min/Max Scaling 37 | '''In min/max scaling, you subtract each value by the minimum value and divide the result by the difference between minimum and maximum value in the dataset.''' 38 | 39 | from sklearn.preprocessing import MinMaxScaler 40 | scaler=MinMaxScaler() 41 | scaler.fit(titanic_data) 42 | titanic_data_scaled=scaler.transform(titanic_data) 43 | titanic_data_scaled=pd.DataFrame(titanic_data_scaled,columns=titanic_data.columns) 44 | titanic_data_scaled.head() 45 | sns.kdeplot(titanic_data_scaled["age"]) 46 | 47 | #Handling Missing Data 48 | '''Missing values are those observations in the dataset that do not contain any value. 49 | Missing values can totally change data patterns and therefore it is extremely important to understand why missing values occur in the dataset and how to handle them.''' 50 | 51 | #Handling Missing Numerical Data 52 | '''To handle missing numerical data, we can usee statistical techniques. The use of statistical techniques or algorithms to replace missing values with statistically generated values is called imputation''' 53 | 54 | titanic_data=sns.load_dataset("titanic") 55 | titanic_data.head() 56 | titanic_data=titanic_data[["survived","pclass","age","fare"]] 57 | titanic_data.head() 58 | titanic_data.isnull().mean() 59 | titanic_data.isnull().sum() 60 | median=titanic_data.age.median() 61 | print(median) 62 | mean=titanic_data.age.mean() 63 | print(mean) 64 | titanic_data["Median_Age"]=titanic_data.age.fillna(median) 65 | titanic_data["Mean_Age"]=titanic_data.age.fillna(mean) 66 | titanic_data["Mean_Age"]=np.round(titanic_data["Mean_Age"],1) 67 | titanic_data.head(20) 68 | 69 | #Frequent Category Imputation 70 | '''One of the most common ways of handling missing values in a categorial column is to replace the missing values with the most frequent occuring values i.e., the mode of the column.''' 71 | 72 | import matplotlib.pyplot as plt 73 | import seaborn as sns 74 | 75 | titanic_data=sns.load_dataset("titanic") 76 | titanic_data=titanic_data[["embark_town","age","fare"]] 77 | titanic_data.head() 78 | titanic_data.isnull().mean() 79 | titanic_data.embark_town.value_counts().sort_values(ascending=False).plot.bar() 80 | plt.xlabel("Embark Town") 81 | plt.ylabel("Number of Passengers") 82 | titanic_data.embark_town.mode() 83 | titanic_data.embark_town.fillna("Southampton",inplace=True) 84 | 85 | #Categorial Data Encoding 86 | '''Models based on statistical algorithms such as machine learning and deep learning work with numbers. 87 | A dataset can contain numerical, categorical, datetime, and mixed variables. 88 | A mechanism is needed to convert categorical data to its numeric counterpart so that the data can be used to build statistical models. 
89 | The techniques used to convert numeric data into categorical data are called categorical data encoding schemes.''' 90 | 91 | #One Hot Encoding 92 | '''One Hot Encoding is one of the most commonly used categorical encoding schemes. 93 | In one hot encoding for each unique value in the categorical column a new column is added. 94 | Integer 1 is added to the column that corresponds to the original label and all the remaining column are filled with zeros.''' 95 | 96 | titanic_data=sns.load_dataset("titanic") 97 | titanic_data.head() 98 | titanic_data=titanic_data[["sex","class","embark_town"]] 99 | titanic_data.head() 100 | 101 | import pandas as pd 102 | temp=pd.get_dummies(titanic_data["sex"]) 103 | temp.head() 104 | pd.concat([titanic_data["sex"],pd.get_dummies(titanic_data["sex"])],axis=1).head() 105 | temp=pd.get_dummies(titanic_data["embark_town"]) 106 | temp.head() 107 | 108 | #Label Encoding 109 | '''In label encoding, labels are replaced by integers. 110 | That is why label encoding is also called as Integer Encoding.''' 111 | 112 | from sklearn.preprocessing import LabelEncoder 113 | le=LabelEncoder() 114 | le.fit(titanic_data["class"]) 115 | titanic_data["le_class"]=le.transform(transform(titanic_data["class"]) 116 | titanic_data.head() 117 | 118 | #Data Discretization 119 | '''The process of converting continuous numeric values such as price, age, and weight into discrete intervals is called discretization or binning.''' 120 | 121 | #Equal Width Discretization 122 | '''The most common type of discretization approach is fixed width discretization.''' 123 | 124 | import matplotlib.pyplot as plt 125 | import seaborn as sns 126 | import pandas as pd 127 | import numpy as np 128 | 129 | import warnings 130 | warnings.filterwarnings("ignore") 131 | diamond_data=sns.load_dataset("diamonds") 132 | diamonds_data.head() 133 | sns.distplot(diamond_data["price"]) 134 | 135 | '''The histogram for price column shows that the data is positively skewed.''' 136 | 137 | price_range=diamond_data["price"].max()-diamond_data["price"].min() 138 | print(price_range) 139 | price_range/10 140 | lower_interval=int(np.floor(diamond_data["price"].min())) 141 | upper_interval=int(np.ceil(diamond_data["price"].max())) 142 | interval_length=int(np.round(price_range/10)) 143 | print(lower_interval) 144 | print(upper_interval) 145 | print(interval_length) 146 | total_bins=[i for i in range(lower_interval,upper_interval+interval_length,interval_length)] 147 | print(total_bins) 148 | bin_labels=['Bin_no_'+str(i) for i in range(l,len(total_bins))] 149 | print(bin_labels) 150 | diamond_data['price_bins']=pd.cut(x=diamond_data['price'],bins=total_bins,labels=bin_labels,include_lowest=True) 151 | diamond_data.head() 152 | diamond_data.groupby('price_bins')['price'].count().plot.bar() 153 | plt.xticks(rotation=45) 154 | '''The output shows that the price of most of the diamonds lies in the first bin or the first interval.''' 155 | 156 | #Handling Outliers 157 | '''Outliers are the values that are too far from the rest of the observation.''' 158 | 159 | #Outlier Trimming 160 | '''As the name suggests it refers to simply removing the outliers beyond a certain threshold value. 
161 | One of the main advantage of outlier trimming is that it is extremely quick and doesn't distort the data.''' 162 | 163 | titanic_data=sns.load_dataset("titanic") 164 | titanic_data.head() 165 | sns.boxplot(y="age",data=titanic_data) 166 | 167 | '''One of the most common ways to remove the outliers is to find the Inter Quartile Range(IQR), multiply it by 1.5 and then subtract it from the first quartile value(0.25 quantile). 168 | To find the upper limit, add the product of IQR and 1.5 to the 3rd quartile value(0.75 quantile)''' 169 | 170 | IQR=titanic_data["age"].quantile(0.75)-titanic_data["age"].quantile(0.25) 171 | lower_age_limit=titanic_data["age"].quantile(0.25)-(IQR*1.5) upper_age_limit=titanic_data["age"].quantile(0.75)+(IQR*1.5) 172 | print(lower_age_limit) 173 | print(upper_age_limit) 174 | age_outliers=np.where(titanic_data["age"]>upper_age_limit,True,np.where(titanic_data["age"]0.6: 224 | corr_col=correlation_matrix.columns[i] 225 | correlated_features_matrix.add(corr_col) 226 | len(correlated_features_matrix) 227 | print(correlated_features_matrix) 228 | filtered_dataset=features.drop(correlated_features_matrix,axis=1) 229 | filtered_dataset.head() 230 | -------------------------------------------------------------------------------- /dataProcessing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | tips=pd.read_csv("examples/tips.csv") 3 | tips.head() 4 | 5 | import seaborn as sns 6 | tips["tip_pct"]=tips["tip"]/(tips["total_bill"]-tips["tip"]) 7 | tips.head() 8 | 9 | #Histograms & Density Plots 10 | '''A Histogram is a kind of bar plot that gives a discretized display of continuous data.''' 11 | tips["tip_pct"].plot.hist(bins=50) 12 | tips["tip_pct"].plot.density() 13 | 14 | #Scatter or Point Plots 15 | import numpy as np 16 | macro=pd.read_csv("examples/macrodata.csv") 17 | data=macro[["cpi","mi","tbilrate","unemp"]] 18 | trans_data=np.log(data).diff().dropna() 19 | trans_data.tail() 20 | ax=sns.regplot(x="m1",y="unemp",data=trans_data) 21 | #ax.title("Changes in log(m1) versus log(unemp)") 22 | 23 | #Pairplot 24 | sns.pairplot(trans_data,diag_kind="kde",plot_kws={"alpha":0.5}) 25 | sns.catplot(x="day",y="tip_pct",hue="time",col="smoker",kind="bar",data=tips[tips.tip_pct<1]) 26 | sns.catplot(x="day",y="tip_pct",row="time",col="smoker",kind="bar",data=tips[tips.tip_pct<1]) 27 | 28 | #Box Plot explains 5 number theory 29 | sns.catplot(x="tip_pct",y="day",kind="box",data=tips[tips.tip_pct<0.5]) 30 | 31 | #Data Aggregation and Group Operations 32 | df=pd.DataFrame({"key1":["a","a",None,"b","b","a",None],"key2":pd.Series([1,2,1,2,1,None,1],dtype="Int64"),"data1":np.random.standard_normal(7),"data2":np.random.standard_normal(7)}) 33 | print(df) 34 | grouped=df["data1"].groupby(df["key1"]) 35 | print(grouped) 36 | grouped.mean() 37 | means=df["data1"].groupby([df["key1"],df["key2"]]).mean() 38 | print(means) 39 | sums=df["data1"].groupby([df["key1"],df["key2"]]).sum() 40 | sum.unstack() 41 | states=np.array(["OH","CA","CA","OH","CA","OH"]) 42 | years=[2005,2005,2006,2005,2006,2005,2006] 43 | df["data1"].groupby([states,years]).mean() 44 | df.groupby("key1").mean() 45 | df.groupby("key2").count() 46 | df.groupby(["key1","key2"]).mean() 47 | df.groupby("key1",dropna=False).size() 48 | df.groupby(["key1","key2"],dropna=False).size() 49 | df.groupby("key1").count() 50 | for name,group in df.groupby("key1"): 51 | print(name) 52 | print(group) 53 | df.groupby("key1")["data1"].sum() 54 | 
df.groupby(["key1","key2"])["data2"].mean() 55 | tips.head() 56 | tips.groupby(["day","smoker"])["tip_pct"].mean() 57 | tips.groupby(["day","smoker"])["tip_pct"].agg("mean") 58 | tips.groupby(["day","smoker"])["tip_pct"].agg(["mean","std","count"]) 59 | tips.groupby(["day","smoker"])["tip_pct"].agg(["average","mean"),("stddev",np.std)]) 60 | functions=["count","mean","max"] 61 | result=tips.groupby(["day","smoker"])[["tip_pct","total_bill"]].agg(functions) 62 | print(functions) 63 | -------------------------------------------------------------------------------- /data_types.py: -------------------------------------------------------------------------------- 1 | #Write a program to demonstrate different numeric data types 2 | a=5 3 | b=5.7 4 | c=2+7j 5 | d="python" 6 | print("Value of a:",a,"\t\t\tDatatype is:",type(a)) 7 | print("Value of b:",b,"\t\tDatatype is:",type(b)) 8 | print("Value of c:",c,"\t\tDatatype is:",type(c)) 9 | print("Value of d:",d,"\t\tDatatype is:",type(d)) 10 | -------------------------------------------------------------------------------- /deepLearningAndModel.py: -------------------------------------------------------------------------------- 1 | +#Introduction to Deep Learning and Model on Iris Dataset 2 | #Deep Learning Framework 3 | '''Deep Learning is a field within Machine Learning that deals with building amd using Neural Network Models. 4 | Neural Networks mimic the functioning of a human brain. 5 | Neural Networks with more than three layers are typically categorised as Deep Learning Networks.''' 6 | #Perceptron 7 | '''The Perceptron is the unit of learning in an artificial neural networks.A Perceptron resembles a human brain cell. 8 | A Perceptron is a single cell or node in a neural network. 9 | In Deep Learning, we replace slope of model with weights called as w and intercept with the bias called as b. 10 | Weights and Biases become the parameters for a neural network. 11 | The number of weights equals the number of inputs/features.''' 12 | 13 | #Artificial Neural Network 14 | '''An ANN is a network of perceptrons. A deep neural network usually has three or more layers. 15 | Each node has its own weights, biases and activation function.Each node is connected to all the nodes in the next layer forming a dense network. 16 | Training an ANN means determining the right values for these parameters and hyperparameters such that it maximizes the accuracy of predictions for the given use case.''' 17 | 18 | #Neural Network Architecture 19 | #Input Layer 20 | '''The input to Deep Learning model is usually a vector of Numeric values. 21 | Vectors are usually defined using NumPy arrays. It represents the feature variables or independent variables that are used for prediction as well as training.''' 22 | #Hidden Layer 23 | '''An ANN can have one or more hidden layers. The more the layers are the deep the network is. 24 | Each hidden layer can have one or more nodes. Typically, the node count is configured in range of 2^n. Ecamples are 8,16,32,64,128 etc. 25 | A neural network is defined by the number of layers and nodes. 26 | The output of each node in previous layer will become the input for every node in the current layers. 27 | When there are more nodes and layers it usually results in better accuracy. As a general practice, start with small number and keep adding until an acceptable accuracy levels are obtained.''' 28 | #Weights and Biases 29 | '''They form the basis for Deep Learning Algorithms. 
Weights and Biases are trainable parameters in a neural network model. 30 | Each input for each node will have an associated weight with it.''' 31 | #Activation Functions 32 | '''An activation function plays an important role in creating the output of a node in the neural network. 33 | An activation function takes the matrix output of the node and determines if and how the node will propagate information to the next layer. 34 | The main objective of an activation function is to convert the output to a non-linear value. Activation functions serve as a critical step in helping a neural network learn specific patterns in the data. 35 | TanH:- A TanH function normalizes the output to the range (-1 to +1). 36 | ReLU:- Rectified Linear Unit- A ReLU produces zero if the input is negative; otherwise it passes the input through unchanged. 37 | Softmax Function:- This is used for classification problems. It produces a vector of probabilities, one for each of the possible classes in the outcome. The class with the highest probability is taken as the final class. 38 | All of these activation functions are specified as hyperparameters of the model.''' 39 | 40 | #Output Layer 41 | '''The output layer is the final layer in the neural network, where the desired predictions are obtained.''' 42 | #Training a Neural Network Model 43 | '''Set up and initialisation:- The weights and biases of the network are initialised, typically with small random values, before training begins. 44 | Forward Propagation:- Movement of data from the input layer through the hidden layers to the output layer.''' 45 | #Measure Accuracy and Error 46 | '''Back Propagation:- If the error is high, the weights and biases are adjusted through gradient descent to improve accuracy. 47 | Gradient Descent is the process of repeating forward and backward propagation in order to reduce the error and move closer to the desired model.
48 | Batches and Epochs:- 10000/10(1000) 49 | Validation and Testing''' 50 | 51 | #Deep Learning Example- Iris Dataset 52 | import pandas as pd 53 | import numpy as np 54 | import matplotlib.pyplot as plt 55 | import os 56 | import tensorflow as tf 57 | from sklearn.model_selection import train_test_split 58 | from sklearn.preprocessing import StandardScaler 59 | import warnings 60 | warnings.filterwarnings("ignore") 61 | 62 | '''Prepare input data for deep learning 63 | Load data into pandas dataframe 64 | Convert the dataframe into numpy array 65 | Scale the feature dataset 66 | Use of one hot encoding for the target variable 67 | Split the dataset into training and test datasets 68 | Load Data and Review content''' 69 | 70 | iris_data=pd.read_csv("iris.csv") 71 | print(iris_data.head()) 72 | '''Use label encoder to convert String to Numeric values for the target variable''' 73 | from sklearn.preprocessing import LabelEncoder 74 | label_encoder=LabelEncoder() 75 | iris_data['Species']=label_encoder.fit_transform(iris_data['Species']) 76 | print(iris_data.head()) 77 | #Converting input to numpy array 78 | np_iris=iris_data.to_numpy() 79 | print(np_iris.shape) 80 | #Separate features and target variables 81 | X_data=np_iris[:,0:4] 82 | Y_data=np_iris[:,4] 83 | print("\n Features before Scaling: \n---------") 84 | print(X_data[:5,:]) 85 | print("\ntarget before one-hot ending: \n---------") 86 | print(Y_data[:5]) 87 | #Create a standard scaler object that if fit on the input data 88 | scaler=StandardScaler().fit(X_data) 89 | #scale tge numeric feature variable 90 | X_data=scaler.transform(X_data) 91 | #convert target variable as a one-hot encoded array 92 | Y_data=tf.keras.utils.to_categorical(Y_data,3) 93 | print("\n Features after Scaling: \n----------") 94 | print(X_data[:5,:]) 95 | print("\ntarget after one-hot encoding: \n----------") 96 | print(Y_data[:5]) 97 | #Splitting the data into training and test sets 98 | X_train,X_test,Y_train,Y_test=train_test_split(X_data,Y_data,test_size=0.10) 99 | print("\n Train test Dimensions: \n----------") 100 | print(X_train.shape,X_test.shape,Y_train.shape,Y_test.shape) 101 | '''Create a Model 102 | Number of hidden layers 103 | Number of nodes in each layer 104 | Activation functions 105 | Loss function and accuracy measurements. ''' 106 | from tensorflow import keras 107 | #Number of classes in the target variable 108 | NB_CLASSES=3 109 | #Create a sequential model in keras 110 | model=tf.keras.models.Sequential() 111 | #add the first hidden layer 112 | model.add(keras.layers.Dense(128,#Number of nodes 113 | input_shape=(4,),#number of input variables 114 | name="Hidden-Layer-1",#Logical name 115 | activation="relu"))#activation function 116 | #add a second hidden layer 117 | model.add(keras.layers.Dense(128,name="Hidden-Layer-2",activation="relu")) 118 | #add an output layer with softmax function 119 | model.add(keras.layers.Dense(NB_CLASSES,name="Output-Layer",activation="softmax")) 120 | #compile the model with loss and metrics 121 | model.compile(loss="categorical_crossentropy",metrics=["accuracy"]) 122 | #print the model summary 123 | model.summary() 124 | #Make it verbose so we can see the process 125 | VERBOSE=1 126 | #Set hyperparameters for training 127 | #Set batch size 128 | BATCH_SIZE=16 129 | #Set the number of epochs 130 | EPOCHS=20 131 | #Set the validation split. 20% of the training dataset will be used for validation 132 | VALIDATION_SPLIT=0.2 133 | print("\nTraining Progress: \n------------") 134 | '''Fitting the model. 
This will perform the entire training cycle, included forward propagation, loss computation, backward propagation and gradient descent.''' 135 | history=model.fit(X_train,Y_train,batch_size=BATCH_SIZE,epochs=EPOCHS,verbose=VERBOSE,validation_split=VALIDATION_SPLIT) 136 | print("Accuracy During Training: \n------------") 137 | import matplotlib.pyplot as plt 138 | #Plot the accuracy of the model after each epoch 139 | pd.DataFrame(history.history)["accuracy"].plot(figsize=(8,5)) 140 | plt.title("Accuracy improvement after each epoch") 141 | plt.show() 142 | #Evaluate the model against the test dataset and print the result 143 | print("\nEvaluate against test dataset: \n------------") 144 | model.evaluate(X_test,Y_test) 145 | #Saving a model 146 | model.save("iris_save") 147 | #Load the model 148 | loaded_model=keras.models.load_model("iris_save") 149 | #print the model summary 150 | loaded_model.summary() 151 | #Predictions with Deep Learning Model 152 | #raw prediction data 153 | prediction_input=[[2.6,12.,2.4,4.4]] 154 | #scale the prediction data with the same scaling object 155 | scaled_input=scaler.transform(prediction_input) 156 | #get the raw prediction probabilities 157 | raw_prediction=loaded_model.predict(scaled_input) 158 | print("Raw Prediction Output (Probabilities):",raw_prediction) 159 | #Find Prediction 160 | prediction=np.argmax(raw_prediction) 161 | print("Prediction is",label_encoder.inverse_transform([prediction])) -------------------------------------------------------------------------------- /dictionary.py: -------------------------------------------------------------------------------- 1 | #Write a program to demonstrate working with dictionaries in python 2 | #empty dictionary 3 | my_dict={} 4 | print("Empty dictionary is:",my_dict) 5 | #dictonary with integer keys 6 | my_dict={1:'apple',2:'ball'} 7 | print("Dictionary with integer keys:",my_dict) 8 | #dictionary with mixed keys 9 | my_dict={'name':'rishi',1:[2,4,3]} 10 | print("Dictionary with mixed keys",my_dict) 11 | #using dict.fromkeys() 12 | my_dict=dict.fromkeys("abcd",'alphabet') 13 | print("Dictionary created by using dict.fromkeys method=",my_dict) 14 | #using get method 15 | my_dict={'name':'jack','age':25} 16 | print(my_dict['name']) #output jack 17 | #changing and adding dictionary elements 18 | my_dict['age']=18 #update value 19 | my_dict['class']="B.Tech" #updating value 20 | print("After changing and adding the values,the new dictionary=",my_dict) 21 | #using items() 22 | print("Items in the dictionary is:",my_dict.items()) 23 | #using keys() 24 | print("Keys in the dictionary is:",my_dict.keys()) 25 | #using values() 26 | print("Values in the dictionary is:",my_dict.values()) -------------------------------------------------------------------------------- /dictionaryFunctions.py: -------------------------------------------------------------------------------- 1 | d={ 2 | 'name':'python', 3 | 'fees':8000, 4 | 'duration':'2 months' 5 | } 6 | print(d) 7 | print(type(d)) 8 | print(d['fees']) 9 | for n in d: 10 | print(n) 11 | print(d[n]) 12 | print(d.get('name')) 13 | for a in d.keys(): 14 | print(a) 15 | for a in d.values(): 16 | print(a) 17 | for a,b in d.items(): 18 | print(a,b) 19 | del d['fees'] 20 | print(d) 21 | print(d.pop('duration')) 22 | print(d) 23 | d=dict(name='python',fees=8000) 24 | print(d) 25 | d.update({'fees':10000}) 26 | print(d) 27 | print(d.clear()) 28 | print(d) 29 | d['desc']="This is Python" 30 | print(d) 31 | course={ 32 | 'php':{'duration':'3 months','fees':15000}, 33 | 
'java':{'duration':'2 months','fees':10000}, 34 | 'python':{'duration':'1 months','fees':12000}, 35 | } 36 | print(course) 37 | print(course['php']) 38 | print(course['php']['fees']) 39 | for k,v in course.items(): 40 | print(k,v) 41 | for k,v in course.items(): 42 | print(k,v['duration'],v['fees']) 43 | course['java']['fees']=20000 44 | print(course) 45 | -------------------------------------------------------------------------------- /escapeSequences.py: -------------------------------------------------------------------------------- 1 | letter = "Dear Hariram,\n\tThis python course is nice.\nThanks!" 2 | print(letter) 3 | -------------------------------------------------------------------------------- /factorial.py: -------------------------------------------------------------------------------- 1 | n = int(input("Enter the number: ")) 2 | product = 1 3 | for i in range(1, n+1): 4 | product = product * i 5 | 6 | print(f"The factorial of {n} is {product}") 7 | -------------------------------------------------------------------------------- /fileOperations&dataCleaning.py: -------------------------------------------------------------------------------- 1 | #unique values & value counts 2 | import numpy as np 3 | import pandas as pd 4 | obj=pd.Series(["c","a","d","a","a","b","b","c","c"]) 5 | uniques=obj.unique() 6 | print(uniques) 7 | print(obj.value_counts()) 8 | 9 | #data loading 10 | df=pd.read_csv("examples/ex1.csv") 11 | df.head() 12 | pd.read_csv("examples/ex2.csv") 13 | pd.read_csv("examples/ex2.csv",header=None) 14 | pd.read_csv("examples/ex2.csv",names=["a","b","c","d","message"]) 15 | names=["a","b","c","d","message"] 16 | pd.read_csv("examples/ex2.csv",names=names,index_col="message") 17 | result=pd.read_csv("examples/ex3.txt",sep="\s+") 18 | print(result) 19 | pd.read_csv("examples/ex4.csv",skiprows=[0,2,3]) 20 | result.to_csv("out.csv") #saving as a csv file 21 | 22 | #data cleaning & preparation 23 | #handling missing data 24 | float_data=pd.Series([1.2,-3.5,np.nan,0]) 25 | print(float_data) 26 | float_data.isna() 27 | string_data=pd.Series(["aardvark",np.nan,None,"avacado"]) 28 | print(string_data) 29 | string_data.isna() 30 | float_data=pd.Series([1,2,None],dtype="float64") 31 | print(float_data) 32 | float_data.isna() 33 | data=pd.Series([1,np.nan,3.5,np.nan,7]) 34 | data.dropna() 35 | data[data.notna()] 36 | data=pd.DataFrame([1.,6.5,3.],[1.,np.nan,np.nan],[np.nan,np.nan,np.nan],[np.nan,6.5,3.]]) 37 | print(data) 38 | data.dropna() 39 | data.dropna(how="all") 40 | data[4]=np.nan 41 | print(data) 42 | data.dropna(axis="columns",how="all") 43 | df=pd.DataFrame(np.random.standard_normal((7,3))) 44 | df.iloc[:4,1]=np.nan 45 | df.iloc[:2,2]=np.nan 46 | print(df) 47 | df.dropna() 48 | df.dropna(thresh=2) 49 | 50 | #filling in missing data 51 | print(df) 52 | df.fillna(0) 53 | df.fillna({1:0.5,2:0.9}) 54 | df=pd.DataFrame(np.random.standard_normal((6,3))) 55 | df.iloc[2:,1]=np.nan 56 | df.iloc[4:,2]=np.nan 57 | print(df) 58 | df.fillna(method="ffill") 59 | df.fillna(method="ffill",limit=2) 60 | data=pd.DataFrame({"k1":["one","two"]*3+["two"],"k2":[1,1,2,3,3,4,4]}) 61 | print(data) 62 | data.duplicated() 63 | data.drop_duplicates() 64 | data['v1']=range(7) 65 | print(data) 66 | data.drop_duplicates(subset=["k1"]) 67 | data.drop_duplicates(["k1","k2"],keep="last") 68 | data=pd.Series([1.,-999.,2,-999.,-1000.,3.]) 69 | print(data) 70 | data.replace(-999,np.nan) 71 | data.replace([-999,1000],np.nan) 72 | data.replace([-999,1000],[np.nan,0]) 73 | data.replace({-999:np.nan,-1000:0}) 
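# Note (added for clarity): dropna, fillna, drop_duplicates and replace shown above all return new objects by default and leave the original Series/DataFrame unchanged; reassign the result (e.g. data = data.replace(-999, np.nan)) or use inplace=True where supported to keep the change.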
74 | data=pd.DataFrame(np.arange(12).reshape((3,4)),index=["Ohio","Colorado","New York"],columnns=["one","two","three","four"]) 75 | print(data) 76 | def transform(x): 77 | return x[:4].upper() 78 | data.index=data.index.map(transform) 79 | print(data) 80 | data.rename(index=str.title,columns=str.upper) 81 | data.rename(index={"OHIO":"INDIANA"},columns={"three":"peekaboo"}) 82 | -------------------------------------------------------------------------------- /findFUNC.py: -------------------------------------------------------------------------------- 1 | name = "Ramlal is a good boy and Jonny is a bad boy. " 2 | 3 | print(name.find(" ")) 4 | -------------------------------------------------------------------------------- /functions.py: -------------------------------------------------------------------------------- 1 | #simple function 2 | def showdata(): 3 | print("WELCOME TO CSVTU") 4 | showdata() 5 | #function with arguments 6 | def sum(a,b): 7 | print(a+b) 8 | sum(10,20) 9 | sum(40,20) 10 | def sum(a,b=1): 11 | print(a+b) 12 | sum(10) 13 | sum(40,20) 14 | #function with return type 15 | def square(x): 16 | return x*x,x**2 17 | s=square(5) 18 | print(s) 19 | -------------------------------------------------------------------------------- /hollowSquare.py: -------------------------------------------------------------------------------- 1 | ''' 2 | *** 3 | * * for n = 3 4 | *** 5 | ''' 6 | n = int(input("Enter the number: ")) 7 | for i in range(1, n+1): 8 | if(i==1 or i==n): 9 | print("*"* n, end="") 10 | else: 11 | print("*", end="") 12 | print(" "* (n-2), end="") 13 | print("*", end="") 14 | print("") 15 | -------------------------------------------------------------------------------- /json.py: -------------------------------------------------------------------------------- 1 | import json 2 | d={ 3 | 'course_name':'Python', 4 | 'fees':15000 5 | } 6 | f=json.dumps(d) 7 | print(type(f))#string 8 | print(f) 9 | d='{"cname":"Python","fees":12000,"duration":"2 months"}' 10 | x=json.loads(d) 11 | print(type(x))#dictionary 12 | print(x) 13 | for a in x: 14 | print(a,x[a]) 15 | #How to read and write JSON file in python 16 | import json 17 | file=open("posts.json","r") 18 | x=file.read() 19 | finaldata=json.loads(x) 20 | for a in finaldata: 21 | print(a) 22 | print(a['title'],a['userId']) 23 | -------------------------------------------------------------------------------- /letter.py: -------------------------------------------------------------------------------- 1 | letter = '''Dear <|Name|>, 2 | You are selected! 
3 | <|Date|> ''' 4 | 5 | print(letter.replace("<|Name|>", "Hridyesh Kumar").replace("<|Date|", "24 September 2050")) 6 | -------------------------------------------------------------------------------- /listComprehension.py: -------------------------------------------------------------------------------- 1 | l=[] 2 | for a in range(1,101): 3 | l.append(a) 4 | print(l) 5 | n=[m for m in range(1,101) if m%2==0] 6 | print(n) 7 | s="hello" 8 | d=[g for g in s] 9 | print(d) 10 | -------------------------------------------------------------------------------- /listFunctions.py: -------------------------------------------------------------------------------- 1 | l=[20,30,50,60] 2 | print(l) 3 | #del()=>delete element through index 4 | del l[1] 5 | print(l) 6 | #pop()=>delete element through index and returns the value of deleted element 7 | print(l.pop(2)) 8 | print(l) 9 | #remove()=>delete element through value rather than indexing 10 | l.remove(20) 11 | print(l) 12 | #clear()=>it returns a empty list 13 | l.clear() 14 | print(l) 15 | l=[20,30,40,50] 16 | l[0]=90 17 | print(l) 18 | l.insert(0,10) 19 | print(l) 20 | l.append(70) 21 | print(l) 22 | n=[60,80] 23 | l.append(n) 24 | print(l) 25 | l.extend(n) 26 | print(l) 27 | l=[10,20,20,10,30,40,10,50] 28 | a=l.count(10) 29 | print(l) 30 | print(a) 31 | m=max(l) 32 | print(m) 33 | l1=["Hello","World"] 34 | k=max(l1) 35 | print(k) 36 | m=min(l) 37 | print(m) 38 | k=min(l1) 39 | print(k) 40 | l.sort() 41 | print(l) 42 | l.reverse() 43 | print(l) 44 | l1.reverse() 45 | print(l1) 46 | a=l1.index("World") 47 | print(a) 48 | -------------------------------------------------------------------------------- /listIteration.py: -------------------------------------------------------------------------------- 1 | l=[10,20,30,40,60,80,90] 2 | t=len(l) 3 | for a in range(t): 4 | print(l[a]) 5 | print("") 6 | for a in l: 7 | print(a) 8 | print("") 9 | for a in range(t-1,-1,-1): 10 | print(l[a]) 11 | -------------------------------------------------------------------------------- /listSlicing.py: -------------------------------------------------------------------------------- 1 | #LIST=>Mutable, and have multiple data types 2 | l=[10,20,30,50,"Hello"] 3 | print(l[3],l[4]) 4 | print(l[0:2]) 5 | print(l[0: :2]) 6 | print(l[3: ]) 7 | print(l[-1: :-2]) 8 | print(l[-1: :-1]) 9 | -------------------------------------------------------------------------------- /newfile.py: -------------------------------------------------------------------------------- 1 | #conditional statements 2 | x=10 3 | if x>=10: 4 | print("YES") 5 | else: 6 | print("NO") 7 | #Loops 8 | list_a=[10,20,30,40,50] 9 | for i in list_a: 10 | print(i) 11 | x=5 12 | while x>=0: 13 | print(x) 14 | x=x-1 15 | #Dictionary 16 | '''A dictionary stores a collection of key-value pairs, where key and value are python objects. 17 | Each key is associated with a value so that a value can be conveniently retrieved, inserted modified or deleted given a particular key. 
18 | One approach for creating a dictionary is to use curly braces{}''' 19 | dict={} 20 | print(dict) 21 | d1={"a":"some value","b":[1,2,3,4]} 22 | print(d1) 23 | print(type(d1)) 24 | d1[7]="an integer" 25 | print(d1) 26 | d1[5]="some value" 27 | print(d1) 28 | d1["dummy"]="another value" 29 | print(d1) 30 | del d1[5] 31 | print(d1) 32 | print(d1.pop("dummy")) 33 | print(d1) 34 | print(d1.keys()) 35 | print(d1.values()) 36 | # If we need to iterate over both the keys and values, we can use the items method over the keys and values as 2-tuples 37 | print(list(d1.items())) 38 | d1.update({"b":"fool","c":12}) 39 | print(d1) 40 | #Categorize a list of words by their letter as a dictionary of lists 41 | words=["apple","bat","bar","atom","book","cook"] 42 | by_letter={} 43 | for word in words: 44 | #print(word) 45 | letter=word[0] #letter=a 46 | if letter not in by_letter: 47 | by_letter[letter]=[word] #by_letter["a"]=["apple"] by_letter["b"]=["bat"] 48 | else: 49 | by_letter[letter].append(word) #by_letter["b"].append("bar") 50 | print(by_letter) 51 | # Set 52 | '''A set is an unordered collection of unique elements. A set can be created in two ways via the set function or via the set literal with curly braces{}''' 53 | print(set([2,2,2,3,4,3,4,1,2,5])) 54 | a={1,2,3,4,5} 55 | b={3,4,5,6,7,8} 56 | print(a.union(b)) 57 | print(a|b) 58 | print(a.intersection(b)) 59 | print(a&b) 60 | # List,Set and Dictionary comprehension 61 | '''List Comprehension are a convenient and widely used python language feature . 62 | It allows us to concisely form a new list by filtering the elements of a collection, transforming the elements passing the filter into one concise expression. 63 | Filter out string with length greater then 2 and convert them to upper case.''' 64 | strings=["a","as","bat","car","dove","python"] 65 | result=[] 66 | for i in strings: 67 | if len(i)>2: 68 | result.append(i.upper()) 69 | print(result) 70 | print([x.upper() for x in strings if len(x)>2]) 71 | -------------------------------------------------------------------------------- /newfile1.py: -------------------------------------------------------------------------------- 1 | #"python" or 'python 'both are same 2 | #tuple is a fixed length,immutable sequence of python objects which,once assigned ,cannot be changed. 3 | tup_a=(4,5,6) 4 | print(tup_a) 5 | print(type(tup_a)) 6 | tup=tuple('string') 7 | print(tup) 8 | print(tup_a) 9 | #tup_a[0]=20 cannot change value of tuple 10 | print(tup_a[0]) 11 | print(tup[0:3]) #last index is excluded 12 | print(tup[:]) #start:stop 13 | print(tup[::2]) #start:stop:step 14 | nested_tup=(4,5,6),(7,8) 15 | print(nested_tup) 16 | print(nested_tup[0]) 17 | print(nested_tup[0][1]) 18 | tuple=(4,None,'fool')+('bar',)#concatenate tuple 19 | print(tuple) 20 | print(('fool','bar')*4)#multiplying size of tuple 21 | tup=(4,5,6) 22 | a,b,c=tup 23 | print(a,b) 24 | print(a,b,c) 25 | print(tup[::-1]) 26 | print(tup[-1]) 27 | a=(1,2,2,2,2,3,4,2) 28 | print(a.count(2)) 29 | # lists are variable length and their contents can be modified in place.Lists are mutable.We can define them using square brackets[] or using list type function. 
30 | list1=[2,3,7,None] 31 | print(list1) 32 | print(type(list1)) 33 | gen=range(20) 34 | print(gen) 35 | print(list(range(20))) 36 | # Adding and removing elements 37 | # Elements can be appended to the end of the list with the append method 38 | list2=['fool','peeka','bar'] 39 | list2.append('war') 40 | print(list2) 41 | list2.insert(1,'red') 42 | print(list2) 43 | print(list2.pop(2)) 44 | print(list2) 45 | list2.remove('fool') 46 | print(list2) 47 | # concatenate 48 | print([4,None,'fool']+[7,8,(2,3)]) 49 | x=[4,None,'fool'] 50 | x.extend([7,8,(2,3)]) 51 | print(x) 52 | a=[7,2,5,1,3] 53 | a.sort() 54 | print(a) 55 | a.sort(reverse=True) 56 | print(a) 57 | b=["saw","small","He","foxes","six"] 58 | b.sort(key=len) 59 | print(b) 60 | seq=[7,2,3,7,5,6,0,1] 61 | print(seq[1:5]) 62 | seq[3:5]=[6,3] 63 | print(seq) 64 | print(seq[:5]) 65 | print(seq[3:]) 66 | print(seq[-1]) 67 | print(seq[::2]) 68 | print(seq[::-1]) 69 | -------------------------------------------------------------------------------- /newfile2.py: -------------------------------------------------------------------------------- 1 | #Nested List Comprehension 2 | data=[["John","Emily","Michael","Mary","Steven"],["Maria","Juan","Javier","Natalia","Pilar"]] 3 | print(data) 4 | #We want to get a single list containing all names with two or more a's in them. 5 | interest=[] 6 | for names in data: 7 | enough=[name for name in names if name.count("a")>=2] 8 | interest.extend(enough) 9 | print(interest) 10 | result=[name for names in data for name in names if name.count("a")>=2] 11 | print(result) 12 | #Indentation is colon mark which is 4 space or a tab 13 | x=10 14 | if x>5: 15 | print("X is greater than 5") 16 | else: 17 | print("X is not greater than 5") 18 | #FUNCTIONS 19 | '''Functions are the primary and most important method of code organisation and reuse in python. 20 | Functions are declared with the def keyword. 
A function contains a block of code with an optimal use of the return keyword.''' 21 | def fun(x,y): 22 | return x+y 23 | print(fun(10,20)) 24 | result=fun(20,30) 25 | print(result) 26 | def fun_with_return(x): 27 | print(x) 28 | result=fun_with_return("hello") 29 | print(result) 30 | def fun_with_return(): 31 | print("hello") 32 | result=fun_with_return() 33 | print(result) 34 | #Positional Arguments 35 | #Keyword Arguments 36 | def fun(x,y,z=1.5): 37 | if z>1: 38 | return z*(x+y) 39 | else: 40 | return z/(x+y) 41 | print(fun(5,6)) 42 | print(fun(5,6,z=0.7)) 43 | print(fun(x=10,y=20,z=30)) 44 | print(fun(5,6,0.7)) 45 | a=[] 46 | def fun(): 47 | for i in range(5): 48 | a.append(i) 49 | print(fun()) 50 | print(a) 51 | def fun(): 52 | global a 53 | a=[] 54 | for i in range(5): 55 | a.append(i) 56 | fun() 57 | print(a) 58 | def f(): 59 | a=5 60 | b=6 61 | c=7 62 | return a,b,c 63 | a,b,c=f() 64 | print(a,b,c) 65 | def f(): 66 | a=5 67 | b=6 68 | c=7 69 | return {"a":a,"b":b,"c":c} 70 | print(f()) 71 | states=[" Alabama ","Georgia!","georgia","Georgia","Florida","south carolina##","West virginia?"] 72 | import re # regular expressions 73 | def clean_strings(strings): 74 | result=[] 75 | for value in strings: 76 | value=value.strip() 77 | value=re.sub("[!#?]","",value) 78 | value=value.title() 79 | result.append(value) 80 | return result 81 | print(clean_strings(states)) 82 | #Lambda Functions 83 | '''Python has support for anonymous or lambda functions, which are a way of writing functions consisting of a single statement, result of which is the return value''' 84 | def short_function(x): 85 | return x*2 86 | print(short_function(20)) 87 | equiv=lambda x:x*2 88 | print(equiv(20)) 89 | equiv=lambda x,y:x*y*2 90 | print(equiv(20,40)) 91 | def apply_to_list(some_list,f): 92 | return [f(x) for x in some_list] 93 | ints=[4,0,1,5,6] 94 | print(apply_to_list(ints,lambda x:x*2)) 95 | strings=["foo(","card","bar","aaaa","abab"] 96 | #sorting based on unique characters=>set(x) 97 | strings.sort(key=lambda x:len(set(x))) 98 | print(strings) 99 | #ERRORS AND EXEPTION HANDLING 100 | print(float("1.2345")) 101 | #print(float("something"))#ValueError 102 | def attempt_float(x): 103 | try: 104 | return float(x) 105 | except: 106 | return x 107 | #The code in the except part of the block will only be executed if float(x) raises and exception 108 | print(attempt_float("1.2345")) 109 | print(attempt_float("something")) 110 | #NUMPY 111 | '''NumPy, short for Numerical Python, is one of the most important fundamental packages for numerical computing in python''' 112 | import numpy as np 113 | arr=np.arange(1_000_000) 114 | print(arr) 115 | list=list(range(1_000_000)) 116 | print(list[1:10]) 117 | %timeit arr2=arr*2 118 | %timeit list2=[x*2 for x in list] 119 | '''One of the key features of NumPy is its N-dimensional array object or n-D array, which is fast, flexible container for large datasets in python.''' 120 | data=np.array([[1.5,0.1,3],[0,-3,6.5]]) 121 | print(data) 122 | print(data*10) 123 | print(data+data) 124 | print(data.shape) 125 | print(data.dtype) 126 | print(data.ndim) 127 | data1=[6,7.5,8.0,1] 128 | arr1=np.array(data1) 129 | print(arr1) 130 | print(arr1.ndim) 131 | print(np.zeros(10)) 132 | print(np.zeros((3,6))) 133 | print(np.ones((3,6))) 134 | print(np.arange(15)) 135 | arr1=np.array([1,2,3],dtype=np.float64) 136 | arr2=np.array([1,2,3],dtype=np.int32) 137 | print(arr1.dtype) 138 | print(arr2.dtype) 139 | arr=np.array([1,2,3,4,5]) 140 | print(arr.dtype) 141 | float_arr=arr.astype(np.float64) 
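# Note (added for clarity): astype returns a new array with the requested dtype; the original arr keeps its integer dtype, so float_arr is a copy rather than a view.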
142 | print(float_arr.dtype) 143 | print(float_arr) 144 | arr=np.array([1.,2.,3.],[4.,5.,6.]) 145 | print(arr) 146 | print(arr*arr) 147 | print(arr-arr) 148 | print(1/arr) 149 | print(arr**2) 150 | arr2=np.array([0.,4.,1.],[7.,2.,12.]) 151 | print(arr2) 152 | print(arr2>arr) 153 | arr=np.arange(10) 154 | print(arr) 155 | print(arr[5]) 156 | print(arr[5:8]) 157 | arr[5:8]=12 158 | print(arr) 159 | arr_slice=arr[5:8] 160 | print(arr_slice) 161 | arr_slice[1]=12345 162 | print(arr_slice) 163 | arr2d=np.array([[1,2,3],[4,5,6],[7,8,9]]) 164 | print(arr2d) 165 | print(arr2d[2]) 166 | print(arr2d[2][1]) 167 | print(arr2d[:2]) 168 | print(arr2d[:2,1:]) 169 | arr=np.arange(15).reshape((3,5)) 170 | print(arr) 171 | arr=np.arange(15).reshape((5,3)) 172 | print(arr) 173 | print(arr.T)#Transpose 174 | arr=np.array([[0,1,0],[1,2,-2],[6,3,2],[-1,0,-1],[1,0,1]]) 175 | print(arr) 176 | print(np.dot(arr.T,arr))#Matrix Multiplication 177 | print(arr.T@arr)#Matrix Multiplication 178 | a 179 | -------------------------------------------------------------------------------- /newfile3.py: -------------------------------------------------------------------------------- 1 | #PANDAS 2 | '''Pandas contains data structures and data manipulation tools designed to make data cleaning and analysis fast and convenient in Python. 3 | Series and Dataframe 4 | Series is a one-dimensional array like object containing a sequence of value.''' 5 | import pandas as pd 6 | obj=pd.Series([4,7,-5,3]) 7 | print(obj) 8 | obj2=pd.Series([4,7,-5,3],index=["d","b","a","c"]) 9 | print(obj2) 10 | print(obj2["a"]) 11 | obj2["d"]=6 12 | print(obj2) 13 | print(obj2[["c","a","d"]]) 14 | obj2=pd.Series([4,7,-5,3,5],index=["d","b","a","a","c"]) 15 | print(obj2) 16 | print(obj2[obj2>0]) 17 | print(obj2*2) 18 | import numpy as np 19 | np.exp(obj2) 20 | sdata={"Ohio":35000,"Texas}":71000,"Oregon":16000,"Utah":5000} 21 | obj3=pd.Series(sdata) 22 | print(obj3) 23 | print(obj3.to_dict()) 24 | #DATA FRAME 25 | '''A DataFrame represents a rectangular table of data and contains an ordered, named collection of columns each of which can be a different value type. 26 | The DataFrame has both a row index and column index.''' 27 | data={"states":["Ohio","Ohio","Ohio","Nevada","Nevada","Nevada"],"year":[2000,2001,2002,2001,2002,2003],"pop":[1.5,1.7,3.6,2.4,2.9,3.2]} 28 | frame=pd.DataFrame(data) 29 | print(frame) 30 | print(frame.head()) 31 | print(frame.tail()) 32 | print(pd.DataFrame(data,columns=["year","states","pop"])) 33 | frame2=pd.DataFrame(data,columns=["year","states","pop","debt"]) 34 | print(frame2) 35 | print(frame2.columns) 36 | print(frame2["states"]) 37 | print(frame2.year) 38 | print(frame2[["states","year"]]) 39 | print(frame2.loc[1]) 40 | print(frame2.iloc[2]) 41 | frame2["debt"]=16.5 42 | print(frame2) 43 | frame2["debt"]=np.arange(6.) 
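# Note (added for clarity): when a list or array is assigned to a DataFrame column, its length must match the number of rows (6 here). Assigning a pandas Series instead aligns on the index, e.g. frame2["debt"] = pd.Series([-1.2, -1.5, -1.7], index=[2, 4, 5]) would leave the remaining rows as NaN (the values in this snippet are hypothetical).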
44 | print(frame2) 45 | frame2["eastern"]=frame2["states"]=="ohio" 46 | print(frame2) 47 | del frame2["eastern"] 48 | print(frame2.columns) 49 | frame2.index.name="year" 50 | frame2.columns.name="state" 51 | print(frame2) 52 | data=pd.DataFrame(np.arange(16).reshape((4,4)),index=["Ohio","Colorado","Utah","New York"],columns=["one","two","three","four"]) 53 | print(data) 54 | print(data["two"]) 55 | print(data[["three","one"]]) 56 | print(data[data["three"]>5]) 57 | print(data.loc["Colorado"]) 58 | print(data.loc[["Colorado","New York"],["two","three"]]) 59 | print(data.iloc[0:2,0:3]) 60 | print(data.loc[data.three>=2]) 61 | -------------------------------------------------------------------------------- /numpy.py: -------------------------------------------------------------------------------- 1 | #NUMPY 2 | '''NumPy, short for Numerical Python, is one of the most important fundamental packages for numerical computing in python''' 3 | import numpy as np 4 | arr=np.arange(1_000_000) 5 | print(arr) 6 | list=list(range(1_000_000)) 7 | print(list[1:10]) 8 | %timeit arr2=arr*2 9 | %timeit list2=[x*2 for x in list] 10 | '''One of the key features of NumPy is its N-dimensional array object or n-D array, which is fast, flexible container for large datasets in python.''' 11 | data=np.array([[1.5,0.1,3],[0,-3,6.5]]) 12 | print(data) 13 | print(data*10) 14 | print(data+data) 15 | print(data.shape) 16 | print(data.dtype) 17 | print(data.ndim) 18 | data1=[6,7.5,8.0,1] 19 | arr1=np.array(data1) 20 | print(arr1) 21 | print(arr1.ndim) 22 | print(np.zeros(10)) 23 | print(np.zeros((3,6))) 24 | print(np.ones((3,6))) 25 | print(np.arange(15)) 26 | arr1=np.array([1,2,3],dtype=np.float64) 27 | arr2=np.array([1,2,3],dtype=np.int32) 28 | print(arr1.dtype) 29 | print(arr2.dtype) 30 | arr=np.array([1,2,3,4,5]) 31 | print(arr.dtype) 32 | float_arr=arr.astype(np.float64) 33 | print(float_arr.dtype) 34 | print(float_arr) 35 | arr=np.array([1.,2.,3.],[4.,5.,6.]) 36 | print(arr) 37 | print(arr*arr) 38 | print(arr-arr) 39 | print(1/arr) 40 | print(arr**2) 41 | arr2=np.array([0.,4.,1.],[7.,2.,12.]) 42 | print(arr2) 43 | print(arr2>arr) 44 | arr=np.arange(10) 45 | print(arr) 46 | print(arr[5]) 47 | print(arr[5:8]) 48 | arr[5:8]=12 49 | print(arr) 50 | arr_slice=arr[5:8] 51 | print(arr_slice) 52 | arr_slice[1]=12345 53 | print(arr_slice) 54 | arr2d=np.array([[1,2,3],[4,5,6],[7,8,9]]) 55 | print(arr2d) 56 | print(arr2d[2]) 57 | print(arr2d[2][1]) 58 | print(arr2d[:2]) 59 | print(arr2d[:2,1:]) 60 | arr=np.arange(15).reshape((3,5)) 61 | print(arr) 62 | arr=np.arange(15).reshape((5,3)) 63 | print(arr) 64 | print(arr.T)#Transpose 65 | arr=np.array([[0,1,0],[1,2,-2],[6,3,2],[-1,0,-1],[1,0,1]]) 66 | print(arr) 67 | print(np.dot(arr.T,arr))#Matrix Multiplication 68 | print(arr.T@arr)#Matrix Multiplication -------------------------------------------------------------------------------- /pandas.py: -------------------------------------------------------------------------------- 1 | #PANDAS 2 | '''Pandas contains data structures and data manipulation tools designed to make data cleaning and analysis fast and convenient in Python. 
3 | Series and Dataframe 4 | Series is a one-dimensional array like object containing a sequence of value.''' 5 | import pandas as pd 6 | obj=pd.Series([4,7,-5,3]) 7 | print(obj) 8 | obj2=pd.Series([4,7,-5,3],index=["d","b","a","c"]) 9 | print(obj2) 10 | print(obj2["a"]) 11 | obj2["d"]=6 12 | print(obj2) 13 | print(obj2[["c","a","d"]]) 14 | obj2=pd.Series([4,7,-5,3,5],index=["d","b","a","a","c"]) 15 | print(obj2) 16 | print(obj2[obj2>0]) 17 | print(obj2*2) 18 | import numpy as np 19 | np.exp(obj2) 20 | sdata={"Ohio":35000,"Texas}":71000,"Oregon":16000,"Utah":5000} 21 | obj3=pd.Series(sdata) 22 | print(obj3) 23 | print(obj3.to_dict()) 24 | #DATA FRAME 25 | '''A DataFrame represents a rectangular table of data and contains an ordered, named collection of columns each of which can be a different value type. 26 | The DataFrame has both a row index and column index.''' 27 | data={"states":["Ohio","Ohio","Ohio","Nevada","Nevada","Nevada"],"year":[2000,2001,2002,2001,2002,2003],"pop":[1.5,1.7,3.6,2.4,2.9,3.2]} 28 | frame=pd.DataFrame(data) 29 | print(frame) 30 | print(frame.head()) 31 | print(frame.tail()) 32 | print(pd.DataFrame(data,columns=["year","states","pop"])) 33 | frame2=pd.DataFrame(data,columns=["year","states","pop","debt"]) 34 | print(frame2) 35 | print(frame2.columns) 36 | print(frame2["states"]) 37 | print(frame2.year) 38 | print(frame2[["states","year"]]) 39 | print(frame2.loc[1]) 40 | print(frame2.iloc[2]) 41 | frame2["debt"]=16.5 42 | print(frame2) 43 | frame2["debt"]=np.arange(6.) 44 | print(frame2) 45 | frame2["eastern"]=frame2["states"]=="ohio" 46 | print(frame2) 47 | del frame2["eastern"] 48 | print(frame2.columns) 49 | frame2.index.name="year" 50 | frame2.columns.name="state" 51 | print(frame2) 52 | data=pd.DataFrame(np.arange(16).reshape((4,4)),index=["Ohio","Colorado","Utah","New York"],columns=["one","two","three","four"]) 53 | print(data) 54 | print(data["two"]) 55 | print(data[["three","one"]]) 56 | print(data[data["three"]>5]) 57 | print(data.loc["Colorado"]) 58 | print(data.loc[["Colorado","New York"],["two","three"]]) 59 | print(data.iloc[0:2,0:3]) 60 | print(data.loc[data.three>=2]) -------------------------------------------------------------------------------- /primeornot.py: -------------------------------------------------------------------------------- 1 | number=int(input("Enter the number=")) 2 | if number>1: 3 | for i in range (2,int(number/2)+1): 4 | if (number%i)==0: 5 | print(number,"is not a prime number") 6 | break 7 | else: 8 | print(number,"is a prime number") 9 | else: 10 | print(number,"is not a prime number") -------------------------------------------------------------------------------- /printingPoem.py: -------------------------------------------------------------------------------- 1 | print(''' Twinkle, twinkle, little star, 2 | How I wonder what you are! 3 | Up above the world so high, 4 | Like a diamond in the sky. 5 | 6 | When the blazing sun is gone, 7 | When he nothing shines upon, 8 | Then you show your little light, 9 | Twinkle, twinkle, all the night. 10 | 11 | Then the trav'ller in the dark, 12 | Thanks you for your tiny spark, 13 | He could not see which way to go, 14 | If you did not twinkle so. 15 | 16 | In the dark blue sky you keep, 17 | And often thro' my curtains peep, 18 | For you never shut your eye, 19 | Till the sun is in the sky. 
20 | 21 | 'Tis your bright and tiny spark, 22 | Lights the trav'ller in the dark: 23 | Tho' I know not what you are, 24 | Twinkle, twinkle, little star.''') 25 | -------------------------------------------------------------------------------- /project1.py: -------------------------------------------------------------------------------- 1 | #Project on Regression and Random Forest Regression 2 | #Regression Problems in Machine Learning 3 | '''Machine Learning is a branch of Artificial Intelligence that enables computer programs to automatically learn and improve from experience. 4 | Machine Learning Algorithms learn from datasets and then based on the patterns identified from the datasets make predictions on unseen data. 5 | ML algorithms can be broadly categorized into two types: 6 | 1. Supervised Learning 7 | 2. Unsupervised Learning 8 | Supervised ML algorithms are those algorithms where the input dataset and the corresponding output or true prediction is available and the algorithms try to find the relationship between inputs and outputs. 9 | In unsupervised ML algorithms, the true labels for the outputs are not known. Rather, the algorithms try to find similar patterns in the data. E.g., Clustering. 10 | Supervised learning algorithms are further divided into two types: 11 | 1. Regression Algorithms 12 | 2. Classification Algorithms 13 | Regression algorithms predict a continuous value e.g.,the price of a house. 14 | Classification algorithms predict a discrete value e.g., whether a incoming email is Spam/Ham.''' 15 | 16 | import pandas as pd 17 | import numpy as np 18 | import seaborn as sns 19 | #sns.get_dataset_names() 20 | #Importing the dataset and printing the dataset header 21 | tips_df=sns.load_dataset("tips") 22 | tips_df.head() 23 | 24 | '''We will be using machine learning algorithms to predict the tip for a particular record based on the remaining features such as total_bill, gender, day, time etc. 25 | Dividing Data into Features and Labels''' 26 | x=tips_df.drop(['tip'],axis=1) 27 | y=tips_df["tip"] 28 | x.head() 29 | y.head() 30 | 31 | #Converting Categorical Data to Numbers 32 | '''ML Algorithms can only work with numbers. It is important to convert categorical data into a numeric format''' 33 | #Numeric Variables 34 | numerical=x.drop(['sex','smoker','day','time'],axis=1) 35 | numerical.head() 36 | #DataFrame that contains only categorical columns 37 | categorical=x.filter(['sex','smoker','day','time']) 38 | categorical.head() 39 | categorical["day"].value_counts() 40 | 41 | '''One of the most common approaches to convert a categorical column to a numeric one is via one-hot encoding. 42 | In one-hot encoding, for every unique value in the original columns, anew column is created.''' 43 | 44 | cat_numerical=pd.get_dummies(categorical) 45 | cat_numerical.head() 46 | '''The final step is to join the numerical columns with the one-hot encoded columns.''' 47 | x=pd.concat([numerical,cat_numerical],axis=1) 48 | x.head() 49 | #Divide Data into Training and Test Sets 50 | '''We divide the dataset into two sets i.e., train and test set. 51 | The dataset is trained via the train set and evaluated on the test set.''' 52 | 53 | from sklearn.model_selection import train_test_split 54 | x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.20,random_state=0) 55 | 56 | #Data Scaling/Normalization 57 | '''The final step before data is passed to ML algorithm is to scale the data. 
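As a rough illustration with made-up numbers: standardization rescales each column to zero mean and unit variance, z = (x - mean) / std, so if the total_bill column had a mean of 20 and a standard deviation of 9, a bill of 29 would become (29 - 20) / 9 = 1.0 and a bill of 11 would become -1.0, putting it on the same scale as the one-hot encoded columns.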
58 | Some columns of the dataset contain small values, while the others contain very large values. It is better to convert all values to a uniform scale.'''
59 | 
60 | from sklearn.preprocessing import StandardScaler
61 | sc=StandardScaler()
62 | x_train=sc.fit_transform(x_train)
63 | x_test=sc.transform(x_test)
64 | '''We have converted data into a format that can be used to train ML algorithms for regression.'''
65 | 
66 | #Linear Regression
67 | '''Linear Regression is a linear model that assumes a linear relationship between inputs and outputs and minimizes the cost of error between the predicted and actual output using functions like mean absolute error.'''
68 | 
69 | #Advantages
70 | '''Linear Regression is a simple to implement and easily interpretable algorithm.
71 | It takes less time to train, even for huge datasets.
72 | Linear Regression coefficients are easy to interpret.
73 | Importing Linear Regression model from sklearn.'''
74 | 
75 | from sklearn.linear_model import LinearRegression
76 | lin_reg=LinearRegression()
77 | regressor=lin_reg.fit(x_train,y_train)
78 | y_pred=regressor.predict(x_test)
79 | 
80 | '''Once you have trained a model and have made predictions on the test set, the next step is to know how well your model has performed for making predictions on the unknown test set.
81 | There are various metrics to check that.
82 | Mean Absolute Error (MAE) is calculated by taking the average of absolute error obtained by subtracting real values from predicted values.
83 | Mean Squared Error (MSE) is similar to MAE. However, the error for each record is squared in case of MSE.
84 | Root Mean Squared Error (RMSE) is the square root of mean squared error.'''
85 | 
86 | from sklearn import metrics
87 | print('Mean Absolute Error:',metrics.mean_absolute_error(y_test,y_pred))
88 | print('Mean Squared Error:',metrics.mean_squared_error(y_test,y_pred))
89 | print('Root Mean Squared Error:',np.sqrt(metrics.mean_squared_error(y_test,y_pred)))
90 | 
91 | '''By looking at the MAE, it can be concluded that on average there is an error of 0.70 for predictions, which means that the predicted tip values are 0.70$ more or less than the actual tip values.'''
92 | 
93 | #Random Forest Regression
94 | '''Random Forest Regression is a tree-based algorithm.
95 | It is an ensemble modelling technique.'''
96 | #Advantages
97 | '''It works well when you have lots of missing data or an imbalanced dataset (e.g., 200 samples of class 0 and 1000 samples of class 1).
98 | With a large number of trees or models, you can avoid overfitting.
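Each tree in the forest is trained on a random bootstrap sample of the rows and considers a random subset of the features at each split, and the forest's prediction is the average of the individual trees, which smooths out the quirks of any single tree.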
Overfitting occurs when ML models performs better on the training set but worse on the test set.''' 99 | 100 | from sklearn.ensemble import RandomForestRegressor 101 | rf_reg=RandomForestRegressor(random_state=42,n_estimators=500) 102 | regressor=rf_reg.fit(X_train,y_train) 103 | y_pred=regressor.predict(X_test) 104 | from sklearn import metrics 105 | print('Mean Absolute Error:',metrics.mean_absolute_error(y_test,y_pred)) 106 | print('Mean Squared Error:',metrics.mean_squared_error(y_test,y_pred)) 107 | print('Root Mean Squared Error:',np.sqrt(metrics.mean_squared_error(y_test,y_pred))) 108 | 109 | -------------------------------------------------------------------------------- /project2.py: -------------------------------------------------------------------------------- 1 | #Project on Logistic Regression & Clustering 2 | #Classification Problems in Machine Learning 3 | '''Classification problems are the type of problems where you have to predict a deiscrete value i.e., whether the student will pass the exam or not.''' 4 | 5 | import pandas as pd 6 | import numpy as np 7 | #importing the dataset 8 | churn_df=pd.read_csv("Churn_Modelling.csv") 9 | churn_df.head() 10 | 11 | '''The exited column contains information regarding whether or not the customer exited the bank after six months.''' 12 | 13 | #Removing unnecessary columns 14 | churn_df=churn_df.drop(['RowNumber','CustomerId','Surname'],axis=1) 15 | churn_df.head() 16 | #Dividing Data into Features and Labels 17 | X=churn_df.drop(['Exited'],axis=1) 18 | y=churn_df['Exited'] 19 | X.head() 20 | y.head() 21 | #Converting Categorical Data to Numbers 22 | numerical=X.drop(['Geography','Gender'],axis=1) 23 | numerical.head() 24 | categorical=X.filter(['Geography','Gender']) 25 | categorical.head() 26 | cat_numerical=pd.get_dummies(categorical) 27 | cat_numerical.head() 28 | X=pd.concat([numerical,cat_numerical],axis=1) 29 | X.head() 30 | #Dividing Data into Training and Test Sets 31 | from sklearn.model_selection import train_test_split 32 | X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20,random_state=0) 33 | 34 | #Data Scaling/Normalization 35 | from sklearn.preprocessing import StandardScaler 36 | sc=StandardScaler() 37 | X_train=sc.fit_transform(X_train) 38 | X_test=sc.transform(X_test) 39 | 40 | '''Binary Classification problems are those classification problems where there are only two possible values for the output level. 41 | Whether a customer will leave the bank after a certain period or not.''' 42 | 43 | #Logistic Regression 44 | '''Logistic Regression is a linear model, which makes classification by passing the output of linear regression through a sigmoid function. 45 | Importing logistic regression classifier from sklearn''' 46 | 47 | from sklearn.linear_model import LogisticRegression 48 | log_clf=LogisticRegression() 49 | classifier=log_clf.fit(X_train,y_train) 50 | y_pred=classifier.predict(X_test) 51 | 52 | '''There are various metrics to evaluate a classification method. 53 | Some of the most commonly used classification metrics are F1 score, recall, precision, accuracy and confusion matrix. 54 | True Negatives(TN/tn):True Negatives are those output labels that are actually false and the model also predicted them as false. 55 | True Positives(TP/tp):True Positives are those output labels that are actually true and the model also predicted them as true. 56 | False Negatives(FN/fn):False Negatives are those output labels that are actually true but the model predicted them as false. 
57 | False Positives(FP/fp):False Positives are those output labels that are actually false but the model also predicted them as true.''' 58 | 59 | #Precision 60 | '''It is obtained by dividing true positives by the sum of true positive and false positive. 61 | Precision=tp/(tp+fp)''' 62 | #Recall 63 | '''It is obtained by dividing true positives by the sum of true positives and false negatives. 64 | Recall=tp/(tp+fn)''' 65 | 66 | #Evaluating the algorithm on the test set 67 | from sklearn.metrics import classification_report, confusion_matrix, accuracy_score 68 | print(confusion_matrix(y_test,y_pred)) 69 | print(classification_report(y_test,y_pred)) 70 | print(accuracy_score(y_test,y_pred)) 71 | 72 | #Random Forest Classifier 73 | from sklearn.ensemble import RandomForestClassifier 74 | rf_clf=RandomForestClassifier(random_state=42,n_estimators=500) 75 | classifiers=rf_clf.fit(X_train,y_train) 76 | y_pred=classifier.predict(X_test) 77 | from sklearn.metrics import classification_report, confusion_matrix, accuracy_score 78 | print(confusion_matrix(y_test,y_pred)) 79 | print(classification_report(y_test,y_pred)) 80 | print(accuracy_score(y_test,y_pred)) 81 | 82 | #Clustering 83 | '''Clustering algorithms are unsupervised algorithms where the training data is not labeled. 84 | Rather, the algorithms cluster or group the datasets based on common characteristics.''' 85 | 86 | #K-Means Clustering 87 | '''K-Means Clustering is one of the most commonly used algorithms for clustering, K refers to the number of clusters that you want your data to be grouped into. 88 | In K-Means clustering, the number of clusters has to be defined before K clustering can be applied to the data points.''' 89 | 90 | #Steps for K-Means Clustering 91 | '''1.Randomly assign centroid values for each cluster. 92 | 2.Calculate the euclidean distance between each data point and centroid values of all the clusters. 93 | 3.Assign the data point to the cluster of the centroid with the shortest distance. 94 | 4.Calculate and update centroid values based on the mean values of the coordinates of all the data points of the corresponding cluster. 95 | 5.Repeat steps 2-4 until new centroid values for all the clusters are different from the previous centroid values.''' 96 | 97 | import numpy as np 98 | import pandas as pd 99 | from sklearn.cluster import KMeans 100 | import matplotlib.pyplot as plt 101 | 102 | #Customer Segmentation using K-Means Clustering 103 | '''In this project, you will see how to segment customers based on their incomes and past spending habits. 104 | You will then identify customers who have high incomes and higher spending.''' 105 | 106 | dataset=pd.read_csv("Mall_Customers.csv") 107 | dataset.head() 108 | '''The output shows that the dataset contains 200 records and 5 cloumns. 109 | Plotting the histogram for the annual income column.''' 110 | import warnings 111 | warnings.filterwarnings("ignore") 112 | sns.distplot(dataset["Annual Income (k$)"],kde=False,bins=50) 113 | '''The output shows that most of the customers have incomes between 60 and 90K per year. 114 | Plotting the histogram for the spending score column.''' 115 | sns.distplot(dataset["Spending Score (1-100)"],kde=False,bins=50,color="red") 116 | '''The output shows that most of the customers have a spending score between 40 and 60. 
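A regression plot draws the scatter of individual points together with a fitted straight line, which makes it easy to see whether two variables tend to move together.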
117 | Plotting regression plot for annual income against spending score.''' 118 | sns.regplot(x="Annual Income (k$)",y="Spending Score (1-100)",data=dataset) 119 | '''There is no linear relationship between annual income and spending. 120 | Plotting regression plot for age and spending score''' 121 | sns.regplot(x="Age",y="Spending Score (1-100)",data=dataset) 122 | '''The output confirms an inverse linear relationship between age and spending score. 123 | Young people have higher spending compared to older people.''' 124 | dataset=dataset.filter(["Annual Income(k$)","Spending Score (1-100)"],axis=1) 125 | dataset.head() 126 | km_model=KMeans(n_clusters=4) 127 | km_model.fit(dataset) 128 | print(km_model.cluster_centers_) 129 | print(km_model.labels_) 130 | plt.scatter(dataset.values[:,0],dataset.values[:,1],c=km_model.labels_,cmap='rainbow') 131 | plt.scatter(km_model.cluster_centers_[:,0],km_model.cluster_centers_[:,1],s=100,c='black') 132 | #Elbow method to get the optimal number of cluaters 133 | loss=[] 134 | for i in range(1,11): 135 | km=KMeans(n_clusters=i).fit(dataset) 136 | loss.append(km.inertia_) 137 | plt.plot(range(1,11),loss) 138 | plt.title('Finding optimal number of vlusters via elbow method') 139 | plt.xlabel('Number of clusters') 140 | plt.ylabel('loss') 141 | plt.show() 142 | km_model=KMeans(n_clusters=5) 143 | km_model.fit(dataset) 144 | print(km_model.cluster_centers_) 145 | print(km_model.labels_) 146 | plt.scatter(dataset.values[:,0],dataset.values[:,1],c=km_model.labels_,cmap='rainbow') 147 | plt.scatter(km_model.cluster_centers_[:,0],km_model.cluster_centers_[:,1],s=100,c='black') 148 | #Filtering all records with cluster id 1 149 | cluster_map=pd.DataFrame() 150 | cluster_map['data_indx']=dataset.index.values 151 | cluster_map['cluster']=km_model.labels_ 152 | print(cluster_map) 153 | cluster_map=cluster_map[cluster_map.clusters==1] 154 | cluster_map.head() 155 | '''These are the customers who have high incomes and high spending and these customers should be targeted during marketing campaigns.''' 156 | -------------------------------------------------------------------------------- /project3onCNN.py: -------------------------------------------------------------------------------- 1 | #Project on Image Classification using Convolutional Neural Networks 2 | #Convolutional Neural Networks 3 | '''A Convolutional Neural Network (CNN) is a type of artificial neural network that is used in image recognition and processing that is specifically designed to process pixel data.''' 4 | #CNN Model on MNIST Dataset for written digit classification 5 | '''MNIST Dataset is the handwritten numbers taken as images. 
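The dataset contains 60,000 training images and 10,000 test images of handwritten digits 0-9, each 28x28 pixels.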
All images are grey scale.''' 6 | from keras.datasets import mnist 7 | #from keras.preprocessing.image import load_img, array_to_img 8 | from tensorflow.keras.utils import to_categorical 9 | from keras.models import Sequential 10 | from keras.layers import Dense 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | #Load the data 14 | (X_train,y_train),(X_test,y_test)=mnist.load_data() 15 | print(X_train.shape) 16 | print(y_train.shape) 17 | print(X_test.shape) 18 | print(y_test.shape) 19 | #Understand the image format 20 | X_train[0].shape 21 | plt.imshow(X_train[0],cmap="gray") 22 | y_train[0] 23 | #Preprocessing the image data 24 | image_height,image_width=28,28 25 | X_train=X_train.reshape(60000,image_height*image_width) 26 | X_test=X_test.reshape(10000,image_height*image_width) 27 | print(X_train.shape) 28 | print(X_test.shape) 29 | print(X_train[0]) 30 | X_train=X_train.astype('float32') 31 | X_test=X_test.astype('float32') 32 | X_train/=255.0 33 | X_test/=255.0 34 | print(X_train[0]) 35 | print(y_train.shape) 36 | print(y_test.shape) 37 | '''Converting the target value into 10 bins. So, we will see that the output from a model will then go into one of these bins.''' 38 | y_train=to_categorical(y_train,10) 39 | y_test=to_categorical(y_test,10) 40 | print(y_train.shape) 41 | print(y_test.shape) 42 | print(y_train[0]) 43 | #Building the model 44 | model=Sequential() 45 | model.add(Dense(512,activation='relu',input_shape=(784,))) 46 | model.add(Dense(512,activation='relu')) 47 | model.add(Dense(10,activation="softmax")) 48 | #Compile the model 49 | model.compile(optimizer="adam",loss='categorical_crossentropy',metrics=["accuracy"]) 50 | model.summary() 51 | history=model.fit(X_train,y_train,epochs=20,validation_data=(X_test,y_test)) 52 | plt.plot(history.history['accuracy']) 53 | #Evaluating the model 54 | score=model.evaluate(X_test,y_test) 55 | '''In neural networks, we only have fully connected layer, otherwise known as dense layer. 
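A Dense layer connects every input pixel to every neuron, so a 28x28 image already needs 784 weights per neuron; a convolution instead slides a small filter (for example 3x3, i.e. 9 shared weights) across the image, and each 2x2 max pooling step halves the spatial dimensions (28 -> 14 -> 7).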
With Convolutional Neural Networks, we have more operations such as the convolution operation, max pooling, flattening and also a fully connected layer.'''
56 | from keras.layers import Conv2D, MaxPooling2D,Flatten,Dense
57 | from keras.models import Sequential
58 | from keras.datasets import mnist
59 | from tensorflow.keras.utils import to_categorical
60 | (X_train,y_train),(X_test,y_test)=mnist.load_data()
61 | print(X_train.shape)
62 | print(X_test.shape)
63 | print(y_train.shape)
64 | print(y_test.shape)
65 | X_train=X_train.reshape(60000,28,28,1)
66 | X_test=X_test.reshape(10000,28,28,1)
67 | X_train=X_train.astype('float32')
68 | X_test=X_test.astype('float32')
69 | X_train/=255.0
70 | X_test/=255.0
71 | y_train=to_categorical(y_train,10)
72 | y_test=to_categorical(y_test,10)
73 | print(X_train.shape)
74 | print(X_test.shape)
75 | print(y_train.shape)
76 | print(y_test.shape)
77 | #CNN Model Development
78 | cnn=Sequential()
79 | cnn.add(Conv2D(32,kernel_size=(3,3),input_shape=(28,28,1),padding='same',activation='relu'))
80 | cnn.add(MaxPooling2D())
81 | cnn.add(Conv2D(32,kernel_size=(3,3),padding='same',activation='relu'))
82 | cnn.add(MaxPooling2D())
83 | cnn.add(Flatten())
84 | cnn.add(Dense(64,activation='relu'))
85 | cnn.add(Dense(10,activation='softmax'))
86 | cnn.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
87 | print(cnn.summary())
88 | history_cnn=cnn.fit(X_train,y_train,epochs=12,verbose=1,validation_data=(X_test,y_test))
89 | plt.plot(history_cnn.history['accuracy'])
90 | plt.plot(history_cnn.history['val_accuracy'])
--------------------------------------------------------------------------------
/project4onNLP.py:
--------------------------------------------------------------------------------
1 | #Project on Spam/Ham Classification using NLP
2 | #Natural Language Processing- NLP
3 | '''NLP is a field concerned with the ability of a computer to understand, analyze, manipulate and potentially generate human language.
4 | NLP is a broad umbrella that encompasses many topics. Some of them are sentiment analysis, topic modelling, text classification etc.
5 | NLTK:- Natural Language ToolKit: The NLTK is the most utilized package for handling natural language processing tasks. It is an open source library.'''
6 | #Spam/Ham Classification using Natural Language Processing
7 | #pip install nltk
8 | import nltk
9 | import pandas as pd
10 | import numpy as np
11 | dataset=pd.read_csv("SMSSpamCollection.tsv",sep="\t",header=None)
12 | dataset.columns=['label','body_text']
13 | dataset.head()
14 | dataset['body_text'][0]
15 | dataset['body_text'][1]
16 | #What is the shape of the data
17 | print("Input data has {} rows and {} columns".format(len(dataset),len(dataset.columns)))
18 | #How many Spam/Ham are there
19 | print("Out of {} rows,{} are spam and {} are ham".format(len(dataset),len(dataset[dataset['label']=='spam']),len(dataset[dataset['label']=='ham'])))
20 | #How much missing data is there
21 | print("Number of null in label: {}".format(dataset['label'].isnull().sum()))
22 | print("Number of null in text: {}".format(dataset['body_text'].isnull().sum()))
23 | '''Preprocessing text data:- Cleaning up the text data is necessary to highlight attributes that you are going to use in ML algorithms.
24 | Cleaning or preprocessing the data consists of a number of steps.
25 | Remove Punctuation 26 | Tokenization 27 | Remove Stopwords 28 | Lemmatize/Stemming''' 29 | import string 30 | string.punctuation 31 | def remove_punct(text): 32 | text_nopunct="".join([char for char in text if char not in string.punctuation]) 33 | return text_nopunct 34 | dataset['body_txt_clean']=dataset['body_text'].apply(lambda x:remove_punct(x)) 35 | dataset.head() 36 | #Tokenization 37 | '''Tokenizing is splitting some string or sentence into a list of words''' 38 | import re 39 | def tokenize(text): 40 | tokens=re.split('\W',text) 41 | return tokens 42 | dataset['body_text_tokenized']=dataset['body_text_clean'].apply(lambda x:tokenize(x.lower())) 43 | dataset.head() 44 | '''Remove Stopwords:- These are commonly used words like the, and, but,if that don't contribute much to the meaning of a sentence.''' 45 | stopwords=nltk.corpus.stopwords.words('english') 46 | def remove_stopwords(tokenized_list): 47 | text=[word for word in tokenized_list if word not in stopwords] 48 | return text 49 | dataset['body_text_nostop']=dataset['body_text_tokenized'].apply(lambda x:remove_stopwords(x)) 50 | dataset.head() 51 | '''Stemming:- Stemming is the process of reducing inflected or derived words to their stem or root.''' 52 | ps=nltk.PorterStemmer() 53 | def stemming(tokenized_text): 54 | text=[ps.stem(word) for word in tokenized_text] 55 | return text 56 | dataset['body_text_stemmed']=dataset['body_text_nostop'].apply(lambda x:stemming(x)) 57 | dataset.head() 58 | '''Lemmatization:- It is the process of grouping together the inflected forms of a word so they can be analysed as a single term, identified by the word's lemma.For e.g. type, typing and typed are forms of the same lemma type.''' 59 | wn=nltk.WordNetLemmatizer() 60 | def lemmatizing(tokenized_text): 61 | text=[wn.lemmatize(word) for word in tokenized_text] 62 | return text 63 | dataset['body_text_lemmatized']=dataset['body_text_nostop'].apply(lambda x:lemmatizing(x)) 64 | dataset.head() 65 | '''Vectorization:- This is defined as the process of encoding text as integers to create feature vectors. In out ontext we will be taking individual text messages and converting it to a numeric vector that represents that text message. 66 | Count Vectorization:- This creates a document-term matrix where the entry of each cell will be a count of the number of times that word occured in that document.''' 67 | from sklearn.feature_extraction.text import CountVectorizer 68 | def clean_text(text): 69 | text="".join([word.lower() for word in text if word not in string.punctuation]) 70 | tokens=re.split('\W',text) 71 | text=[ps.stem(word) for word in tokens if word not in stopwords] 72 | return text 73 | count_vect=CountVectorizer(analyzer=clean_text) 74 | X_count=count_vect.fit_transform(dataset['body_text']) 75 | print(X_count.shape) 76 | #Apply count vectorizer to a smaller sample 77 | data_sample=dataset[0:20] 78 | count_vect_sample=CountVectorizer(analyzer=clean_text) 79 | X_count_sample=count_vect_sample.fit_transform(data_sample['body_text']) 80 | print(X_count_sample.shape) 81 | '''Sparse Matrix:- A matrix in which most entries are 0. 
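For example, in the 20-message sample vectorized above, each row has non-zero counts only for the handful of distinct words that actually appear in that message, while every other vocabulary column is 0.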
In the interest of efficient storage, a sparse matrix will be stored by only storing the locations of the non-zero elements.'''
82 | print(X_count_sample)
83 | X_counts_df=pd.DataFrame(X_count_sample.toarray())
84 | print(X_counts_df)
85 | '''TF-IDF (Term Frequency, Inverse Document Frequency):- Creates a document term matrix where the columns represent single unique terms (unigrams) but the cell represents a weighting meant to represent how important a word is to a document.'''
86 | from sklearn.feature_extraction.text import TfidfVectorizer
87 | tfidf_vect=TfidfVectorizer(analyzer=clean_text)
88 | X_tfidf=tfidf_vect.fit_transform(dataset['body_text'])
89 | print(X_tfidf.shape)
90 | #Apply TfidfVectorizer to a smaller sample
91 | data_sample=dataset[0:20]
92 | tfidf_vect_sample=TfidfVectorizer(analyzer=clean_text)
93 | X_tfidf_sample=tfidf_vect_sample.fit_transform(data_sample['body_text'])
94 | print(X_tfidf_sample.shape)
95 | X_tfidf_df=pd.DataFrame(X_tfidf_sample.toarray())
96 | X_tfidf_df.columns=tfidf_vect_sample.get_feature_names()
97 | print(X_tfidf_df)
98 | 
99 | #Feature Engineering: Feature Creation
100 | dataset=pd.read_csv("SMSSpamCollection.tsv",sep="\t",header=None)
101 | dataset.columns=['label','body_text']
102 | dataset.head()
103 | #Create feature for text message length
104 | dataset['body_len']=dataset['body_text'].apply(lambda x:len(x)-x.count(" "))
105 | dataset.head()
106 | #Create feature for % of text that is punctuation
107 | def count_punct(text):
108 |     count=sum([1 for char in text if char in string.punctuation])
109 |     return round(count/(len(text)-text.count(" ")),3)*100
110 | dataset['punct%']=dataset['body_text'].apply(lambda x:count_punct(x))
111 | dataset.head()
112 | import matplotlib.pyplot as plt
113 | import numpy as np
114 | bins=np.linspace(0,200,40)
115 | plt.hist(dataset['body_len'],bins)
116 | plt.title('Body Length Distribution')
117 | plt.show()
118 | bins=np.linspace(0,50,40)
119 | plt.hist(dataset['punct%'],bins)
120 | plt.title('Punctuation % Distribution')
121 | plt.show()
122 | 
123 | #Building Machine Learning Classifiers using Random Forest Model
124 | import nltk
125 | import pandas as pd
126 | import re
127 | from sklearn.feature_extraction.text import TfidfVectorizer
128 | import string
129 | dataset=pd.read_csv("SMSSpamCollection.tsv",sep="\t",header=None)
130 | dataset.columns=['label','body_text']
131 | dataset.head()
132 | def count_punct(text):
133 |     count=sum([1 for char in text if char in string.punctuation])
134 |     return round(count/(len(text)-text.count(" ")),3)*100
135 | dataset['punct%']=dataset['body_text'].apply(lambda x:count_punct(x))
136 | dataset['body_len']=dataset['body_text'].apply(lambda x:len(x)-x.count(" "))
137 | dataset.head()
138 | def clean_text(text):
139 |     text="".join([word.lower() for word in text if word not in string.punctuation])
140 |     tokens=re.split(r'\W',text)
141 |     text=[ps.stem(word) for word in tokens if word not in stopwords]
142 |     return text
143 | tfidf_vect=TfidfVectorizer(analyzer=clean_text)
144 | X_tfidf=tfidf_vect.fit_transform(dataset['body_text'])
145 | X_features=pd.concat([dataset['body_len'],dataset['punct%'],pd.DataFrame(X_tfidf.toarray())],axis=1)
146 | X_features.head()
147 | 
148 | #Model using K-Fold cross validation
149 | from sklearn.ensemble import RandomForestClassifier
150 | from sklearn.model_selection import KFold, cross_val_score
151 | rf=RandomForestClassifier(n_jobs=1)
152 | k_fold=KFold(n_splits=5)
153 | 
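#With n_splits=5 the data is split into 5 folds; each fold is held out once for evaluation
#while the classifier is trained on the remaining 4, so the call below returns an array of
#5 accuracy scores (one per fold) rather than a single number.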
cross_val_score(rf,X_features,dataset['label'],cv=k_fold,scoring='accuracy',n_jobs=1) 154 | 155 | #Model using Train Test Split 156 | from sklearn.metrics import precision_recall_fscore_support as score 157 | from sklearn.model_selection import train_test_split 158 | X_train, X_test, y_train, y_test=train_test_split(X_features, dataset['label'],test_size=0.3,random_state=0) 159 | rf=RandomForestClassifier(n_estimators=500,max_depth=20,n_jobs=-1) 160 | rf_model=rf.fit(X_train,y_train) 161 | sorted(zip(rf_model.feature_importances_,X_train.columns),reverse=True)[0:10] 162 | y_pred=rf_model.predict(X_test) 163 | precision,recall,fscore,support=score(y_test,y_pred,pos_label='spam',average='binary') 164 | print('Precision {} / Recall {} /Acccuracy {}'.format(round(precision,3),round(recall,3),round((y_pred==y_test).sum()/len(y_pred),3))) 165 | -------------------------------------------------------------------------------- /project5onRecommendation.py: -------------------------------------------------------------------------------- 1 | #Movie Recommender System 2 | '''Recommender Systems, also labelled as recommendation systems, are statistical algorithms that recommend products to users based on similarities between the buying trends of various user or similarities between the products. 3 | 4 | Collaborative Filtering:- The process used to calculate similaritiies between the buying trends of various users or similarities between products is called collaborative filtering. 5 | 6 | User based collaborative filtering:- If two user X and Y, like products A and B and there is another user Z who likes product A, then the product B will also be recommended to user Z. 7 | 8 | Item-based collaborative filtering:- Inthis products are recommended based on similarities between themselves. For instance if a user likes product A and product A has properties X and Y will be recommended to the user.''' 9 | 10 | import numpy as np 11 | import pandas as pd 12 | import matplotlib.pyplot as plt 13 | import seaborn as sns 14 | '''The dataset contains around 100,000 movie reviews applied to 9,000 movies by 600 users.''' 15 | movie_ids_titles=pd.read_csv("movies.csv") 16 | movie_ids_titles.head() 17 | movie_ids_titles.shape 18 | movie_ids_ratings=pd.read_csv("ratings.csv") 19 | movie_ids_ratings.head() 20 | movie_ids_ratings.shape 21 | '''Data Preprocessing:- We need a dataframe that consists of userId, movieId, title and ratings''' 22 | movie_ids_titles.drop(['genres'],inplace=True,axis=1) 23 | movie_ids_titles.head() 24 | movie_ids_ratings.drop(["timestamp"],inplace=True,axis=1) 25 | movie_ids_ratings.head() 26 | merged_movie_df=pd.merge(movie_ids_ratings,movie_ids_titles,on='movieId') 27 | merged_movie_df.head() 28 | '''Data Visualization:- Let's first group the dataset by title and see what information we can get regarding the ratings of movies.''' 29 | merged_movie_df.groupby('title').describe() 30 | merged_movie_df.groupby('title')['rating'].mean().head() 31 | '''Let's sort the movie titles by the descending order of the average user ratings''' 32 | merged_movie_df.groupby('title')['rating'].mean().sort_values(ascending=False).head() 33 | '''Let's now print the movies in the descending order of their rating counts''' 34 | merged_movie_df.groupby('title')['rating'].count().sort_values(ascending=False).head() 35 | '''A movie which is rated by large number of people is usually a good movie. 
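Intuitively, a 5.0 average from a couple of ratings is less trustworthy than a 4.3 average from several hundred ratings, so we keep the rating count alongside the mean rating.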
36 | Let's create a dataframe that shows the title, mean rating and the rating counts.'''
37 | movie_rating_mean_count=pd.DataFrame(columns=['rating_mean','rating_count'])
38 | movie_rating_mean_count["rating_mean"]=merged_movie_df.groupby('title')['rating'].mean()
39 | movie_rating_mean_count["rating_count"]=merged_movie_df.groupby('title')['rating'].count()
40 | movie_rating_mean_count.head()
41 | '''The above dataframe contains the movie title, average rating (ratings mean) and the number of rating counts.
42 | We will plot a histogram to see how the average ratings are distributed.'''
43 | plt.figure(figsize=(10,8))
44 | sns.set_style("darkgrid")
45 | movie_rating_mean_count['rating_mean'].hist(bins=30,color='purple')
46 | #Distribution for rating counts
47 | plt.figure(figsize=(10,8))
48 | sns.set_style("darkgrid")
49 | movie_rating_mean_count['rating_count'].hist(bins=33,color='green')
50 | '''There are around 7000 movies with less than 10 rating counts. The number of movies decreases with an increase in rating counts. Movies with more than 50 ratings are very few.
51 | It is also interesting to see the relationship between mean ratings and rating counts of a movie.'''
52 | plt.figure(figsize=(10,8))
53 | sns.set_style("darkgrid")
54 | sns.regplot(x="rating_mean",y="rating_count",data=movie_rating_mean_count,color="brown")
55 | '''From the top right portion of the above graph, you can see that the movies with a higher number of rating counts tend to have higher mean ratings as well.
56 | Let's sort our dataset by rating counts and see the average ratings of the movies with the top 5 highest number of ratings.'''
57 | movie_rating_mean_count.sort_values("rating_count",ascending=False).head()
58 | 
59 | #Item Based Collaborative Filtering
60 | '''In item based collaborative filtering, products are recommended based on common characteristics.
61 | The first step is to create a dataframe where each movie is represented by a column and rows contain user ratings for movies.'''
62 | user_movie_rating_matrix=merged_movie_df.pivot_table(index="userId",columns="title",values="rating")
63 | print(user_movie_rating_matrix)
64 | user_movie_rating_matrix.shape
65 | '''The dataset contains 610 unique users and 9719 unique movies.
66 | Now we will find the movie recommendation based on a single movie and then based on multiple movies.
67 | Finding recommendations based on a single movie. Suppose we want to find the recommendation based on the movie Pulp Fiction.
68 | First we will filter the column that contains the user ratings for the movie.'''
69 | pulp_fiction_ratings=user_movie_rating_matrix["Pulp Fiction (1994)"]
70 | '''Next, we will find the correlation between the user ratings of all the movies and the user ratings for the movie Pulp Fiction.'''
71 | pulp_fiction_correlations=pd.DataFrame(user_movie_rating_matrix.corrwith(pulp_fiction_ratings),columns=["pf_corr"])
72 | pulp_fiction_correlations.sort_values("pf_corr",ascending=False).head(5)
73 | '''Correlation by itself does not give meaningful results; one solution to this problem can be that, in addition to the correlation between the movies, we also use the rating count of the correlated movie as a criterion for finding the best recommendation.'''
74 | pulp_fiction_correlations=pulp_fiction_correlations.join(movie_rating_mean_count["rating_count"])
75 | pulp_fiction_correlations.head()
76 | '''The pf_corr column contains some NaN values. This is because there can be movies that are rated by users who did not rate Pulp Fiction (1994).
In such cases, correlation will be null. 77 | We will remove all the movies with null correlation with Pulp Fiction (1994).''' 78 | pulp_fiction_correlations.dropna(inplace=True) 79 | pulp_fiction_correlations.sort_values("pf_corr",ascending=False).head() 80 | '''A better way is to find the movies with the rating counts of atleast 50 and having the highest correlation with Pulp Fiction (1994).''' 81 | pulp_fiction_correlations_50=pulp_fiction_correlations[pulp_fiction_correlations['rating_count']>50] 82 | pulp_fiction_correlation_50.sort_values("pf_corr",ascending=False).head() 83 | '''Finding the recommendation based on multiple movies. The first step is to create a dataframe, which contains a correlation between all the movies in our dataset in the form of a matrix.''' 84 | all_movie_correlations=user_movie_rating_matrix.corr(method="pearson",min_periods=50) 85 | all_movie_correlations.head() 86 | '''Now suppose a new user logs into the website. The user has already watched three movies and has given ratings to those movies.''' 87 | movie_data=[['Forrest Gump (1994)',4.0],['Fight Club (1999)',3.5],['Interstellar (2014)',4.0]] 88 | test_movies=pd.DataFrame(movie_data,columns=['Movie_Name','Movie_Rating']) 89 | test_movies.head() 90 | '''We will be recommending movies from our dataset based on the ratings by a new user for these three movies.''' 91 | print(test_movies['Movie_Name'][0]) 92 | print(test_movies['Movie_Rating'][0]) 93 | '''From all_movie_correlations dataframe, let's obtain correlation values for the movies related to Forrest Gump (1994)''' 94 | all_movie_correlations['Forrest Gump (1994)'].dropna() 95 | '''Next, we will iterate through the three movies in the test_movies dataframe, find the correlated movies, and then multiply the correlation of all the correlated movies with the ratings of the input movie. 96 | The correlated movies, along with the weighted correlation are appended to an empty series named recommended movies.''' 97 | recommended_movies=pd.Series() 98 | for i in range(0,2): 99 | movie=all_movie_correlations[test_movies['Movie_Name'][i]].dropna() 100 | movie=movie.map(lambda movie_corr:movie_corr*test_movies["Movie_Rating"][i]) 101 | recommended_movies=recommended_movies.append(movie) 102 | print(recommended_movies) 103 | '''To get a final recommendation, you can sort the movies in the descending order of the weighted correlation''' 104 | recommended_movies.sort_values(inplace=True,ascending=False) 105 | print(recommended_movies.head(10)) 106 | -------------------------------------------------------------------------------- /project6onImageClassification.py: -------------------------------------------------------------------------------- 1 | #Project on Image Classification/Recognition using CNN on CIFAR-10 Dataset 2 | '''In this project we will be using CIFAR-10 dataset. This dataset includes thousands of pictures of 10 different kinds of objects like airplanes, automobiles, birds and so on. 3 | Each image in the dataset includes a matching label so we know what kind of image it is. 
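The ten classes are airplane, automobile, bird, cat, deer, dog, frog, horse, ship and truck, with 50,000 training images and 10,000 test images in total.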
4 | The images in the CIFAR-10 dataset are only 32x32 pixels.'''
5 | import keras
6 | from keras.datasets import cifar10
7 | from keras.models import Sequential
8 | from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
9 | from pathlib import Path
10 | from tensorflow.keras.utils import to_categorical
11 | #Load the dataset
12 | (X_train,y_train),(X_test,y_test)=cifar10.load_data()
13 | #Normalize the data
14 | X_train=X_train.astype('float32')
15 | X_test=X_test.astype('float32')
16 | X_train/=255.0
17 | X_test/=255.0
18 | #Convert class vectors to binary class matrices
19 | y_train=to_categorical(y_train,10)
20 | y_test=to_categorical(y_test,10)
21 | model=Sequential()
22 | model.add(Conv2D(32,(3,3),padding='same',input_shape=(32,32,3),activation='relu'))
23 | model.add(Conv2D(32,(3,3),activation='relu'))
24 | model.add(MaxPooling2D(pool_size=(2,2)))
25 | model.add(Dropout(0.25))
26 | 
27 | model.add(Conv2D(64,(3,3),padding='same',activation='relu'))
28 | model.add(Conv2D(32,(3,3),activation='relu'))
29 | model.add(MaxPooling2D(pool_size=(2,2)))
30 | model.add(Dropout(0.25))
31 | 
32 | model.add(Flatten())
33 | model.add(Dense(512,activation='relu'))
34 | model.add(Dropout(0.5))
35 | model.add(Dense(10,activation='softmax'))
36 | 
37 | #Compile the model
38 | model.compile(
39 | loss='categorical_crossentropy',
40 | optimizer='adam',
41 | metrics=['accuracy'])
42 | model.summary()
43 | 
44 | #Train the model
45 | model.fit(
46 | X_train,
47 | y_train,
48 | batch_size=32,
49 | epochs=25,
50 | validation_data=(X_test,y_test),
51 | shuffle=True)
52 | 
53 | #Save the neural network architecture
54 | model_structure=model.to_json()
55 | f=Path("model_structure.json")
56 | f.write_text(model_structure)
57 | 
58 | #Save the trained neural network weights
59 | model.save_weights("model_weight.h5")
60 | 
61 | #Making Predictions on the images
62 | from keras.models import model_from_json
63 | from pathlib import Path
64 | from keras.preprocessing import image
65 | import numpy as np
66 | class_labels=["Plane","Car","Bird","Cat","Deer","Dog","Frog","Horse","Boat","Truck"]
67 | #Load the json file that contains the model structure
68 | f=Path("model_structure.json")
69 | model_structure=f.read_text()
70 | #Recreate the keras model object from the json data
71 | model=model_from_json(model_structure)
#Load the trained weights saved above so the recreated model can make meaningful predictions
model.load_weights("model_weight.h5")
72 | #Load an image file to test
73 | import matplotlib.pyplot as plt
74 | from tensorflow.keras.utils import load_img,img_to_array
75 | img=load_img("dog.png",target_size=(32,32))
76 | plt.imshow(img)
77 | #Convert the image to a numpy array
78 | from tensorflow.keras.utils import img_to_array
79 | image_to_test=img_to_array(img)
80 | list_of_images=np.expand_dims(image_to_test,axis=0)
81 | #Make predictions using the model
82 | results=model.predict(list_of_images)
83 | #Since we are only testing one image, we only need to check the first result
84 | single_result=results[0]
85 | #We will get a likelihood score for all 10 possible classes. Find out which class has the highest score
86 | most_likely_class_index=int(np.argmax(single_result))
87 | class_likelihood=single_result[most_likely_class_index]
88 | #Print the result
89 | print("This is an image of a {} - likelihood: {:.2f}".format(class_labels[most_likely_class_index],class_likelihood))
--------------------------------------------------------------------------------
/project7onNLPandChatbot.py:
--------------------------------------------------------------------------------
1 | #Project on Sentiment Analysis using NLP and Chatbot using NLP
2 | #Sentiment
Classification using NLP and Classification Algorithm 3 | '''Sentiment Analysis is a means to identify the view or emotion behind a situation. 4 | It basically means to analyze and find the emotion or intent behind a piece of text or speech or any model of communication. 5 | This burger has a very bad taste- negative review 6 | I ordered this pizza today- neutral sentiment/review 7 | I love this cheese sandwich, its so delicious- positive review''' 8 | import pandas as pd 9 | import matplotlib.pyplot as plt 10 | import seaborn as sns 11 | import re 12 | 13 | import nltk 14 | from nltk.corpus import stopwords 15 | from nltk.stem import WordNetLemmatizer 16 | 17 | from sklearn.feature_extraction.text import CountVectorizer 18 | from sklearn.model_selection import GridSearchCV 19 | from sklearn.ensemble import RandomForestClassifier 20 | 21 | from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_score,roc_curve 22 | from sklearn.metrics import classification_report, plot_confusion_matrix 23 | 24 | df_train=pd.read_csv("train.txt",delimiter=";",names=['text','label']) 25 | df_val=pd.read_csv("val.txt",delimiter=";",names=['text','label']) 26 | 27 | df=pd.concat([df_train,df_val]) 28 | df.reset_index(inplace=True,drop=True) 29 | print("Shape of the dataframe:",df.shape) 30 | df.sample(5) 31 | 32 | import warnings 33 | warnings.filterwarnings("ignore") 34 | sns.countplot(df.label) 35 | 36 | '''Positive Sentiment- joy,love,surprise 37 | Negative Sentiment- anger,sadness,fear 38 | Now we will create a custom encoder to convert categorical target labels to numerical i.e. 0 and 1''' 39 | 40 | def custom_encoder(df): 41 | df.replace(to_replace="surprise",value=1,inplace=True) 42 | df.replace(to_replace="love",value=1,inplace=True) 43 | df.replace(to_replace="joy",value=1,inplace=True) 44 | df.replace(to_replace="fear",value=0,inplace=True) 45 | df.replace(to_replace="anger",value=0,inplace=True) 46 | df.replace(to_replace="sadness",value=0,inplace=True) 47 | custom_encoder(df['label']) 48 | sns.countplot(df.label) 49 | '''Preprocessing Steps:- 50 | Get rid of any characters apart from alphabets 51 | Convert the string to lowercase because Python is case-sensitive 52 | 3 check and remove the stopwords 53 | Perform Lemmatization''' 54 | 55 | lm=WordNetLemmatizer() 56 | def text_transformation(df_col): 57 | corpus=[] 58 | for item in df_col: 59 | new_item=re.sub('[^a-zA-Z]',' ',str(item)) 60 | new_item=new_item.lower() 61 | new_item=new_item.split() 62 | new_item=[lm.lemmatize(word) for word in new_item if word not in set(stopwords.words('english'))] 63 | corpus.append(' '.join(str(x) for x in new_item)) 64 | return corpus 65 | corpus=text_transformation(df['text']) 66 | cv=CountVectorizer(ngram_range=(1,2)) 67 | traindata=cv.fit_transform(corpus) 68 | x=traindata 69 | y=df.label 70 | '''Now we will fit the data into grid search and view the best parameters using the best_params attribute''' 71 | parameters={'max_features':('auto','sqrt'),'n_estimators':[5,10],'max_depth':[10,None],'min_samples_leaf':[5],'min_samples_leaf':[1],'bootstrap':[True]} 72 | grid_search=GridSearchCV(RandomForestClassifier(),parameters,cv=5,return_train_score=True,n_jobs=-1) 73 | grid_search.fit(x,y) 74 | grid_search.best_params_ 75 | '''We can view all the models and their respective parameters,mean test score and rank as GridSearch CV''' 76 | for i in range(8): 77 | print('Parameters:',grid_search.cv_results_['params'][i]) 78 | print('Mean test 
Score:',grid_search.cv_results_[mean_test_score'][i]) 79 | print("Rank:",grid_search.cv_results_['rank_test_score']) 80 | 81 | '''Now we will choose the best parameter obtained from GridSearchCV and create a final random forest classifier model and then train our model.''' 82 | 83 | rfc=RandomForestClassifier(max_features=grid_search.best_params_['max_features'],max_depth=grid_search.best_params_['max_depth'],n_estimators=grid_search.best_params_['n_estimators'],min_samples_split=grid_search.best_params_['min_samples_split'],min_samples_leaf=grid_search.best_params_['min_samples_leaf'],bootstrap=grid_search.best_params_['bootstrap']) 84 | rfc.fit(x,y) 85 | 86 | #Test Data Transformation 87 | test_df=pd.read_csv('test.txt',delimiter=';',names=['text','label']) 88 | X_test,y_test=test_df.text,test_df.label 89 | #encode the labels into two classes 0 and 1 90 | test_df=custom_encoder(y_test) 91 | #preprocessing of text 92 | test_corpus=text_transformation(X_test) 93 | #convert the text data into vectors 94 | testdata=cv.transform(test_corpus) 95 | #predict the target 96 | predictions=rfc.predict(testdata) 97 | 98 | #Model Evaluation 99 | '''We will evaluate our model using various metrics such as accuracy score, recall score confusion matrix.''' 100 | 101 | acc_score=accuracy_score(y_test,predictions) 102 | pre_score=precision_score(y_test,predictions) 103 | rec_score=recall_score(y_test,predictions) 104 | print('Accuracy Score:',acc_score) 105 | print('Precision Score:',pre_score) 106 | print('Recall Score:',rec_score) 107 | print("-"*50) 108 | cr=classification_report(y_test,predictions) 109 | print(cr) 110 | '''ROC Curve- We will plot probability of the class using the predict_proba() method of random forest classifier 111 | and then we will plot the curve.''' 112 | predictions_probability=rfc.predict_proba(testdata) 113 | fpr,tpr,thresfolds=roc_curve(y_test,predictions_probability[:,1]) 114 | plt.plot(fpr,tpr) 115 | plt.plot([0,1]) 116 | plt.title('ROC Curve') 117 | plt.xlabel('False Positive Rate') 118 | plt.ylabel('True Positive Rate') 119 | plt.show() 120 | '''As we can see that our model performed very well in classifying the sentiments, with an accuracy score, precision score and recall score of approx 96% 121 | Now we will check for custom input as well and let our model identity the sentiment of the input statement.''' 122 | 123 | def expression_check(prediction_input): 124 | if prediction_input==0: 125 | print("Input statement has negative sentiment") 126 | elif prediction_input==1: 127 | print("Input statement has positive sentiment") 128 | else: 129 | print("Invalid Statement") 130 | '''Function to take the input statement and performs the same transformation as we did earlier''' 131 | def sentiment_predictor(input): 132 | input=text_transformation(input) 133 | transformed_input=cv.transform(input) 134 | predictions=rfc.predict(transformed_input) 135 | expression_check(prediction) 136 | input1=["Sometimes I just don't want to go out"] 137 | input2=["I bought a new phone and it's so good"] 138 | sentiment_predictor(input1) 139 | sentiment_predictor(input2) 140 | '''Input statement has negative sentiment 141 | Input statement has positive statement''' 142 | 143 | #Chatbot using NLP and Neural Networks in Python 144 | '''Tag means classes 145 | Patterns means what user is going to ask 146 | Response is chatbot reponse''' 147 | data={"intents":[{"tag":"greetings","patterns":["Hello","How are you?","Hi There","Hi", "What's up"],"responses":["Howdy Partner!","Hello","How are you 
doing?","Greetings!","How do you do"]},{"tag":"age","patterns":["how old are you","when is your birthday","when was you born"],"responses":["I am 24 years old","I was born in 1966","My birthday is July 3rd and I was born in 1996","03/07/1996"]},{"tag":"date","patterns":["what are you doing this weekend","do you want to hangout sometime?","what are your plans for this week"],"responses":["I am available this week","I don't have any plans","I am not busy"]},{"tag":"name","patterns":["what's your name","what are you called","who are you"],"responses":["My name is Kippi","I'm Kippi","Kippi"]},{"tag":"goodbye","patterns":["bye","g2g","see ya","adios","cya"],"responses":["It was nice speaking to you","See you later","Speak Soon"]},]} 148 | '''For each tag we created, we would specify patterns. Essentially this defines the different ways of how a user may pose a query to the chatbot. 149 | The chatbot would then take these patterns and use them as training data to determine what someone is asking and the chatbot reponse would be relevant to that question.''' 150 | import json 151 | import string 152 | import random 153 | import nltk 154 | import numpy as np 155 | from nltk.stem import WordNetLemmatizer 156 | import tensorflow as tf 157 | from tensorflow.keras import Sequential 158 | from tensorflow.keras.layers import Dense,Dropout 159 | nltk.download("punkt") 160 | nltk.download("wordnet") 161 | '''In order to create our training data below steps to be followed 162 | Create a vocabulary of all the words used in the patterns 163 | Create a list of the classes-tag of each intent 164 | Create a list of all the patterns within the intents file 165 | Create a list of all the associated tags to go with each patterns in the intents file. 166 | Initializing lemmatizer to get stem of words''' 167 | lemmatizer=OrdNetLemmatizer() 168 | words=[] 169 | classes=[] 170 | doc_x=[] 171 | doc_y=[] 172 | '''Loop through all the intents 173 | Tokenize each pattern and append token to words, the patterns and the associated tag to their associated list''' 174 | for intent in data["intents"]: 175 | for pattern in intent["patterns"]: 176 | tokens=nltk.word_tokenize(pattern) 177 | words.extend(tokens) 178 | doc_x.append(pattern) 179 | doc_y.append(intent["tag"]) 180 | if intent["tag"] not in classes: 181 | classes.append(intent["tag"]) 182 | #Lemmatize all the words in the vocab and convert them to lowercase 183 | words=[lemmatizer.lemmatize(word.lower()) for word in words if word not in string.punctuation] 184 | '''Sorting the vocab and classes in alphabetical order and taking the set to ensure no duplicates occur''' 185 | words=sorted(set(words) 186 | classes=sorted(set(classes)) 187 | print(words) 188 | print(classes) 189 | print(doc_x) 190 | print(doc_y) 191 | #List for training data 192 | training=[] 193 | out_empty=[0]*len(classes) 194 | #creating a bag of words model 195 | for idx,doc in enumerate(doc_x): 196 | bow=[] 197 | text=lemmmatizer.lemmatize(doc.lower()) 198 | for word in words: 199 | bow.append(1) if word in text else bow.append(0) 200 | output_row=list(out_empty) 201 | output_row[classes.index(doc_y[idx])]=1 202 | training.append([bow,output_row]) 203 | random.shuffle(training) 204 | training=np.array(training,dtype=object) 205 | train_X=np.array(list(training[:,0])) 206 | train_y=np.array(list(training[:,1])) 207 | '''The model will look at the features and predict the tag associated with the features and then will select an appropriate message/response from the tag.''' 208 | 
input_shape=(len(train_X[0]),) 209 | output_shape=len(train_y[0]) 210 | epochs=500 211 | from tensorflow.keras.models import Sequential 212 | from tensorflow.keras.layers import Dense,Dropout 213 | #Create a Sequential model 214 | model=Sequential() 215 | model.add(Dense(128,input_shape=input_shape,activation='relu')) 216 | model.add(Dropout(0.5)) 217 | model.add(Dense(64,activation='relu')) 218 | model.add(Dropout(0.3)) 219 | model.add(Dense(output_shape,activation='softmax')) 220 | #Create the Adam optimizer with a specified learning rate 221 | adam=tf.keras.optimizers.Adam(learning_rate=0.01) 222 | #compile the model using the Adam optimizer 223 | model.compile(loss='categorical_crossentropy',optimizer=adam,metrics=['accuracy']) 224 | print(model.summary()) 225 | model.fit(x=train_X,y=train_y,epochs=500,verbose=1) 226 | def clean_text(text): 227 | tokens=nltk.word_tokenize(text) 228 | tokens=[lemmatizer.lemmatize(word) for word in tokens] 229 | return tokens 230 | def bag_of_words(text,vocab): 231 | tokens=clean_text(text) 232 | bow=[0]*len(vocab) 233 | for w in tokens: 234 | for idx,word in enumerate(vocab): 235 | if word==w: 236 | bow[idx]=1 237 | return np.array(bow) 238 | def pred_class(text,vocab,labels): 239 | bow=bag_of_words(text,vocab) 240 | result=model.predict(np.array([bow]))[0] 241 | thresh=0.2 242 | y_pred=[[idx,res] for idx,res in enumerate(result) if res>thresh] 243 | y_pred.sort(key=lambda x:x[1],reverse=True) 244 | return_list=[] 245 | for r in y_pred: 246 | return_list.append(labels[r[0]]) 247 | return return_list 248 | def get_response(intents_list,intent_json): 249 | tag=intents_list[0] 250 | list_of_intents=intents_json["intents"] 251 | for i in list_of_intents: 252 | if i["tag"]==tag: 253 | result=random.choice(i["responses"]) 254 | break 255 | return result 256 | #Running the chatbot 257 | while True: 258 | message=input("") 259 | intents=pred_class(message,words,classes) 260 | result=get_response(intents,data) 261 | print(result) -------------------------------------------------------------------------------- /pyjokes.py: -------------------------------------------------------------------------------- 1 | import pyjokes 2 | 3 | print("Printing Jokes...") 4 | 5 | # This prints a random joke 6 | joke = pyjokes.get_joke() 7 | print(joke) -------------------------------------------------------------------------------- /pyramid.py: -------------------------------------------------------------------------------- 1 | ''' 2 | For n = 3 3 | * 4 | *** 5 | ***** 6 | 7 | For n = 5 8 | * 9 | *** 10 | ***** 11 | ******** 12 | ********** 13 | 14 | ''' 15 | 16 | n = int(input("Enter the number: ")) 17 | for i in range(1, n+1): 18 | print(" "* (n-i), end="") 19 | print("*"* (2*i-1), end="") 20 | print("") 21 | -------------------------------------------------------------------------------- /queueUsingList.py: -------------------------------------------------------------------------------- 1 | il=[] 2 | while True: 3 | c=int(input(''' 4 | 1 Enqueue 5 | 2 Dequeue 6 | 3 Front Elements 7 | 4 Rear Elements 8 | 5 Display Elements 9 | 6 Exit 10 | ''')) 11 | if c==1: 12 | n=input("Enter The Value:") 13 | l.append(n) 14 | print(l) 15 | elif c==2: 16 | if len(l)==0: 17 | print("Empty Queue") 18 | else: 19 | del l[0] 20 | print(l) 21 | elif c==3: 22 | if len(l)==0: 23 | print("Empty Queue") 24 | else: 25 | print("Front Queue Value=>",l[0]) 26 | elif c==4: 27 | if len(l)==0: 28 | print("Empty Queue") 29 | else: 30 | print("Rear Queue Value=>",l[-1]) 31 | elif c==5: 32 | print("Display 
Queue=>",l) 33 | elif c==6: 34 | break 35 | else: 36 | print("Invalid Operation") 37 | -------------------------------------------------------------------------------- /randomModule.py: -------------------------------------------------------------------------------- 1 | from random import randint 2 | 3 | class Train: 4 | 5 | def __init__(self, trainNo): 6 | self.trainNo = trainNo 7 | 8 | def book(self, fro, to): 9 | print(f"Ticket is booked in train no: {self.trainNo} from {fro} to {to}") 10 | 11 | def getStatus(self): 12 | print(f"Train no: {self.trainNo} is running on time") 13 | 14 | def getFare(self, fro, to): 15 | print(f"Ticket fare in train no: {self.trainNo} from {fro} to {to} is {randint(222, 5555)}") 16 | 17 | 18 | t = Train(12399) 19 | t.book("Rampur", "Delhi") 20 | t.getStatus() 21 | t.getFare("Rampur", "Delhi") 22 | -------------------------------------------------------------------------------- /replace.py: -------------------------------------------------------------------------------- 1 | name = "Hariram is a good boy and Geeta is a good girl." 2 | 3 | print(name.replace(" ", " ")) 4 | print(name) # Strings are immutable which means that you cannot change them by running functions on them 5 | -------------------------------------------------------------------------------- /rockPaperScissor.py: -------------------------------------------------------------------------------- 1 | import random 2 | l=["rock","scissor","paper"] 3 | ''' 4 | rock vs paper => paper wins 5 | rock vs scissor => rock wins 6 | paper vs scissor => scissor wins 7 | 8 | ''' 9 | while True: 10 | ocount=0 11 | ucount=0 12 | uc=int(input(''' 13 | Game Start..... 14 | 1 Yes 15 | 2 No | Exit 16 | ''')) 17 | if uc==1: 18 | for a in range(1,6): 19 | userInput= int(input(''' 20 | 1 Rock 21 | 2 Scissor 22 | 3 Paper 23 | ''' )) 24 | if userInput==1: 25 | uchoice="rock" 26 | elif userInput==2: 27 | uchoice="scissor" 28 | elif userInput==3: 29 | uchoice="paper" 30 | Ochoice=random.choice(l) 31 | if Ochoice==uchoice: 32 | print("Opponent choice",Ochoice) 33 | print("User choice",uchoice) 34 | print("Game Draw") 35 | ucount=ucount+1 36 | ocount=ocount+1 37 | elif(uchoice=="rock" and Ochoice=="scissor") or (uchoice=="paper" and Ochoice=="rock") or (uchoice=="scissor" and Ochoice=="paper"): 38 | print("Opponent Choice",Ochoice) 39 | print("User choice",uchoice) 40 | print("You Win") 41 | ucount=ucount+1 42 | else: 43 | print("Opponent Choice",Ochoice) 44 | print("User choice",uchoice) 45 | print("Opponent Win") 46 | ocount=ocount+1 47 | if ucount==ocount: 48 | print("Final Game Draw....." ) 49 | print("User Score",ucount ) 50 | print("Opponent Score",ocount ) 51 | elif ucount>ocount: 52 | print("Final You Win The Game....." ) 53 | print("User Score",ucount ) 54 | print("Opponent Score",ocount ) 55 | else: 56 | print("Final Opponent Win The Game....." 
) 57 | print("User Score",ucount ) 58 | print("Opponent Score",ocount ) 59 | else: 60 | break 61 |
-------------------------------------------------------------------------------- /sets.py: -------------------------------------------------------------------------------- 1 | s={10,20,30,40} 2 | print(s) 3 | for a in s: 4 | print(a) 5 | l=[10,20,30,40] 6 | s=set(l) 7 | print(s) 8 | s={10,20,30,40,50} 9 | s.remove(20) 10 | print(s) 11 | s.discard(50) 12 | print(s) 13 | print(s.pop()) 14 | print(s) 15 | s.clear() 16 | print(s) 17 | l=[10,80,90] 18 | s={10,20,30,40,50} 19 | s.add(60) 20 | print(s) 21 | s.update(l) 22 | print(s) 23 |
-------------------------------------------------------------------------------- /slicing_concat.py: -------------------------------------------------------------------------------- 1 | import pandas as pd # type: ignore 2 | 3 | # Initializing the nested list with Data set 4 | player_list = [['M.S.Dhoni', 36, 75, 5428000], 5 | ['A.B.D Villers', 38, 74, 3428000], 6 | ['V.Kohli', 31, 70, 8428000], 7 | ['S.Smith', 34, 80, 4428000], 8 | ['C.Gayle', 40, 100, 4528000], 9 | ['J.Root', 33, 72, 7028000], 10 | ['K.Peterson', 42, 85, 2528000]] 11 | 12 | 13 | # creating a pandas dataframe 14 | df = pd.DataFrame(player_list, columns=['Name', 'Age', 'Weight', 'Salary']) 15 | print(df) # data frame before slicing 16 |
-------------------------------------------------------------------------------- /sorting.py: -------------------------------------------------------------------------------- 1 | list1 = [(1,2),(3,3),(1,1)] 2 | list1.sort() 3 | print(list1) 4 | list1.sort(reverse=True) 5 | print(list1) 6 | # Original list of strings 7 | words = ["apple", "banana", "kiwi", "orange", "grape"] 8 | words.sort() 9 | print("Sorted in alphabetical order:",words) 10 | # Sorting by length using the len() function as the key 11 | words.sort(key=len) 12 | # Displaying the sorted list 13 | print("Sorted by Length:", words) 14 | # Original list of tuples 15 | people = [("Alice", 25), ("Bob", 30), ("Charlie", 22), ("David", 28)] 16 | # Sorting by the second element of each tuple (age) 17 | people.sort(key=lambda x: x[1]) 18 | # Displaying the sorted list 19 | print("Sorted by Age in tuple:", people) 20 | # Original list of dictionaries 21 | students = [ 22 | {"name": "Alice", "age": 25}, 23 | {"name": "Bob", "age": 30}, 24 | {"name": "Charlie", "age": 22}, 25 | {"name": "David", "age": 28}] 26 | # Sorting by the 'age' key in each dictionary 27 | students.sort(key=lambda x: x["age"]) 28 | # Displaying the sorted list 29 | print("Sorted by Age in dictionary:", students)
-------------------------------------------------------------------------------- /stackUsingList.py: -------------------------------------------------------------------------------- 1 | l=[] 2 | while True: 3 | c=int(input(''' 4 | 1 Push Elements 5 | 2 Pop Elements 6 | 3 Peek Elements 7 | 4 Display Elements 8 | 5 Exit 9 | ''')) 10 | if c==1: 11 | n=input("Enter The Value:") 12 | l.append(n) 13 | print(l) 14 | elif c==2: 15 | if len(l)==0: 16 | print("Empty Stack") 17 | else: 18 | p=l.pop() 19 | print(p) 20 | print(l) 21 | elif c==3: 22 | if len(l)==0: 23 | print("Empty Stack") 24 | else: 25 | print("Last Stack Value=>",l[-1]) 26 | elif c==4: 27 | print("Display Stack=>",l) 28 | elif c==5: 29 | break 30 | else: 31 | print("Invalid Operation") 32 |
-------------------------------------------------------------------------------- /startswith.py: -------------------------------------------------------------------------------- 1 | l = ["Ram", "Soham", "Sachin", "Rahul"] 2 | 3 | for name in l: 4 | if(name.startswith("S")): 5 | print(f"Hello {name}") 6 |
-------------------------------------------------------------------------------- /staticMethodINclass.py: -------------------------------------------------------------------------------- 1 | class Calculator: 2 | def __init__(self, n): 3 | self.n = n 4 | 5 | def square(self): 6 | print(f"The square is {self.n*self.n}") 7 | 8 | def cube(self): 9 | print(f"The cube is {self.n*self.n*self.n}") 10 | 11 | def squareroot(self): 12 | print(f"The squareroot is {self.n**0.5}") 13 | 14 | @staticmethod 15 | def hello(): 16 | print("Hello there!") 17 | 18 | a = Calculator(4) 19 | a.hello() 20 | a.square() 21 | a.cube() 22 | a.squareroot() 23 |
-------------------------------------------------------------------------------- /statistics.py: -------------------------------------------------------------------------------- 1 | '''Write a program to compute summary statistics such as mean, median, mode, standard 2 | deviation and variance of the given different types of data.''' # type: ignore 3 | import numpy as np 4 | a=np.array([[1,23,78],[98,60,75],[79,25,48]]) 5 | print("Entered array:",a) 6 | #Minimum function 7 | print("Minimum=",np.amin(a)) 8 | #Maximum Function 9 | print("Maximum=",np.amax(a)) 10 | #Mean Function 11 | print("Mean=",np.mean(a)) 12 | #Median Function 13 | print("Median=",np.median(a)) 14 | #std Function 15 | print("Standard Deviation=",np.std(a)) 16 | #var Function 17 | print("Variance=",np.var(a))
-------------------------------------------------------------------------------- /stringFormatting.py: -------------------------------------------------------------------------------- 1 | #String Formatting 2 | #named indexes: 3 | txt1="Welcome to {fname} {lname}".format(fname="AI",lname="World !!!") 4 | #numbered indexes: 5 | txt2="Welcome to {0} {1}".format("AI","World !!!") 6 | #empty placeholders 7 | txt3="Welcome to {} {}".format("AI","World !!!") 8 | txt4="Welcome to {b:10} {a}".format(a="AI",b="World !!!") 9 | ''' ^ ---- use it for center 10 | < ---- use it for left align 11 | > ---- use it for right align''' 12 | txt5="Welcome to {a:^10} {b}".format(a="AI",b="World !!!") 13 | print(txt1) 14 | print(txt2) 15 | print(txt3) 16 | print(txt4) 17 | print(txt5) 18 |
-------------------------------------------------------------------------------- /stringManipulations.py: -------------------------------------------------------------------------------- 1 | #data aggregation & grouping operations, Visualisation using Matplotlib 2 | #String Manipulations 3 | import numpy as np; import pandas as pd #used by the data wrangling and plotting sections below 4 | val="a,b,,guido" 5 | pieces=[x.strip() for x in val.split(",")] 6 | print(pieces) 7 | first,second,third,fourth=pieces 8 | first+"::"+second+"::"+third 9 | "::".join(pieces) 10 | 11 | #Data Wrangling 12 | data=pd.Series(np.random.uniform(size=9),index=[["a","a","a","b","b","c","c","d","d"],[1,2,3,1,3,1,2,2,3]]) 13 | print(data) 14 | data.index 15 | data['b'] 16 | data['b'][3] 17 | data["b":"c"] 18 | data.loc[["b","d"]] 19 | data.unstack() 20 | frame=pd.DataFrame(np.arange(12).reshape((4,3)),index=[["a","a","b","b"],[1,2,1,2]],columns=[["Ohio","Ohio","Colorado"],["Green","Red","Green"]]) 21 | print(frame) 22 | frame.index.names=["key1","key2"] 23 | frame.columns.names=["state","color"] 24 | frame.index.nlevels 25 | 26 | #combining and merging datasets 27 | '''pandas.merge-> 28 | Connect rows in DataFrames based on one or more keys 29 | pandas.concat-> 30 | Concatenate or stack objects together along an axis 31 | combine_first-> 32 | Splice together overlapping data to 
fill in missing values in one object with values from another''' 33 | df1=pd.DataFrame({"key":["b","b","a","c","a","a","b"],"data1":pd.Series(range(7),dtype="Int64")}) 34 | df2=pd.DataFrame({"key":["a","b","d"],"data2":pd.Series(range(3),dtype="Int64")}) 35 | print(df1) 36 | print(df2) 37 | pd.merge(df1,df2) 38 | df3=pd.DataFrame({"lkey":["b","b","a","c","a","a","b"],"data1":pd.Series(range(7),dtype="Int64")}) 39 | df4=pd.DataFrame({"rkey":["a","b","d"],"data2":pd.Series(range(3),dtype="Int64")}) 40 | pd.merge(df3,df4,left_on="lkey",right_on="rkey") 41 | pd.merge(df1,df2,how="outer") 42 | pd.merge(df3,df4,left_on="lkey",right_on="rkey",how="outer") 43 | df1=pd.DataFrame({"key":["b","b","a","c","a","b"],"data1":pd.Series(range(6),dtype="Int64")}) 44 | df2=pd.DataFrame({"key":["a","b","a","b","d"],"data2":pd.Series(range(5),dtype="Int64")}) 45 | print(df1) 46 | print(df2) 47 | pd.merge(df1,df2,on="key",how="left") 48 | pd.merge(df1,df2,how="inner") 49 | left=pd.DataFrame({"key1":["foo","foo","bar"],"key2":["one","two","three"],"lval":pd.Series([1,2,3],dtype="Int64")}) 50 | right=pd.DataFrame({"key1":["foo","foo","bar","bar"],"key2":["one","one","one","two"],"rval":pd.Series([4,5,6,7],dtype="Int64")}) 51 | print(left) 52 | print(right) 53 | pd.merge(left,right,on=["key1","key2"],how="outer") 54 | left1=pd.DataFrame({"key":["a","b","a","a","b","c"],"value":pd.Series(range(6),dtype="Int64")}) 55 | right1=pd.DataFrame({"group_val":[3.5,7]},index=["a","b"]) 56 | print(left1) 57 | print(right1) 58 | pd.merge(left1,right1,left_on="key",right_index=True) 59 | 60 | #Concatenating along an axis 61 | arr=np.arange(12).reshape((3,4)) 62 | print(arr) 63 | np.concatenate([arr,arr],axis=1) 64 | np.concatenate([arr,arr]) 65 | s1=pd.Series([0,1],index=["a","b"],dtype="Int64") 66 | s2=pd.Series([2,3,4],index=["c","d","e"],dtype="Int64") 67 | s3=pd.Series([5,6],index=["f","g"],dtype="Int64") 68 | pd.concat([s1,s2,s3]) 69 | pd.concat([s1,s2,s3],axis="columns") 70 | a=pd.Series([np.nan,2.5,0.0,3.5,4.5,np.nan],index=["f","e","d","c","b","a"]) 71 | b=pd.Series([0.,np.nan,2.,np.nan,np.nan,5.],index=["a","b","c","d","e","f"]) 72 | print(a) 73 | print(b) 74 | np.where(pd.isna(a),b,a) 75 | a.combine_first(b) 76 | 77 | #Plotting and Visualisation 78 | import matplotlib.pyplot as plt 79 | data=np.arange(10) 80 | print(data) 81 | plt.plot(data) 82 | 83 | #Plots in Matplotlib reside within a figure object 84 | fig=plt.figure() 85 | ax1=fig.add_subplot(2,2,1) 86 | ax1.hist(np.random.standard_normal(100),bins=20,color="black",alpha=0.6) 87 | ax2=fig.add_subplot(2,2,2) 88 | ax2.scatter(np.arange(30),np.arange(30)+3*np.random.standard_normal(30)) 89 | ax3=fig.add_subplot(2,2,3) 90 | ax3.plot(np.random.standard_normal(50).cumsum(),color="black",linestyle="dashed") 91 | ax4=fig.add_subplot(2,2,4) 92 | fig,axes=plt.subplots(2,2,sharex=True,sharey=True) 93 | for i in range(2): 94 | for j in range(2): 95 | axes[i,j].hist(np.random.standard_normal(500),bins=50,color="black",alpha=0.5) 96 | fig.subplots_adjust(wspace=0,hspace=0) 97 | fig=plt.figure() 98 | ax=fig.add_subplot() 99 | ax.plot(np.random.standard_normal(30).cumsum(),color="black",linestyle="dashed",marker="s") 100 | #customising x-axis ticks, labels and title 101 | fig,ax=plt.subplots() 102 | ax.plot(np.random.standard_normal(1000).cumsum()) 103 | ticks=ax.set_xticks([0,250,500,750,1000]) 104 | labels=ax.set_xticklabels(["one","two","three","four","five"],rotation=30,fontsize=10) 105 | ax.set_xlabel("Stages") 106 | ax.set_title("Matplotlib Plot") 107 | #plotting multiple series with a legend 108 | fig,ax=plt.subplots()
109 | ax.plot(np.random.randn(1000).cumsum(),color="black",label="one") 110 | ax.plot(np.random.randn(1000).cumsum(),color="blue",linestyle="dashed",label="two") 111 | ax.plot(np.random.randn(1000).cumsum(),color="red",linestyle="dotted",label="three") 112 | ax.legend() 113 | #bar plots from a pandas Series and DataFrame 114 | fig,axes=plt.subplots(2,1) 115 | data=pd.Series(np.random.uniform(size=16),index=list("abcdefghijklmnop")) 116 | data.plot.bar(ax=axes[0],color="red",alpha=0.7) 117 | data.plot.barh(ax=axes[1],color="purple",alpha=0.5) 118 | df=pd.DataFrame(np.random.uniform(size=(6,4)),index=["one","two","three","four","five","six"],columns=pd.Index(["A","B","C","D"],name="Genius")) 119 | print(df) 120 | df.plot.bar() 121 | df.plot.barh(stacked=True,alpha=0.5) 122 | df.plot.bar(stacked=True,alpha=0.5) 123 | plt.show() #display all the figures created above
-------------------------------------------------------------------------------- /stringToList.py: -------------------------------------------------------------------------------- 1 | n=input("Enter The Value:") 2 | print(n) 3 | l=n.split() 4 | print(l) 5 | l=[] 6 | for a in range(1,4): 7 | n=input("Enter The Value"+str(a)+"=") 8 | l.append(n) 9 | print(l) 10 |
-------------------------------------------------------------------------------- /table.py: -------------------------------------------------------------------------------- 1 | n = int(input("Enter a number: ")) 2 | 3 | for i in range(1, 11): 4 | print(f"{n} X {i} = {n * i}") 5 |
-------------------------------------------------------------------------------- /textTOspeech.py: -------------------------------------------------------------------------------- 1 | import pyttsx3 2 | engine = pyttsx3.init() 3 | engine.say("Hey I am good") 4 | engine.runAndWait() 5 |
-------------------------------------------------------------------------------- /tuple.py: -------------------------------------------------------------------------------- 1 | t=(10,20,30,40,50) 2 | print(type(t)) 3 | print(t) 4 | n=t[3] 5 | print(n) 6 | l=len(t) 7 | for a in range(l): 8 | print(t[a]) 9 | for a in t: 10 | print(a) 11 | print(min(t)) 12 | print(max(t)) 13 | print(t.count(10)) 14 | print(t.index(50)) 15 | print(sum(t)) 16 | print(sum(t,10)) 17 |
-------------------------------------------------------------------------------- /tuples.py: -------------------------------------------------------------------------------- 1 | #Write a program to demonstrate working with tuples in python 2 | #creating an empty tuple 3 | empty_tup=() 4 | print("Empty tuple=",empty_tup) 5 | #creating single element tuple 6 | single_tup=(10,) 7 | print("Single element tuple=",single_tup) 8 | #creating a tuple with multiple elements 9 | my_tup=(10,3.7,'program','a') 10 | print("Tuple with multiple elements is:",my_tup) 11 | print("Length of the tuple is:",len(my_tup)) 12 | T1=(10,20,30,40,70.5,33.3) 13 | print("Maximum value of the tuple T1 is:",max(T1)) 14 | print("Minimum value of the tuple T1 is:",min(T1)) 15 | str1='tuple' 16 | T=tuple(str1) #converting string into tuple 17 | print("After converting a string into tuple,the new tuple is:",T) 18 | L=[2,4,6,7,8] 19 | T2=tuple(L) #converting list into tuple 20 | print("After converting a list into tuple,the new tuple is:",T2)
-------------------------------------------------------------------------------- /usingListFun.py: -------------------------------------------------------------------------------- 1 | #creating an empty list 2 | empty_list=[] 3 | print("Empty List is :",empty_list) 4 | #creating a list with elements 5 | my_list= [10,507,"python"] 6 | print("Created list 
is:",my_list) 7 | #Inserting new elements using append( 8 | my_list.append(20) 9 | my_list.append("program") 10 | my_list.append([3,7]) 11 | print("After deleting elements the new list is:",my_list) 12 | #deleting elements using pop() 13 | my_list.pop() 14 | my_list.pop(2) 15 | #deleting elements using remove 16 | my_list.remove(10) 17 | print("After deleting elements the new list is:",my_list) -------------------------------------------------------------------------------- /wishing.py: -------------------------------------------------------------------------------- 1 | name = input("Enter your name: ") 2 | 3 | print(f"Good Afternoon, {name} ") 4 | -------------------------------------------------------------------------------- /youtubeTranscriptSummarizer.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from dotenv import load_dotenv 3 | load_dotenv()##load all the environment variables 4 | import os 5 | import google.generativeai as genai 6 | from youtube_transcript_api import YouTubeTranscriptApi 7 | genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) 8 | prompt="""You are Youtube Video summarizer. You will be taking the transcript text and summarizing the entire video and providing the important summary in points within 250 words. Please provide the summary of the text given here:""" 9 | ## getting the transcript data from yt videos 10 | def extract_transcript_details(youtube_video_url): 11 | try: 12 | video_id=youtube_video_url.split("=")[1] 13 | transcript_text=YouTubeTranscriptApi.get_transcript(video_id) 14 | transcript="" 15 | for i in transcript_text: 16 | transcript_text+=" "+i["text"] 17 | return transcript 18 | except Exception as e: 19 | raise e 20 | ## getting the summmary based on Prompt from Google Gemini Pro 21 | def generate_gemini_content(transcript_text,prompt): 22 | model=genai.GenerativeModel("gemini-pro") 23 | response=model.generate_context(prompt+transcript_text) 24 | return response.text 25 | st.title("YouTube Transcript to Detailed Notes Converter") 26 | youtube_link=st.text_input("Enter YouTube Video Link:") 27 | if youtube_link: 28 | video_id=youtube_link.split("=")[1] 29 | print(video_id) 30 | st.image(f"http://img.youtube.com/vi/{video_id}/0.jpg",use_column_width=True) 31 | if st.button("Get Detailed Notes"): 32 | transcript_text=extract_transcript_details(youtube_link) 33 | if transcript_text: 34 | summary=generative_gemini_content(transcript_text,prompt) 35 | st.markdown("## Detailed Notes:") 36 | st.write(summary) 37 | #streamlit run app.py 38 | -------------------------------------------------------------------------------- /zipFunction.py: -------------------------------------------------------------------------------- 1 | l=[10,20,40,50] 2 | l1=[3,4,77,88] 3 | t=len(l) 4 | for a,b in zip(l,l1): 5 | print(a,b) 6 | for h in range(t): 7 | print(l[h],l1[h]) 8 | --------------------------------------------------------------------------------