├── OSmoduleTOlistDirectory.py ├── arithmeticOperations.py ├── array_numpy.py ├── array_operations.py ├── bikeRentelSystem.py ├── biodataUsingClass.py ├── calculatorUsingClass.py ├── classInstances.py ├── colored_module.py ├── conditional.py ├── constructor.py ├── copy_file.py ├── dataPreprocessing.py ├── dataProcessing.py ├── data_types.py ├── deepLearningAndModel.py ├── dictionary.py ├── dictionaryFunctions.py ├── escapeSequences.py ├── factorial.py ├── fileOperations&dataCleaning.py ├── findFUNC.py ├── functions.py ├── hollowSquare.py ├── json.py ├── letter.py ├── listComprehension.py ├── listFunctions.py ├── listIteration.py ├── listSlicing.py ├── newfile.py ├── newfile1.py ├── newfile2.py ├── newfile3.py ├── numpy.py ├── pandas.py ├── primeornot.py ├── printingPoem.py ├── project1.py ├── project2.py ├── project3onCNN.py ├── project4onNLP.py ├── project5onRecommendation.py ├── project6onImageClassification.py ├── project7onNLPandChatbot.py ├── pyjokes.py ├── pyramid.py ├── queueUsingList.py ├── randomModule.py ├── replace.py ├── rockPaperScissor.py ├── sets.py ├── slicing_concat.py ├── sorting.py ├── stackUsingList.py ├── startswith.py ├── staticMethodINclass.py ├── statistics.py ├── stringFormatting.py ├── stringManipulations.py ├── stringToList.py ├── table.py ├── textTOspeech.py ├── tuple.py ├── tuples.py ├── usingListFun.py ├── wishing.py ├── youtubeTranscriptSummarizer.py └── zipFunction.py /OSmoduleTOlistDirectory.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Select the directory whose content you want to list 4 | directory_path = '/' 5 | 6 | # Use the os module to list the directory content 7 | contents = os.listdir(directory_path) 8 | 9 | # Print the contents of the directory 10 | print(contents) 11 | -------------------------------------------------------------------------------- /arithmeticOperations.py: -------------------------------------------------------------------------------- 1 | #Write a program to perform different Arithmetic operations on numbetrs in python 2 | n1=int(input("Enter first number:")) 3 | n2=int(input("Enter second number:")) 4 | sum=n1+n2 5 | sub=n1-n2 6 | mul=n1*n2 7 | div=n1/n2 8 | mod=n1%n2 9 | fdiv=n1//n2 10 | exp=n1**n2 11 | print("Addition is:",sum) 12 | print("Substraction is:",sub) 13 | print("Multiplication is:",mul) 14 | print("Division is:",div) 15 | print("Modulo division is:",mod) 16 | print("Floor division is:",fdiv) 17 | print("Exponent is:",exp) -------------------------------------------------------------------------------- /array_numpy.py: -------------------------------------------------------------------------------- 1 | #Write a program to demonstrate arrays in numpy 2 | import numpy as np 3 | a = np.array(42) 4 | b = np.array([1, 2, 3, 4, 5]) 5 | c = np.array([[1, 2, 3], [4, 5, 6]]) 6 | d = np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]]) 7 | print("Entered array is:",a,"\nand its dimension is:",a.ndim) 8 | print("\nEntered array is:",b,"\nand its dimension is:",b.ndim) 9 | print("\nEntered array is:",c,"\nand its dimension is:",c.ndim) 10 | print("\nEntered array is:",d,"\nand its dimension is:",d.ndim) 11 | -------------------------------------------------------------------------------- /array_operations.py: -------------------------------------------------------------------------------- 1 | '''Write a program to demonstrate array indexing such as slicing, integer array indexing 2 | and Boolean array indexing along with their basic operations in NumPy.''' 
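# Illustrative aside (not part of the original script): the docstring above also mentions integer array indexing and Boolean array indexing, which the code below does not show explicitly. A minimal sketch of both, assuming the same array:
#     import numpy as np
#     a = np.arange(10, 1, -2)       # array([10, 8, 6, 4, 2])
#     print(a[[3, 1, 2]])            # integer array indexing -> [4 8 6]
#     print(a[a > 5])                # Boolean array indexing -> [10 8 6]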
3 | import numpy as np 4 | a=np.arange(10,1,-2) 5 | print("Sequential array with negative step value:",a) 6 | newarr=[a[3],a[1],a[2]] 7 | print("Elements at these indices are:",newarr) 8 | a=np.arange(20) 9 | print("Array is:",a) 10 | print("a[-8:17:1]=",a[-8:17:1]) 11 | print("a[10:]=",a[10:]) -------------------------------------------------------------------------------- /bikeRentelSystem.py: -------------------------------------------------------------------------------- 1 | class bikeShop: 2 | def __init__(self,stock): 3 | self.stock=stock 4 | def displayBike(self): 5 | print("Total Bikes",self.stock) 6 | def rentForBike(self,q): 7 | if q<=0: 8 | print("Enter a value greater than zero") 9 | elif q>self.stock: 10 | print("Enter a value less than or equal to the available stock") 11 | else: 12 | self.stock=self.stock-q 13 | print("Total Price",q*100) 14 | print("Total Bikes",self.stock) 15 | 16 | obj=bikeShop(100) 17 | while True: 18 | uc=int(input(''' 19 | 1 Display Stocks 20 | 2 Rent A Bike 21 | 3 Exit 22 | ''')) 23 | if uc==1: 24 | obj.displayBike() 25 | elif uc==2: 26 | n=int(input("Enter The Quantity:")) 27 | obj.rentForBike(n) 28 | else: 29 | break 30 | -------------------------------------------------------------------------------- /biodataUsingClass.py: -------------------------------------------------------------------------------- 1 | class Programmer: 2 | company = "Microsoft" 3 | def __init__(self, name, salary, pin): 4 | self.name = name 5 | self.salary = salary 6 | self.pin = pin 7 | 8 | 9 | p = Programmer("Harry", 1200000, 245001) 10 | print(p.name, p.salary, p.pin, p.company) 11 | r = Programmer("Rohan", 1200000, 245001) 12 | print(r.name, r.salary, r.pin, r.company) 13 | -------------------------------------------------------------------------------- /calculatorUsingClass.py: -------------------------------------------------------------------------------- 1 | class Calculator: 2 | def __init__(self, n): 3 | self.n = n 4 | 5 | def square(self): 6 | print(f"The square is {self.n*self.n}") 7 | 8 | def cube(self): 9 | print(f"The cube is {self.n*self.n*self.n}") 10 | 11 | def squareroot(self): 12 | print(f"The squareroot is {self.n**0.5}") 13 | 14 | a = Calculator(4) 15 | a.square() 16 | a.cube() 17 | a.squareroot() 18 | -------------------------------------------------------------------------------- /classInstances.py: -------------------------------------------------------------------------------- 1 | class Demo: 2 | a = 4 3 | 4 | o = Demo() 5 | print(o.a) # Prints the class attribute because instance attribute is not present 6 | o.a = 0 # Instance attribute is set 7 | print(o.a) # Prints the instance attribute because instance attribute is present 8 | print(Demo.a) # Prints the class attribute 9 | -------------------------------------------------------------------------------- /colored_module.py: -------------------------------------------------------------------------------- 1 | from termcolor import colored 2 | 3 | print(colored("H","red"),colored("e","yellow"),colored("l","green"),colored("l","cyan"),colored("o","blue"),colored(",","magenta"),colored("w","red"),colored("o","yellow"),colored("r","green"),colored("l","cyan"),colored("d","blue")) -------------------------------------------------------------------------------- /conditional.py: -------------------------------------------------------------------------------- 1 | def greet(name): 2 | return "Hello " + name 3 | print(greet("Alice")) --------------------------------------------------------------------------------
/constructor.py: -------------------------------------------------------------------------------- 1 | class Employee: 2 | language = "Python" # This is a class attribute 3 | salary = 1200000 4 | 5 | def __init__(self, name, salary, language): # dunder method which is automatically called 6 | self.name = name 7 | self.salary = salary 8 | self.language = language 9 | print("I am creating an object") 10 | 11 | 12 | def getInfo(self): 13 | print(f"The language is {self.language}. The salary is {self.salary}") 14 | 15 | @staticmethod 16 | def greet(): 17 | print("Good morning") 18 | 19 | 20 | harsh = Employee("Harsh", 1300000, "JavaScript") 21 | # harsh.name = "Harsh" 22 | print(harsh.name, harsh.salary, harsh.language) 23 | 24 | rohan = Employee() 25 | -------------------------------------------------------------------------------- /copy_file.py: -------------------------------------------------------------------------------- 1 | infile=input("Enter 1st file name:") 2 | outfile=input("Enter 2nd file name:") 3 | f1=open("firstfile.txt",'r') 4 | f2=open("secondfile.txt",'w+') 5 | content=f1.read() 6 | f2.write(content) -------------------------------------------------------------------------------- /dataPreprocessing.py: -------------------------------------------------------------------------------- 1 | #Missing Value Treatment, Data Discretization, Feature Selection using Variance & Correlation 2 | 3 | #Data Preprocessing 4 | '''Data Preprocessing involves cleaning and engineering data in a way that it can be used as input to several important data science tasks such as data visualization, machine learning, deep learning, and data analytics. 5 | Some of the most common data preparation tasks include feature scaling, handling missing values, categorial variable encoding, data discretization.''' 6 | 7 | #Feature Scaling 8 | '''A dataset can have different attributes. The attributes can have different magnitudes, variances, standard deviation, mean value etc. 9 | For instance, salary can be in thousands, whereas age is normallly a two-digit number. 10 | The difference in the scale or magnitude of attributes can actually affect statistical models. 11 | For instance, variables wirh bigger ranges dominate those with smaller ranges for linear models.''' 12 | 13 | #Standardization 14 | '''Standardization is the process of centering a variable at zero and standardizing the data variance to 1. 15 | To standardize a dataset, you simply have to subtract each data point from the mean 16 | of all the data points and divide the d 17 | result by the standard deviation of the data. 
18 | Feature Scaling is applied on numeric data only.''' 19 | 20 | import pandas as pd 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | import seaborn as sns 24 | titanic_data=sns.load_dataset("titanic") 25 | titanic_data=titanic_data["age","fare","prices"]] 26 | titanic_data.head() 27 | titanic_data.describe() 28 | from sklearn.preprocessing import StandardScaler 29 | scaler=StandardScaler() 30 | scaler.fit(titanic_data) 31 | titanic_data_scaled=scaler.transform(titanic_data) 32 | titanic_data_scaled=pd.DataFrame(titanic_data_scaled,columns=titanic_data.columns) 33 | titanic_data_scaled.head() 34 | sns.kdeplot(titanic_data_scaled["age"]) 35 | 36 | #Min/Max Scaling 37 | '''In min/max scaling, you subtract each value by the minimum value and divide the result by the difference between minimum and maximum value in the dataset.''' 38 | 39 | from sklearn.preprocessing import MinMaxScaler 40 | scaler=MinMaxScaler() 41 | scaler.fit(titanic_data) 42 | titanic_data_scaled=scaler.transform(titanic_data) 43 | titanic_data_scaled=pd.DataFrame(titanic_data_scaled,columns=titanic_data.columns) 44 | titanic_data_scaled.head() 45 | sns.kdeplot(titanic_data_scaled["age"]) 46 | 47 | #Handling Missing Data 48 | '''Missing values are those observations in the dataset that do not contain any value. 49 | Missing values can totally change data patterns and therefore it is extremely important to understand why missing values occur in the dataset and how to handle them.''' 50 | 51 | #Handling Missing Numerical Data 52 | '''To handle missing numerical data, we can usee statistical techniques. The use of statistical techniques or algorithms to replace missing values with statistically generated values is called imputation''' 53 | 54 | titanic_data=sns.load_dataset("titanic") 55 | titanic_data.head() 56 | titanic_data=titanic_data[["survived","pclass","age","fare"]] 57 | titanic_data.head() 58 | titanic_data.isnull().mean() 59 | titanic_data.isnull().sum() 60 | median=titanic_data.age.median() 61 | print(median) 62 | mean=titanic_data.age.mean() 63 | print(mean) 64 | titanic_data["Median_Age"]=titanic_data.age.fillna(median) 65 | titanic_data["Mean_Age"]=titanic_data.age.fillna(mean) 66 | titanic_data["Mean_Age"]=np.round(titanic_data["Mean_Age"],1) 67 | titanic_data.head(20) 68 | 69 | #Frequent Category Imputation 70 | '''One of the most common ways of handling missing values in a categorial column is to replace the missing values with the most frequent occuring values i.e., the mode of the column.''' 71 | 72 | import matplotlib.pyplot as plt 73 | import seaborn as sns 74 | 75 | titanic_data=sns.load_dataset("titanic") 76 | titanic_data=titanic_data[["embark_town","age","fare"]] 77 | titanic_data.head() 78 | titanic_data.isnull().mean() 79 | titanic_data.embark_town.value_counts().sort_values(ascending=False).plot.bar() 80 | plt.xlabel("Embark Town") 81 | plt.ylabel("Number of Passengers") 82 | titanic_data.embark_town.mode() 83 | titanic_data.embark_town.fillna("Southampton",inplace=True) 84 | 85 | #Categorial Data Encoding 86 | '''Models based on statistical algorithms such as machine learning and deep learning work with numbers. 87 | A dataset can contain numerical, categorical, datetime, and mixed variables. 88 | A mechanism is needed to convert categorical data to its numeric counterpart so that the data can be used to build statistical models. 
89 | The techniques used to convert numeric data into categorical data are called categorical data encoding schemes.''' 90 | 91 | #One Hot Encoding 92 | '''One Hot Encoding is one of the most commonly used categorical encoding schemes. 93 | In one hot encoding for each unique value in the categorical column a new column is added. 94 | Integer 1 is added to the column that corresponds to the original label and all the remaining column are filled with zeros.''' 95 | 96 | titanic_data=sns.load_dataset("titanic") 97 | titanic_data.head() 98 | titanic_data=titanic_data[["sex","class","embark_town"]] 99 | titanic_data.head() 100 | 101 | import pandas as pd 102 | temp=pd.get_dummies(titanic_data["sex"]) 103 | temp.head() 104 | pd.concat([titanic_data["sex"],pd.get_dummies(titanic_data["sex"])],axis=1).head() 105 | temp=pd.get_dummies(titanic_data["embark_town"]) 106 | temp.head() 107 | 108 | #Label Encoding 109 | '''In label encoding, labels are replaced by integers. 110 | That is why label encoding is also called as Integer Encoding.''' 111 | 112 | from sklearn.preprocessing import LabelEncoder 113 | le=LabelEncoder() 114 | le.fit(titanic_data["class"]) 115 | titanic_data["le_class"]=le.transform(transform(titanic_data["class"]) 116 | titanic_data.head() 117 | 118 | #Data Discretization 119 | '''The process of converting continuous numeric values such as price, age, and weight into discrete intervals is called discretization or binning.''' 120 | 121 | #Equal Width Discretization 122 | '''The most common type of discretization approach is fixed width discretization.''' 123 | 124 | import matplotlib.pyplot as plt 125 | import seaborn as sns 126 | import pandas as pd 127 | import numpy as np 128 | 129 | import warnings 130 | warnings.filterwarnings("ignore") 131 | diamond_data=sns.load_dataset("diamonds") 132 | diamonds_data.head() 133 | sns.distplot(diamond_data["price"]) 134 | 135 | '''The histogram for price column shows that the data is positively skewed.''' 136 | 137 | price_range=diamond_data["price"].max()-diamond_data["price"].min() 138 | print(price_range) 139 | price_range/10 140 | lower_interval=int(np.floor(diamond_data["price"].min())) 141 | upper_interval=int(np.ceil(diamond_data["price"].max())) 142 | interval_length=int(np.round(price_range/10)) 143 | print(lower_interval) 144 | print(upper_interval) 145 | print(interval_length) 146 | total_bins=[i for i in range(lower_interval,upper_interval+interval_length,interval_length)] 147 | print(total_bins) 148 | bin_labels=['Bin_no_'+str(i) for i in range(l,len(total_bins))] 149 | print(bin_labels) 150 | diamond_data['price_bins']=pd.cut(x=diamond_data['price'],bins=total_bins,labels=bin_labels,include_lowest=True) 151 | diamond_data.head() 152 | diamond_data.groupby('price_bins')['price'].count().plot.bar() 153 | plt.xticks(rotation=45) 154 | '''The output shows that the price of most of the diamonds lies in the first bin or the first interval.''' 155 | 156 | #Handling Outliers 157 | '''Outliers are the values that are too far from the rest of the observation.''' 158 | 159 | #Outlier Trimming 160 | '''As the name suggests it refers to simply removing the outliers beyond a certain threshold value. 
161 | One of the main advantage of outlier trimming is that it is extremely quick and doesn't distort the data.''' 162 | 163 | titanic_data=sns.load_dataset("titanic") 164 | titanic_data.head() 165 | sns.boxplot(y="age",data=titanic_data) 166 | 167 | '''One of the most common ways to remove the outliers is to find the Inter Quartile Range(IQR), multiply it by 1.5 and then subtract it from the first quartile value(0.25 quantile). 168 | To find the upper limit, add the product of IQR and 1.5 to the 3rd quartile value(0.75 quantile)''' 169 | 170 | IQR=titanic_data["age"].quantile(0.75)-titanic_data["age"].quantile(0.25) 171 | lower_age_limit=titanic_data["age"].quantile(0.25)-(IQR*1.5) upper_age_limit=titanic_data["age"].quantile(0.75)+(IQR*1.5) 172 | print(lower_age_limit) 173 | print(upper_age_limit) 174 | age_outliers=np.where(titanic_data["age"]>upper_age_limit,True,np.where(titanic_data["age"]0.6: 224 | corr_col=correlation_matrix.columns[i] 225 | correlated_features_matrix.add(corr_col) 226 | len(correlated_features_matrix) 227 | print(correlated_features_matrix) 228 | filtered_dataset=features.drop(correlated_features_matrix,axis=1) 229 | filtered_dataset.head() 230 | -------------------------------------------------------------------------------- /dataProcessing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | tips=pd.read_csv("examples/tips.csv") 3 | tips.head() 4 | 5 | import seaborn as sns 6 | tips["tip_pct"]=tips["tip"]/(tips["total_bill"]-tips["tip"]) 7 | tips.head() 8 | 9 | #Histograms & Density Plots 10 | '''A Histogram is a kind of bar plot that gives a discretized display of continuous data.''' 11 | tips["tip_pct"].plot.hist(bins=50) 12 | tips["tip_pct"].plot.density() 13 | 14 | #Scatter or Point Plots 15 | import numpy as np 16 | macro=pd.read_csv("examples/macrodata.csv") 17 | data=macro[["cpi","mi","tbilrate","unemp"]] 18 | trans_data=np.log(data).diff().dropna() 19 | trans_data.tail() 20 | ax=sns.regplot(x="m1",y="unemp",data=trans_data) 21 | #ax.title("Changes in log(m1) versus log(unemp)") 22 | 23 | #Pairplot 24 | sns.pairplot(trans_data,diag_kind="kde",plot_kws={"alpha":0.5}) 25 | sns.catplot(x="day",y="tip_pct",hue="time",col="smoker",kind="bar",data=tips[tips.tip_pct<1]) 26 | sns.catplot(x="day",y="tip_pct",row="time",col="smoker",kind="bar",data=tips[tips.tip_pct<1]) 27 | 28 | #Box Plot explains 5 number theory 29 | sns.catplot(x="tip_pct",y="day",kind="box",data=tips[tips.tip_pct<0.5]) 30 | 31 | #Data Aggregation and Group Operations 32 | df=pd.DataFrame({"key1":["a","a",None,"b","b","a",None],"key2":pd.Series([1,2,1,2,1,None,1],dtype="Int64"),"data1":np.random.standard_normal(7),"data2":np.random.standard_normal(7)}) 33 | print(df) 34 | grouped=df["data1"].groupby(df["key1"]) 35 | print(grouped) 36 | grouped.mean() 37 | means=df["data1"].groupby([df["key1"],df["key2"]]).mean() 38 | print(means) 39 | sums=df["data1"].groupby([df["key1"],df["key2"]]).sum() 40 | sum.unstack() 41 | states=np.array(["OH","CA","CA","OH","CA","OH"]) 42 | years=[2005,2005,2006,2005,2006,2005,2006] 43 | df["data1"].groupby([states,years]).mean() 44 | df.groupby("key1").mean() 45 | df.groupby("key2").count() 46 | df.groupby(["key1","key2"]).mean() 47 | df.groupby("key1",dropna=False).size() 48 | df.groupby(["key1","key2"],dropna=False).size() 49 | df.groupby("key1").count() 50 | for name,group in df.groupby("key1"): 51 | print(name) 52 | print(group) 53 | df.groupby("key1")["data1"].sum() 54 | 
df.groupby(["key1","key2"])["data2"].mean() 55 | tips.head() 56 | tips.groupby(["day","smoker"])["tip_pct"].mean() 57 | tips.groupby(["day","smoker"])["tip_pct"].agg("mean") 58 | tips.groupby(["day","smoker"])["tip_pct"].agg(["mean","std","count"]) 59 | tips.groupby(["day","smoker"])["tip_pct"].agg(["average","mean"),("stddev",np.std)]) 60 | functions=["count","mean","max"] 61 | result=tips.groupby(["day","smoker"])[["tip_pct","total_bill"]].agg(functions) 62 | print(functions) 63 | -------------------------------------------------------------------------------- /data_types.py: -------------------------------------------------------------------------------- 1 | #Write a program to demonstrate different numeric data types 2 | a=5 3 | b=5.7 4 | c=2+7j 5 | d="python" 6 | print("Value of a:",a,"\t\t\tDatatype is:",type(a)) 7 | print("Value of b:",b,"\t\tDatatype is:",type(b)) 8 | print("Value of c:",c,"\t\tDatatype is:",type(c)) 9 | print("Value of d:",d,"\t\tDatatype is:",type(d)) 10 | -------------------------------------------------------------------------------- /deepLearningAndModel.py: -------------------------------------------------------------------------------- 1 | +#Introduction to Deep Learning and Model on Iris Dataset 2 | #Deep Learning Framework 3 | '''Deep Learning is a field within Machine Learning that deals with building amd using Neural Network Models. 4 | Neural Networks mimic the functioning of a human brain. 5 | Neural Networks with more than three layers are typically categorised as Deep Learning Networks.''' 6 | #Perceptron 7 | '''The Perceptron is the unit of learning in an artificial neural networks.A Perceptron resembles a human brain cell. 8 | A Perceptron is a single cell or node in a neural network. 9 | In Deep Learning, we replace slope of model with weights called as w and intercept with the bias called as b. 10 | Weights and Biases become the parameters for a neural network. 11 | The number of weights equals the number of inputs/features.''' 12 | 13 | #Artificial Neural Network 14 | '''An ANN is a network of perceptrons. A deep neural network usually has three or more layers. 15 | Each node has its own weights, biases and activation function.Each node is connected to all the nodes in the next layer forming a dense network. 16 | Training an ANN means determining the right values for these parameters and hyperparameters such that it maximizes the accuracy of predictions for the given use case.''' 17 | 18 | #Neural Network Architecture 19 | #Input Layer 20 | '''The input to Deep Learning model is usually a vector of Numeric values. 21 | Vectors are usually defined using NumPy arrays. It represents the feature variables or independent variables that are used for prediction as well as training.''' 22 | #Hidden Layer 23 | '''An ANN can have one or more hidden layers. The more the layers are the deep the network is. 24 | Each hidden layer can have one or more nodes. Typically, the node count is configured in range of 2^n. Ecamples are 8,16,32,64,128 etc. 25 | A neural network is defined by the number of layers and nodes. 26 | The output of each node in previous layer will become the input for every node in the current layers. 27 | When there are more nodes and layers it usually results in better accuracy. As a general practice, start with small number and keep adding until an acceptable accuracy levels are obtained.''' 28 | #Weights and Biases 29 | '''They form the basis for Deep Learning Algorithms. 
Weights and Biases are trainable parameters in a neural network model. 30 | Each input for each node will have an associated weight with it.''' 31 | #Activation Functions 32 | '''An activation function plays an important role in creating the output of a node in the neural network. 33 | An activation function takes the matrix output of the node and determines if and how the node will propagate information to the next layer. 34 | The main objective of an activation function is to convert the output to a non-linear value. Activation functions serve as a critical step in helping a neural network learn specific patterns in the data. 35 | TanH:- A TanH function normalizes the output to the range (-1 to +1). 36 | ReLU:- Rectified Linear Unit- A ReLU produces zero if the input is negative; otherwise it passes the input through unchanged. 37 | Softmax Function:- This is used for classification problems. It produces a vector of probabilities, one for each of the possible classes in the outcome. The class with the highest probability is taken as the final class. 38 | All of these activation functions are specified as hyperparameters of the model.''' 39 | 40 | #Output Layer 41 | '''The output layer is the final layer in the neural network, where the desired predictions are obtained.''' 42 | #Training a Neural Network Model 43 | '''Set up and initialisation:- The weights and biases of the network are initialised, typically with small random values, before training begins. 44 | Forward Propagation:- Movement of data from the input layer through the hidden layers to the output layer.''' 45 | #Measure Accuracy and Error 46 | '''Back Propagation:- If the error is high, the weights and biases are adjusted through gradient descent to improve accuracy. 47 | Gradient Descent is the process of repeating forward and backward propagation in order to reduce the error and move closer to the desired model.
48 | Batches and Epochs:- 10000/10(1000) 49 | Validation and Testing''' 50 | 51 | #Deep Learning Example- Iris Dataset 52 | import pandas as pd 53 | import numpy as np 54 | import matplotlib.pyplot as plt 55 | import os 56 | import tensorflow as tf 57 | from sklearn.model_selection import train_test_split 58 | from sklearn.preprocessing import StandardScaler 59 | import warnings 60 | warnings.filterwarnings("ignore") 61 | 62 | '''Prepare input data for deep learning 63 | Load data into pandas dataframe 64 | Convert the dataframe into numpy array 65 | Scale the feature dataset 66 | Use of one hot encoding for the target variable 67 | Split the dataset into training and test datasets 68 | Load Data and Review content''' 69 | 70 | iris_data=pd.read_csv("iris.csv") 71 | print(iris_data.head()) 72 | '''Use label encoder to convert String to Numeric values for the target variable''' 73 | from sklearn.preprocessing import LabelEncoder 74 | label_encoder=LabelEncoder() 75 | iris_data['Species']=label_encoder.fit_transform(iris_data['Species']) 76 | print(iris_data.head()) 77 | #Converting input to numpy array 78 | np_iris=iris_data.to_numpy() 79 | print(np_iris.shape) 80 | #Separate features and target variables 81 | X_data=np_iris[:,0:4] 82 | Y_data=np_iris[:,4] 83 | print("\n Features before Scaling: \n---------") 84 | print(X_data[:5,:]) 85 | print("\ntarget before one-hot ending: \n---------") 86 | print(Y_data[:5]) 87 | #Create a standard scaler object that if fit on the input data 88 | scaler=StandardScaler().fit(X_data) 89 | #scale tge numeric feature variable 90 | X_data=scaler.transform(X_data) 91 | #convert target variable as a one-hot encoded array 92 | Y_data=tf.keras.utils.to_categorical(Y_data,3) 93 | print("\n Features after Scaling: \n----------") 94 | print(X_data[:5,:]) 95 | print("\ntarget after one-hot encoding: \n----------") 96 | print(Y_data[:5]) 97 | #Splitting the data into training and test sets 98 | X_train,X_test,Y_train,Y_test=train_test_split(X_data,Y_data,test_size=0.10) 99 | print("\n Train test Dimensions: \n----------") 100 | print(X_train.shape,X_test.shape,Y_train.shape,Y_test.shape) 101 | '''Create a Model 102 | Number of hidden layers 103 | Number of nodes in each layer 104 | Activation functions 105 | Loss function and accuracy measurements. ''' 106 | from tensorflow import keras 107 | #Number of classes in the target variable 108 | NB_CLASSES=3 109 | #Create a sequential model in keras 110 | model=tf.keras.models.Sequential() 111 | #add the first hidden layer 112 | model.add(keras.layers.Dense(128,#Number of nodes 113 | input_shape=(4,),#number of input variables 114 | name="Hidden-Layer-1",#Logical name 115 | activation="relu"))#activation function 116 | #add a second hidden layer 117 | model.add(keras.layers.Dense(128,name="Hidden-Layer-2",activation="relu")) 118 | #add an output layer with softmax function 119 | model.add(keras.layers.Dense(NB_CLASSES,name="Output-Layer",activation="softmax")) 120 | #compile the model with loss and metrics 121 | model.compile(loss="categorical_crossentropy",metrics=["accuracy"]) 122 | #print the model summary 123 | model.summary() 124 | #Make it verbose so we can see the process 125 | VERBOSE=1 126 | #Set hyperparameters for training 127 | #Set batch size 128 | BATCH_SIZE=16 129 | #Set the number of epochs 130 | EPOCHS=20 131 | #Set the validation split. 20% of the training dataset will be used for validation 132 | VALIDATION_SPLIT=0.2 133 | print("\nTraining Progress: \n------------") 134 | '''Fitting the model. 
This will perform the entire training cycle, included forward propagation, loss computation, backward propagation and gradient descent.''' 135 | history=model.fit(X_train,Y_train,batch_size=BATCH_SIZE,epochs=EPOCHS,verbose=VERBOSE,validation_split=VALIDATION_SPLIT) 136 | print("Accuracy During Training: \n------------") 137 | import matplotlib.pyplot as plt 138 | #Plot the accuracy of the model after each epoch 139 | pd.DataFrame(history.history)["accuracy"].plot(figsize=(8,5)) 140 | plt.title("Accuracy improvement after each epoch") 141 | plt.show() 142 | #Evaluate the model against the test dataset and print the result 143 | print("\nEvaluate against test dataset: \n------------") 144 | model.evaluate(X_test,Y_test) 145 | #Saving a model 146 | model.save("iris_save") 147 | #Load the model 148 | loaded_model=keras.models.load_model("iris_save") 149 | #print the model summary 150 | loaded_model.summary() 151 | #Predictions with Deep Learning Model 152 | #raw prediction data 153 | prediction_input=[[2.6,12.,2.4,4.4]] 154 | #scale the prediction data with the same scaling object 155 | scaled_input=scaler.transform(prediction_input) 156 | #get the raw prediction probabilities 157 | raw_prediction=loaded_model.predict(scaled_input) 158 | print("Raw Prediction Output (Probabilities):",raw_prediction) 159 | #Find Prediction 160 | prediction=np.argmax(raw_prediction) 161 | print("Prediction is",label_encoder.inverse_transform([prediction])) -------------------------------------------------------------------------------- /dictionary.py: -------------------------------------------------------------------------------- 1 | #Write a program to demonstrate working with dictionaries in python 2 | #empty dictionary 3 | my_dict={} 4 | print("Empty dictionary is:",my_dict) 5 | #dictonary with integer keys 6 | my_dict={1:'apple',2:'ball'} 7 | print("Dictionary with integer keys:",my_dict) 8 | #dictionary with mixed keys 9 | my_dict={'name':'rishi',1:[2,4,3]} 10 | print("Dictionary with mixed keys",my_dict) 11 | #using dict.fromkeys() 12 | my_dict=dict.fromkeys("abcd",'alphabet') 13 | print("Dictionary created by using dict.fromkeys method=",my_dict) 14 | #using get method 15 | my_dict={'name':'jack','age':25} 16 | print(my_dict['name']) #output jack 17 | #changing and adding dictionary elements 18 | my_dict['age']=18 #update value 19 | my_dict['class']="B.Tech" #updating value 20 | print("After changing and adding the values,the new dictionary=",my_dict) 21 | #using items() 22 | print("Items in the dictionary is:",my_dict.items()) 23 | #using keys() 24 | print("Keys in the dictionary is:",my_dict.keys()) 25 | #using values() 26 | print("Values in the dictionary is:",my_dict.values()) -------------------------------------------------------------------------------- /dictionaryFunctions.py: -------------------------------------------------------------------------------- 1 | d={ 2 | 'name':'python', 3 | 'fees':8000, 4 | 'duration':'2 months' 5 | } 6 | print(d) 7 | print(type(d)) 8 | print(d['fees']) 9 | for n in d: 10 | print(n) 11 | print(d[n]) 12 | print(d.get('name')) 13 | for a in d.keys(): 14 | print(a) 15 | for a in d.values(): 16 | print(a) 17 | for a,b in d.items(): 18 | print(a,b) 19 | del d['fees'] 20 | print(d) 21 | print(d.pop('duration')) 22 | print(d) 23 | d=dict(name='python',fees=8000) 24 | print(d) 25 | d.update({'fees':10000}) 26 | print(d) 27 | print(d.clear()) 28 | print(d) 29 | d['desc']="This is Python" 30 | print(d) 31 | course={ 32 | 'php':{'duration':'3 months','fees':15000}, 33 | 
'java':{'duration':'2 months','fees':10000}, 34 | 'python':{'duration':'1 months','fees':12000}, 35 | } 36 | print(course) 37 | print(course['php']) 38 | print(course['php']['fees']) 39 | for k,v in course.items(): 40 | print(k,v) 41 | for k,v in course.items(): 42 | print(k,v['duration'],v['fees']) 43 | course['java']['fees']=20000 44 | print(course) 45 | -------------------------------------------------------------------------------- /escapeSequences.py: -------------------------------------------------------------------------------- 1 | letter = "Dear Hariram,\n\tThis python course is nice.\nThanks!" 2 | print(letter) 3 | -------------------------------------------------------------------------------- /factorial.py: -------------------------------------------------------------------------------- 1 | n = int(input("Enter the number: ")) 2 | product = 1 3 | for i in range(1, n+1): 4 | product = product * i 5 | 6 | print(f"The factorial of {n} is {product}") 7 | -------------------------------------------------------------------------------- /fileOperations&dataCleaning.py: -------------------------------------------------------------------------------- 1 | #unique values & value counts 2 | import numpy as np 3 | import pandas as pd 4 | obj=pd.Series(["c","a","d","a","a","b","b","c","c"]) 5 | uniques=obj.unique() 6 | print(uniques) 7 | print(obj.value_counts()) 8 | 9 | #data loading 10 | df=pd.read_csv("examples/ex1.csv") 11 | df.head() 12 | pd.read_csv("examples/ex2.csv") 13 | pd.read_csv("examples/ex2.csv",header=None) 14 | pd.read_csv("examples/ex2.csv",names=["a","b","c","d","message"]) 15 | names=["a","b","c","d","message"] 16 | pd.read_csv("examples/ex2.csv",names=names,index_col="message") 17 | result=pd.read_csv("examples/ex3.txt",sep="\s+") 18 | print(result) 19 | pd.read_csv("examples/ex4.csv",skiprows=[0,2,3]) 20 | result.to_csv("out.csv") #saving as a csv file 21 | 22 | #data cleaning & preparation 23 | #handling missing data 24 | float_data=pd.Series([1.2,-3.5,np.nan,0]) 25 | print(float_data) 26 | float_data.isna() 27 | string_data=pd.Series(["aardvark",np.nan,None,"avacado"]) 28 | print(string_data) 29 | string_data.isna() 30 | float_data=pd.Series([1,2,None],dtype="float64") 31 | print(float_data) 32 | float_data.isna() 33 | data=pd.Series([1,np.nan,3.5,np.nan,7]) 34 | data.dropna() 35 | data[data.notna()] 36 | data=pd.DataFrame([1.,6.5,3.],[1.,np.nan,np.nan],[np.nan,np.nan,np.nan],[np.nan,6.5,3.]]) 37 | print(data) 38 | data.dropna() 39 | data.dropna(how="all") 40 | data[4]=np.nan 41 | print(data) 42 | data.dropna(axis="columns",how="all") 43 | df=pd.DataFrame(np.random.standard_normal((7,3))) 44 | df.iloc[:4,1]=np.nan 45 | df.iloc[:2,2]=np.nan 46 | print(df) 47 | df.dropna() 48 | df.dropna(thresh=2) 49 | 50 | #filling in missing data 51 | print(df) 52 | df.fillna(0) 53 | df.fillna({1:0.5,2:0.9}) 54 | df=pd.DataFrame(np.random.standard_normal((6,3))) 55 | df.iloc[2:,1]=np.nan 56 | df.iloc[4:,2]=np.nan 57 | print(df) 58 | df.fillna(method="ffill") 59 | df.fillna(method="ffill",limit=2) 60 | data=pd.DataFrame({"k1":["one","two"]*3+["two"],"k2":[1,1,2,3,3,4,4]}) 61 | print(data) 62 | data.duplicated() 63 | data.drop_duplicates() 64 | data['v1']=range(7) 65 | print(data) 66 | data.drop_duplicates(subset=["k1"]) 67 | data.drop_duplicates(["k1","k2"],keep="last") 68 | data=pd.Series([1.,-999.,2,-999.,-1000.,3.]) 69 | print(data) 70 | data.replace(-999,np.nan) 71 | data.replace([-999,1000],np.nan) 72 | data.replace([-999,1000],[np.nan,0]) 73 | data.replace({-999:np.nan,-1000:0}) 
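# Note (added for clarity): dropna, fillna, drop_duplicates and replace shown above all return new objects by default and leave the original Series/DataFrame unchanged; reassign the result (e.g. data = data.replace(-999, np.nan)) or use inplace=True where supported to keep the change.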
74 | data=pd.DataFrame(np.arange(12).reshape((3,4)),index=["Ohio","Colorado","New York"],columnns=["one","two","three","four"]) 75 | print(data) 76 | def transform(x): 77 | return x[:4].upper() 78 | data.index=data.index.map(transform) 79 | print(data) 80 | data.rename(index=str.title,columns=str.upper) 81 | data.rename(index={"OHIO":"INDIANA"},columns={"three":"peekaboo"}) 82 | -------------------------------------------------------------------------------- /findFUNC.py: -------------------------------------------------------------------------------- 1 | name = "Ramlal is a good boy and Jonny is a bad boy. " 2 | 3 | print(name.find(" ")) 4 | -------------------------------------------------------------------------------- /functions.py: -------------------------------------------------------------------------------- 1 | #simple function 2 | def showdata(): 3 | print("WELCOME TO CSVTU") 4 | showdata() 5 | #function with arguments 6 | def sum(a,b): 7 | print(a+b) 8 | sum(10,20) 9 | sum(40,20) 10 | def sum(a,b=1): 11 | print(a+b) 12 | sum(10) 13 | sum(40,20) 14 | #function with return type 15 | def square(x): 16 | return x*x,x**2 17 | s=square(5) 18 | print(s) 19 | -------------------------------------------------------------------------------- /hollowSquare.py: -------------------------------------------------------------------------------- 1 | ''' 2 | *** 3 | * * for n = 3 4 | *** 5 | ''' 6 | n = int(input("Enter the number: ")) 7 | for i in range(1, n+1): 8 | if(i==1 or i==n): 9 | print("*"* n, end="") 10 | else: 11 | print("*", end="") 12 | print(" "* (n-2), end="") 13 | print("*", end="") 14 | print("") 15 | -------------------------------------------------------------------------------- /json.py: -------------------------------------------------------------------------------- 1 | import json 2 | d={ 3 | 'course_name':'Python', 4 | 'fees':15000 5 | } 6 | f=json.dumps(d) 7 | print(type(f))#string 8 | print(f) 9 | d='{"cname":"Python","fees":12000,"duration":"2 months"}' 10 | x=json.loads(d) 11 | print(type(x))#dictionary 12 | print(x) 13 | for a in x: 14 | print(a,x[a]) 15 | #How to read and write JSON file in python 16 | import json 17 | file=open("posts.json","r") 18 | x=file.read() 19 | finaldata=json.loads(x) 20 | for a in finaldata: 21 | print(a) 22 | print(a['title'],a['userId']) 23 | -------------------------------------------------------------------------------- /letter.py: -------------------------------------------------------------------------------- 1 | letter = '''Dear <|Name|>, 2 | You are selected! 
3 | <|Date|> ''' 4 | 5 | print(letter.replace("<|Name|>", "Hridyesh Kumar").replace("<|Date|", "24 September 2050")) 6 | -------------------------------------------------------------------------------- /listComprehension.py: -------------------------------------------------------------------------------- 1 | l=[] 2 | for a in range(1,101): 3 | l.append(a) 4 | print(l) 5 | n=[m for m in range(1,101) if m%2==0] 6 | print(n) 7 | s="hello" 8 | d=[g for g in s] 9 | print(d) 10 | -------------------------------------------------------------------------------- /listFunctions.py: -------------------------------------------------------------------------------- 1 | l=[20,30,50,60] 2 | print(l) 3 | #del()=>delete element through index 4 | del l[1] 5 | print(l) 6 | #pop()=>delete element through index and returns the value of deleted element 7 | print(l.pop(2)) 8 | print(l) 9 | #remove()=>delete element through value rather than indexing 10 | l.remove(20) 11 | print(l) 12 | #clear()=>it returns a empty list 13 | l.clear() 14 | print(l) 15 | l=[20,30,40,50] 16 | l[0]=90 17 | print(l) 18 | l.insert(0,10) 19 | print(l) 20 | l.append(70) 21 | print(l) 22 | n=[60,80] 23 | l.append(n) 24 | print(l) 25 | l.extend(n) 26 | print(l) 27 | l=[10,20,20,10,30,40,10,50] 28 | a=l.count(10) 29 | print(l) 30 | print(a) 31 | m=max(l) 32 | print(m) 33 | l1=["Hello","World"] 34 | k=max(l1) 35 | print(k) 36 | m=min(l) 37 | print(m) 38 | k=min(l1) 39 | print(k) 40 | l.sort() 41 | print(l) 42 | l.reverse() 43 | print(l) 44 | l1.reverse() 45 | print(l1) 46 | a=l1.index("World") 47 | print(a) 48 | -------------------------------------------------------------------------------- /listIteration.py: -------------------------------------------------------------------------------- 1 | l=[10,20,30,40,60,80,90] 2 | t=len(l) 3 | for a in range(t): 4 | print(l[a]) 5 | print("") 6 | for a in l: 7 | print(a) 8 | print("") 9 | for a in range(t-1,-1,-1): 10 | print(l[a]) 11 | -------------------------------------------------------------------------------- /listSlicing.py: -------------------------------------------------------------------------------- 1 | #LIST=>Mutable, and have multiple data types 2 | l=[10,20,30,50,"Hello"] 3 | print(l[3],l[4]) 4 | print(l[0:2]) 5 | print(l[0: :2]) 6 | print(l[3: ]) 7 | print(l[-1: :-2]) 8 | print(l[-1: :-1]) 9 | -------------------------------------------------------------------------------- /newfile.py: -------------------------------------------------------------------------------- 1 | #conditional statements 2 | x=10 3 | if x>=10: 4 | print("YES") 5 | else: 6 | print("NO") 7 | #Loops 8 | list_a=[10,20,30,40,50] 9 | for i in list_a: 10 | print(i) 11 | x=5 12 | while x>=0: 13 | print(x) 14 | x=x-1 15 | #Dictionary 16 | '''A dictionary stores a collection of key-value pairs, where key and value are python objects. 17 | Each key is associated with a value so that a value can be conveniently retrieved, inserted modified or deleted given a particular key. 
18 | One approach for creating a dictionary is to use curly braces{}''' 19 | dict={} 20 | print(dict) 21 | d1={"a":"some value","b":[1,2,3,4]} 22 | print(d1) 23 | print(type(d1)) 24 | d1[7]="an integer" 25 | print(d1) 26 | d1[5]="some value" 27 | print(d1) 28 | d1["dummy"]="another value" 29 | print(d1) 30 | del d1[5] 31 | print(d1) 32 | print(d1.pop("dummy")) 33 | print(d1) 34 | print(d1.keys()) 35 | print(d1.values()) 36 | # If we need to iterate over both the keys and values, we can use the items method over the keys and values as 2-tuples 37 | print(list(d1.items())) 38 | d1.update({"b":"fool","c":12}) 39 | print(d1) 40 | #Categorize a list of words by their letter as a dictionary of lists 41 | words=["apple","bat","bar","atom","book","cook"] 42 | by_letter={} 43 | for word in words: 44 | #print(word) 45 | letter=word[0] #letter=a 46 | if letter not in by_letter: 47 | by_letter[letter]=[word] #by_letter["a"]=["apple"] by_letter["b"]=["bat"] 48 | else: 49 | by_letter[letter].append(word) #by_letter["b"].append("bar") 50 | print(by_letter) 51 | # Set 52 | '''A set is an unordered collection of unique elements. A set can be created in two ways via the set function or via the set literal with curly braces{}''' 53 | print(set([2,2,2,3,4,3,4,1,2,5])) 54 | a={1,2,3,4,5} 55 | b={3,4,5,6,7,8} 56 | print(a.union(b)) 57 | print(a|b) 58 | print(a.intersection(b)) 59 | print(a&b) 60 | # List,Set and Dictionary comprehension 61 | '''List Comprehension are a convenient and widely used python language feature . 62 | It allows us to concisely form a new list by filtering the elements of a collection, transforming the elements passing the filter into one concise expression. 63 | Filter out string with length greater then 2 and convert them to upper case.''' 64 | strings=["a","as","bat","car","dove","python"] 65 | result=[] 66 | for i in strings: 67 | if len(i)>2: 68 | result.append(i.upper()) 69 | print(result) 70 | print([x.upper() for x in strings if len(x)>2]) 71 | -------------------------------------------------------------------------------- /newfile1.py: -------------------------------------------------------------------------------- 1 | #"python" or 'python 'both are same 2 | #tuple is a fixed length,immutable sequence of python objects which,once assigned ,cannot be changed. 3 | tup_a=(4,5,6) 4 | print(tup_a) 5 | print(type(tup_a)) 6 | tup=tuple('string') 7 | print(tup) 8 | print(tup_a) 9 | #tup_a[0]=20 cannot change value of tuple 10 | print(tup_a[0]) 11 | print(tup[0:3]) #last index is excluded 12 | print(tup[:]) #start:stop 13 | print(tup[::2]) #start:stop:step 14 | nested_tup=(4,5,6),(7,8) 15 | print(nested_tup) 16 | print(nested_tup[0]) 17 | print(nested_tup[0][1]) 18 | tuple=(4,None,'fool')+('bar',)#concatenate tuple 19 | print(tuple) 20 | print(('fool','bar')*4)#multiplying size of tuple 21 | tup=(4,5,6) 22 | a,b,c=tup 23 | print(a,b) 24 | print(a,b,c) 25 | print(tup[::-1]) 26 | print(tup[-1]) 27 | a=(1,2,2,2,2,3,4,2) 28 | print(a.count(2)) 29 | # lists are variable length and their contents can be modified in place.Lists are mutable.We can define them using square brackets[] or using list type function. 
30 | list1=[2,3,7,None] 31 | print(list1) 32 | print(type(list1)) 33 | gen=range(20) 34 | print(gen) 35 | print(list(range(20))) 36 | # Adding and removing elements 37 | # Elements can be appended to the end of the list with the append method 38 | list2=['fool','peeka','bar'] 39 | list2.append('war') 40 | print(list2) 41 | list2.insert(1,'red') 42 | print(list2) 43 | print(list2.pop(2)) 44 | print(list2) 45 | list2.remove('fool') 46 | print(list2) 47 | # concatenate 48 | print([4,None,'fool']+[7,8,(2,3)]) 49 | x=[4,None,'fool'] 50 | x.extend([7,8,(2,3)]) 51 | print(x) 52 | a=[7,2,5,1,3] 53 | a.sort() 54 | print(a) 55 | a.sort(reverse=True) 56 | print(a) 57 | b=["saw","small","He","foxes","six"] 58 | b.sort(key=len) 59 | print(b) 60 | seq=[7,2,3,7,5,6,0,1] 61 | print(seq[1:5]) 62 | seq[3:5]=[6,3] 63 | print(seq) 64 | print(seq[:5]) 65 | print(seq[3:]) 66 | print(seq[-1]) 67 | print(seq[::2]) 68 | print(seq[::-1]) 69 | -------------------------------------------------------------------------------- /newfile2.py: -------------------------------------------------------------------------------- 1 | #Nested List Comprehension 2 | data=[["John","Emily","Michael","Mary","Steven"],["Maria","Juan","Javier","Natalia","Pilar"]] 3 | print(data) 4 | #We want to get a single list containing all names with two or more a's in them. 5 | interest=[] 6 | for names in data: 7 | enough=[name for name in names if name.count("a")>=2] 8 | interest.extend(enough) 9 | print(interest) 10 | result=[name for names in data for name in names if name.count("a")>=2] 11 | print(result) 12 | #Indentation is colon mark which is 4 space or a tab 13 | x=10 14 | if x>5: 15 | print("X is greater than 5") 16 | else: 17 | print("X is not greater than 5") 18 | #FUNCTIONS 19 | '''Functions are the primary and most important method of code organisation and reuse in python. 20 | Functions are declared with the def keyword. 
A function contains a block of code with an optimal use of the return keyword.''' 21 | def fun(x,y): 22 | return x+y 23 | print(fun(10,20)) 24 | result=fun(20,30) 25 | print(result) 26 | def fun_with_return(x): 27 | print(x) 28 | result=fun_with_return("hello") 29 | print(result) 30 | def fun_with_return(): 31 | print("hello") 32 | result=fun_with_return() 33 | print(result) 34 | #Positional Arguments 35 | #Keyword Arguments 36 | def fun(x,y,z=1.5): 37 | if z>1: 38 | return z*(x+y) 39 | else: 40 | return z/(x+y) 41 | print(fun(5,6)) 42 | print(fun(5,6,z=0.7)) 43 | print(fun(x=10,y=20,z=30)) 44 | print(fun(5,6,0.7)) 45 | a=[] 46 | def fun(): 47 | for i in range(5): 48 | a.append(i) 49 | print(fun()) 50 | print(a) 51 | def fun(): 52 | global a 53 | a=[] 54 | for i in range(5): 55 | a.append(i) 56 | fun() 57 | print(a) 58 | def f(): 59 | a=5 60 | b=6 61 | c=7 62 | return a,b,c 63 | a,b,c=f() 64 | print(a,b,c) 65 | def f(): 66 | a=5 67 | b=6 68 | c=7 69 | return {"a":a,"b":b,"c":c} 70 | print(f()) 71 | states=[" Alabama ","Georgia!","georgia","Georgia","Florida","south carolina##","West virginia?"] 72 | import re # regular expressions 73 | def clean_strings(strings): 74 | result=[] 75 | for value in strings: 76 | value=value.strip() 77 | value=re.sub("[!#?]","",value) 78 | value=value.title() 79 | result.append(value) 80 | return result 81 | print(clean_strings(states)) 82 | #Lambda Functions 83 | '''Python has support for anonymous or lambda functions, which are a way of writing functions consisting of a single statement, result of which is the return value''' 84 | def short_function(x): 85 | return x*2 86 | print(short_function(20)) 87 | equiv=lambda x:x*2 88 | print(equiv(20)) 89 | equiv=lambda x,y:x*y*2 90 | print(equiv(20,40)) 91 | def apply_to_list(some_list,f): 92 | return [f(x) for x in some_list] 93 | ints=[4,0,1,5,6] 94 | print(apply_to_list(ints,lambda x:x*2)) 95 | strings=["foo(","card","bar","aaaa","abab"] 96 | #sorting based on unique characters=>set(x) 97 | strings.sort(key=lambda x:len(set(x))) 98 | print(strings) 99 | #ERRORS AND EXEPTION HANDLING 100 | print(float("1.2345")) 101 | #print(float("something"))#ValueError 102 | def attempt_float(x): 103 | try: 104 | return float(x) 105 | except: 106 | return x 107 | #The code in the except part of the block will only be executed if float(x) raises and exception 108 | print(attempt_float("1.2345")) 109 | print(attempt_float("something")) 110 | #NUMPY 111 | '''NumPy, short for Numerical Python, is one of the most important fundamental packages for numerical computing in python''' 112 | import numpy as np 113 | arr=np.arange(1_000_000) 114 | print(arr) 115 | list=list(range(1_000_000)) 116 | print(list[1:10]) 117 | %timeit arr2=arr*2 118 | %timeit list2=[x*2 for x in list] 119 | '''One of the key features of NumPy is its N-dimensional array object or n-D array, which is fast, flexible container for large datasets in python.''' 120 | data=np.array([[1.5,0.1,3],[0,-3,6.5]]) 121 | print(data) 122 | print(data*10) 123 | print(data+data) 124 | print(data.shape) 125 | print(data.dtype) 126 | print(data.ndim) 127 | data1=[6,7.5,8.0,1] 128 | arr1=np.array(data1) 129 | print(arr1) 130 | print(arr1.ndim) 131 | print(np.zeros(10)) 132 | print(np.zeros((3,6))) 133 | print(np.ones((3,6))) 134 | print(np.arange(15)) 135 | arr1=np.array([1,2,3],dtype=np.float64) 136 | arr2=np.array([1,2,3],dtype=np.int32) 137 | print(arr1.dtype) 138 | print(arr2.dtype) 139 | arr=np.array([1,2,3,4,5]) 140 | print(arr.dtype) 141 | float_arr=arr.astype(np.float64) 
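# Note (added for clarity): astype returns a new array with the requested dtype; the original arr keeps its integer dtype, so float_arr is a copy rather than a view.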
142 | print(float_arr.dtype) 143 | print(float_arr) 144 | arr=np.array([1.,2.,3.],[4.,5.,6.]) 145 | print(arr) 146 | print(arr*arr) 147 | print(arr-arr) 148 | print(1/arr) 149 | print(arr**2) 150 | arr2=np.array([0.,4.,1.],[7.,2.,12.]) 151 | print(arr2) 152 | print(arr2>arr) 153 | arr=np.arange(10) 154 | print(arr) 155 | print(arr[5]) 156 | print(arr[5:8]) 157 | arr[5:8]=12 158 | print(arr) 159 | arr_slice=arr[5:8] 160 | print(arr_slice) 161 | arr_slice[1]=12345 162 | print(arr_slice) 163 | arr2d=np.array([[1,2,3],[4,5,6],[7,8,9]]) 164 | print(arr2d) 165 | print(arr2d[2]) 166 | print(arr2d[2][1]) 167 | print(arr2d[:2]) 168 | print(arr2d[:2,1:]) 169 | arr=np.arange(15).reshape((3,5)) 170 | print(arr) 171 | arr=np.arange(15).reshape((5,3)) 172 | print(arr) 173 | print(arr.T)#Transpose 174 | arr=np.array([[0,1,0],[1,2,-2],[6,3,2],[-1,0,-1],[1,0,1]]) 175 | print(arr) 176 | print(np.dot(arr.T,arr))#Matrix Multiplication 177 | print(arr.T@arr)#Matrix Multiplication 178 | a 179 | -------------------------------------------------------------------------------- /newfile3.py: -------------------------------------------------------------------------------- 1 | #PANDAS 2 | '''Pandas contains data structures and data manipulation tools designed to make data cleaning and analysis fast and convenient in Python. 3 | Series and Dataframe 4 | Series is a one-dimensional array like object containing a sequence of value.''' 5 | import pandas as pd 6 | obj=pd.Series([4,7,-5,3]) 7 | print(obj) 8 | obj2=pd.Series([4,7,-5,3],index=["d","b","a","c"]) 9 | print(obj2) 10 | print(obj2["a"]) 11 | obj2["d"]=6 12 | print(obj2) 13 | print(obj2[["c","a","d"]]) 14 | obj2=pd.Series([4,7,-5,3,5],index=["d","b","a","a","c"]) 15 | print(obj2) 16 | print(obj2[obj2>0]) 17 | print(obj2*2) 18 | import numpy as np 19 | np.exp(obj2) 20 | sdata={"Ohio":35000,"Texas}":71000,"Oregon":16000,"Utah":5000} 21 | obj3=pd.Series(sdata) 22 | print(obj3) 23 | print(obj3.to_dict()) 24 | #DATA FRAME 25 | '''A DataFrame represents a rectangular table of data and contains an ordered, named collection of columns each of which can be a different value type. 26 | The DataFrame has both a row index and column index.''' 27 | data={"states":["Ohio","Ohio","Ohio","Nevada","Nevada","Nevada"],"year":[2000,2001,2002,2001,2002,2003],"pop":[1.5,1.7,3.6,2.4,2.9,3.2]} 28 | frame=pd.DataFrame(data) 29 | print(frame) 30 | print(frame.head()) 31 | print(frame.tail()) 32 | print(pd.DataFrame(data,columns=["year","states","pop"])) 33 | frame2=pd.DataFrame(data,columns=["year","states","pop","debt"]) 34 | print(frame2) 35 | print(frame2.columns) 36 | print(frame2["states"]) 37 | print(frame2.year) 38 | print(frame2[["states","year"]]) 39 | print(frame2.loc[1]) 40 | print(frame2.iloc[2]) 41 | frame2["debt"]=16.5 42 | print(frame2) 43 | frame2["debt"]=np.arange(6.) 
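# Note (added for clarity): when a list or array is assigned to a DataFrame column, its length must match the number of rows (6 here). Assigning a pandas Series instead aligns on the index, e.g. frame2["debt"] = pd.Series([-1.2, -1.5, -1.7], index=[2, 4, 5]) would leave the remaining rows as NaN (the values in this snippet are hypothetical).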
44 | print(frame2) 45 | frame2["eastern"]=frame2["states"]=="ohio" 46 | print(frame2) 47 | del frame2["eastern"] 48 | print(frame2.columns) 49 | frame2.index.name="year" 50 | frame2.columns.name="state" 51 | print(frame2) 52 | data=pd.DataFrame(np.arange(16).reshape((4,4)),index=["Ohio","Colorado","Utah","New York"],columns=["one","two","three","four"]) 53 | print(data) 54 | print(data["two"]) 55 | print(data[["three","one"]]) 56 | print(data[data["three"]>5]) 57 | print(data.loc["Colorado"]) 58 | print(data.loc[["Colorado","New York"],["two","three"]]) 59 | print(data.iloc[0:2,0:3]) 60 | print(data.loc[data.three>=2]) 61 | -------------------------------------------------------------------------------- /numpy.py: -------------------------------------------------------------------------------- 1 | #NUMPY 2 | '''NumPy, short for Numerical Python, is one of the most important fundamental packages for numerical computing in python''' 3 | import numpy as np 4 | arr=np.arange(1_000_000) 5 | print(arr) 6 | list=list(range(1_000_000)) 7 | print(list[1:10]) 8 | %timeit arr2=arr*2 9 | %timeit list2=[x*2 for x in list] 10 | '''One of the key features of NumPy is its N-dimensional array object or n-D array, which is fast, flexible container for large datasets in python.''' 11 | data=np.array([[1.5,0.1,3],[0,-3,6.5]]) 12 | print(data) 13 | print(data*10) 14 | print(data+data) 15 | print(data.shape) 16 | print(data.dtype) 17 | print(data.ndim) 18 | data1=[6,7.5,8.0,1] 19 | arr1=np.array(data1) 20 | print(arr1) 21 | print(arr1.ndim) 22 | print(np.zeros(10)) 23 | print(np.zeros((3,6))) 24 | print(np.ones((3,6))) 25 | print(np.arange(15)) 26 | arr1=np.array([1,2,3],dtype=np.float64) 27 | arr2=np.array([1,2,3],dtype=np.int32) 28 | print(arr1.dtype) 29 | print(arr2.dtype) 30 | arr=np.array([1,2,3,4,5]) 31 | print(arr.dtype) 32 | float_arr=arr.astype(np.float64) 33 | print(float_arr.dtype) 34 | print(float_arr) 35 | arr=np.array([1.,2.,3.],[4.,5.,6.]) 36 | print(arr) 37 | print(arr*arr) 38 | print(arr-arr) 39 | print(1/arr) 40 | print(arr**2) 41 | arr2=np.array([0.,4.,1.],[7.,2.,12.]) 42 | print(arr2) 43 | print(arr2>arr) 44 | arr=np.arange(10) 45 | print(arr) 46 | print(arr[5]) 47 | print(arr[5:8]) 48 | arr[5:8]=12 49 | print(arr) 50 | arr_slice=arr[5:8] 51 | print(arr_slice) 52 | arr_slice[1]=12345 53 | print(arr_slice) 54 | arr2d=np.array([[1,2,3],[4,5,6],[7,8,9]]) 55 | print(arr2d) 56 | print(arr2d[2]) 57 | print(arr2d[2][1]) 58 | print(arr2d[:2]) 59 | print(arr2d[:2,1:]) 60 | arr=np.arange(15).reshape((3,5)) 61 | print(arr) 62 | arr=np.arange(15).reshape((5,3)) 63 | print(arr) 64 | print(arr.T)#Transpose 65 | arr=np.array([[0,1,0],[1,2,-2],[6,3,2],[-1,0,-1],[1,0,1]]) 66 | print(arr) 67 | print(np.dot(arr.T,arr))#Matrix Multiplication 68 | print(arr.T@arr)#Matrix Multiplication -------------------------------------------------------------------------------- /pandas.py: -------------------------------------------------------------------------------- 1 | #PANDAS 2 | '''Pandas contains data structures and data manipulation tools designed to make data cleaning and analysis fast and convenient in Python. 
3 | Series and Dataframe 4 | Series is a one-dimensional array like object containing a sequence of value.''' 5 | import pandas as pd 6 | obj=pd.Series([4,7,-5,3]) 7 | print(obj) 8 | obj2=pd.Series([4,7,-5,3],index=["d","b","a","c"]) 9 | print(obj2) 10 | print(obj2["a"]) 11 | obj2["d"]=6 12 | print(obj2) 13 | print(obj2[["c","a","d"]]) 14 | obj2=pd.Series([4,7,-5,3,5],index=["d","b","a","a","c"]) 15 | print(obj2) 16 | print(obj2[obj2>0]) 17 | print(obj2*2) 18 | import numpy as np 19 | np.exp(obj2) 20 | sdata={"Ohio":35000,"Texas}":71000,"Oregon":16000,"Utah":5000} 21 | obj3=pd.Series(sdata) 22 | print(obj3) 23 | print(obj3.to_dict()) 24 | #DATA FRAME 25 | '''A DataFrame represents a rectangular table of data and contains an ordered, named collection of columns each of which can be a different value type. 26 | The DataFrame has both a row index and column index.''' 27 | data={"states":["Ohio","Ohio","Ohio","Nevada","Nevada","Nevada"],"year":[2000,2001,2002,2001,2002,2003],"pop":[1.5,1.7,3.6,2.4,2.9,3.2]} 28 | frame=pd.DataFrame(data) 29 | print(frame) 30 | print(frame.head()) 31 | print(frame.tail()) 32 | print(pd.DataFrame(data,columns=["year","states","pop"])) 33 | frame2=pd.DataFrame(data,columns=["year","states","pop","debt"]) 34 | print(frame2) 35 | print(frame2.columns) 36 | print(frame2["states"]) 37 | print(frame2.year) 38 | print(frame2[["states","year"]]) 39 | print(frame2.loc[1]) 40 | print(frame2.iloc[2]) 41 | frame2["debt"]=16.5 42 | print(frame2) 43 | frame2["debt"]=np.arange(6.) 44 | print(frame2) 45 | frame2["eastern"]=frame2["states"]=="ohio" 46 | print(frame2) 47 | del frame2["eastern"] 48 | print(frame2.columns) 49 | frame2.index.name="year" 50 | frame2.columns.name="state" 51 | print(frame2) 52 | data=pd.DataFrame(np.arange(16).reshape((4,4)),index=["Ohio","Colorado","Utah","New York"],columns=["one","two","three","four"]) 53 | print(data) 54 | print(data["two"]) 55 | print(data[["three","one"]]) 56 | print(data[data["three"]>5]) 57 | print(data.loc["Colorado"]) 58 | print(data.loc[["Colorado","New York"],["two","three"]]) 59 | print(data.iloc[0:2,0:3]) 60 | print(data.loc[data.three>=2]) -------------------------------------------------------------------------------- /primeornot.py: -------------------------------------------------------------------------------- 1 | number=int(input("Enter the number=")) 2 | if number>1: 3 | for i in range (2,int(number/2)+1): 4 | if (number%i)==0: 5 | print(number,"is not a prime number") 6 | break 7 | else: 8 | print(number,"is a prime number") 9 | else: 10 | print(number,"is not a prime number") -------------------------------------------------------------------------------- /printingPoem.py: -------------------------------------------------------------------------------- 1 | print(''' Twinkle, twinkle, little star, 2 | How I wonder what you are! 3 | Up above the world so high, 4 | Like a diamond in the sky. 5 | 6 | When the blazing sun is gone, 7 | When he nothing shines upon, 8 | Then you show your little light, 9 | Twinkle, twinkle, all the night. 10 | 11 | Then the trav'ller in the dark, 12 | Thanks you for your tiny spark, 13 | He could not see which way to go, 14 | If you did not twinkle so. 15 | 16 | In the dark blue sky you keep, 17 | And often thro' my curtains peep, 18 | For you never shut your eye, 19 | Till the sun is in the sky. 
20 | 21 | 'Tis your bright and tiny spark, 22 | Lights the trav'ller in the dark: 23 | Tho' I know not what you are, 24 | Twinkle, twinkle, little star.''') 25 | -------------------------------------------------------------------------------- /project1.py: -------------------------------------------------------------------------------- 1 | #Project on Regression and Random Forest Regression 2 | #Regression Problems in Machine Learning 3 | '''Machine Learning is a branch of Artificial Intelligence that enables computer programs to automatically learn and improve from experience. 4 | Machine Learning Algorithms learn from datasets and then based on the patterns identified from the datasets make predictions on unseen data. 5 | ML algorithms can be broadly categorized into two types: 6 | 1. Supervised Learning 7 | 2. Unsupervised Learning 8 | Supervised ML algorithms are those algorithms where the input dataset and the corresponding output or true prediction is available and the algorithms try to find the relationship between inputs and outputs. 9 | In unsupervised ML algorithms, the true labels for the outputs are not known. Rather, the algorithms try to find similar patterns in the data. E.g., Clustering. 10 | Supervised learning algorithms are further divided into two types: 11 | 1. Regression Algorithms 12 | 2. Classification Algorithms 13 | Regression algorithms predict a continuous value e.g.,the price of a house. 14 | Classification algorithms predict a discrete value e.g., whether a incoming email is Spam/Ham.''' 15 | 16 | import pandas as pd 17 | import numpy as np 18 | import seaborn as sns 19 | #sns.get_dataset_names() 20 | #Importing the dataset and printing the dataset header 21 | tips_df=sns.load_dataset("tips") 22 | tips_df.head() 23 | 24 | '''We will be using machine learning algorithms to predict the tip for a particular record based on the remaining features such as total_bill, gender, day, time etc. 25 | Dividing Data into Features and Labels''' 26 | x=tips_df.drop(['tip'],axis=1) 27 | y=tips_df["tip"] 28 | x.head() 29 | y.head() 30 | 31 | #Converting Categorical Data to Numbers 32 | '''ML Algorithms can only work with numbers. It is important to convert categorical data into a numeric format''' 33 | #Numeric Variables 34 | numerical=x.drop(['sex','smoker','day','time'],axis=1) 35 | numerical.head() 36 | #DataFrame that contains only categorical columns 37 | categorical=x.filter(['sex','smoker','day','time']) 38 | categorical.head() 39 | categorical["day"].value_counts() 40 | 41 | '''One of the most common approaches to convert a categorical column to a numeric one is via one-hot encoding. 42 | In one-hot encoding, for every unique value in the original columns, anew column is created.''' 43 | 44 | cat_numerical=pd.get_dummies(categorical) 45 | cat_numerical.head() 46 | '''The final step is to join the numerical columns with the one-hot encoded columns.''' 47 | x=pd.concat([numerical,cat_numerical],axis=1) 48 | x.head() 49 | #Divide Data into Training and Test Sets 50 | '''We divide the dataset into two sets i.e., train and test set. 51 | The dataset is trained via the train set and evaluated on the test set.''' 52 | 53 | from sklearn.model_selection import train_test_split 54 | x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.20,random_state=0) 55 | 56 | #Data Scaling/Normalization 57 | '''The final step before data is passed to ML algorithm is to scale the data. 
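As a rough illustration with made-up numbers: standardization rescales each column to zero mean and unit variance, z = (x - mean) / std, so if the total_bill column had a mean of 20 and a standard deviation of 9, a bill of 29 would become (29 - 20) / 9 = 1.0 and a bill of 11 would become -1.0, putting it on the same scale as the one-hot encoded columns.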
58 | Some columns of the dataset contain small values, while the others contain very large values. It is better to convert all values to a uniform scale.'''
59 | 
60 | from sklearn.preprocessing import StandardScaler
61 | sc=StandardScaler()
62 | x_train=sc.fit_transform(x_train)
63 | x_test=sc.transform(x_test)
64 | '''We have converted data into a format that can be used to train ML algorithms for regression.'''
65 | 
66 | #Linear Regression
67 | '''Linear Regression is a linear model that assumes a linear relationship between inputs and outputs and minimizes the cost of error between the predicted and actual output using functions like mean absolute error.'''
68 | 
69 | #Advantages
70 | '''Linear Regression is a simple to implement and easily interpretable algorithm.
71 | It takes less time to train, even for huge datasets.
72 | Linear Regression coefficients are easy to interpret.
73 | Importing Linear Regression model from sklearn.'''
74 | 
75 | from sklearn.linear_model import LinearRegression
76 | lin_reg=LinearRegression()
77 | regressor=lin_reg.fit(x_train,y_train)
78 | y_pred=regressor.predict(x_test)
79 | 
80 | '''Once you have trained a model and have made predictions on the test set, the next step is to know how well your model has performed for making predictions on the unknown test set.
81 | There are various metrics to check that.
82 | Mean Absolute Error (MAE) is calculated by taking the average of absolute error obtained by subtracting real values from predicted values.
83 | Mean Squared Error (MSE) is similar to MAE. However, the error for each record is squared in case of MSE.
84 | Root Mean Squared Error (RMSE) is the square root of mean squared error.'''
85 | 
86 | from sklearn import metrics
87 | print('Mean Absolute Error:',metrics.mean_absolute_error(y_test,y_pred))
88 | print('Mean Squared Error:',metrics.mean_squared_error(y_test,y_pred))
89 | print('Root Mean Squared Error:',np.sqrt(metrics.mean_squared_error(y_test,y_pred)))
90 | 
91 | '''By looking at the MAE, it can be concluded that on average there is an error of 0.70 for predictions, which means that the predicted tip values are 0.70$ more or less than the actual tip values.'''
92 | 
93 | #Random Forest Regression
94 | '''Random Forest Regression is a tree-based algorithm.
95 | It is an ensemble modelling technique.'''
96 | #Advantages
97 | '''It works well when you have lots of missing data or an imbalanced dataset (e.g., 200 samples of class 0 and 1000 samples of class 1).
98 | With a large number of trees or models, you can avoid overfitting.
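Each tree in the forest is trained on a random bootstrap sample of the rows and considers a random subset of the features at each split, and the forest's prediction is the average of the individual trees, which smooths out the quirks of any single tree.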
Overfitting occurs when ML models performs better on the training set but worse on the test set.''' 99 | 100 | from sklearn.ensemble import RandomForestRegressor 101 | rf_reg=RandomForestRegressor(random_state=42,n_estimators=500) 102 | regressor=rf_reg.fit(X_train,y_train) 103 | y_pred=regressor.predict(X_test) 104 | from sklearn import metrics 105 | print('Mean Absolute Error:',metrics.mean_absolute_error(y_test,y_pred)) 106 | print('Mean Squared Error:',metrics.mean_squared_error(y_test,y_pred)) 107 | print('Root Mean Squared Error:',np.sqrt(metrics.mean_squared_error(y_test,y_pred))) 108 | 109 | -------------------------------------------------------------------------------- /project2.py: -------------------------------------------------------------------------------- 1 | #Project on Logistic Regression & Clustering 2 | #Classification Problems in Machine Learning 3 | '''Classification problems are the type of problems where you have to predict a deiscrete value i.e., whether the student will pass the exam or not.''' 4 | 5 | import pandas as pd 6 | import numpy as np 7 | #importing the dataset 8 | churn_df=pd.read_csv("Churn_Modelling.csv") 9 | churn_df.head() 10 | 11 | '''The exited column contains information regarding whether or not the customer exited the bank after six months.''' 12 | 13 | #Removing unnecessary columns 14 | churn_df=churn_df.drop(['RowNumber','CustomerId','Surname'],axis=1) 15 | churn_df.head() 16 | #Dividing Data into Features and Labels 17 | X=churn_df.drop(['Exited'],axis=1) 18 | y=churn_df['Exited'] 19 | X.head() 20 | y.head() 21 | #Converting Categorical Data to Numbers 22 | numerical=X.drop(['Geography','Gender'],axis=1) 23 | numerical.head() 24 | categorical=X.filter(['Geography','Gender']) 25 | categorical.head() 26 | cat_numerical=pd.get_dummies(categorical) 27 | cat_numerical.head() 28 | X=pd.concat([numerical,cat_numerical],axis=1) 29 | X.head() 30 | #Dividing Data into Training and Test Sets 31 | from sklearn.model_selection import train_test_split 32 | X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20,random_state=0) 33 | 34 | #Data Scaling/Normalization 35 | from sklearn.preprocessing import StandardScaler 36 | sc=StandardScaler() 37 | X_train=sc.fit_transform(X_train) 38 | X_test=sc.transform(X_test) 39 | 40 | '''Binary Classification problems are those classification problems where there are only two possible values for the output level. 41 | Whether a customer will leave the bank after a certain period or not.''' 42 | 43 | #Logistic Regression 44 | '''Logistic Regression is a linear model, which makes classification by passing the output of linear regression through a sigmoid function. 45 | Importing logistic regression classifier from sklearn''' 46 | 47 | from sklearn.linear_model import LogisticRegression 48 | log_clf=LogisticRegression() 49 | classifier=log_clf.fit(X_train,y_train) 50 | y_pred=classifier.predict(X_test) 51 | 52 | '''There are various metrics to evaluate a classification method. 53 | Some of the most commonly used classification metrics are F1 score, recall, precision, accuracy and confusion matrix. 54 | True Negatives(TN/tn):True Negatives are those output labels that are actually false and the model also predicted them as false. 55 | True Positives(TP/tp):True Positives are those output labels that are actually true and the model also predicted them as true. 56 | False Negatives(FN/fn):False Negatives are those output labels that are actually true but the model predicted them as false. 
57 | False Positives(FP/fp):False Positives are those output labels that are actually false but the model also predicted them as true.''' 58 | 59 | #Precision 60 | '''It is obtained by dividing true positives by the sum of true positive and false positive. 61 | Precision=tp/(tp+fp)''' 62 | #Recall 63 | '''It is obtained by dividing true positives by the sum of true positives and false negatives. 64 | Recall=tp/(tp+fn)''' 65 | 66 | #Evaluating the algorithm on the test set 67 | from sklearn.metrics import classification_report, confusion_matrix, accuracy_score 68 | print(confusion_matrix(y_test,y_pred)) 69 | print(classification_report(y_test,y_pred)) 70 | print(accuracy_score(y_test,y_pred)) 71 | 72 | #Random Forest Classifier 73 | from sklearn.ensemble import RandomForestClassifier 74 | rf_clf=RandomForestClassifier(random_state=42,n_estimators=500) 75 | classifiers=rf_clf.fit(X_train,y_train) 76 | y_pred=classifier.predict(X_test) 77 | from sklearn.metrics import classification_report, confusion_matrix, accuracy_score 78 | print(confusion_matrix(y_test,y_pred)) 79 | print(classification_report(y_test,y_pred)) 80 | print(accuracy_score(y_test,y_pred)) 81 | 82 | #Clustering 83 | '''Clustering algorithms are unsupervised algorithms where the training data is not labeled. 84 | Rather, the algorithms cluster or group the datasets based on common characteristics.''' 85 | 86 | #K-Means Clustering 87 | '''K-Means Clustering is one of the most commonly used algorithms for clustering, K refers to the number of clusters that you want your data to be grouped into. 88 | In K-Means clustering, the number of clusters has to be defined before K clustering can be applied to the data points.''' 89 | 90 | #Steps for K-Means Clustering 91 | '''1.Randomly assign centroid values for each cluster. 92 | 2.Calculate the euclidean distance between each data point and centroid values of all the clusters. 93 | 3.Assign the data point to the cluster of the centroid with the shortest distance. 94 | 4.Calculate and update centroid values based on the mean values of the coordinates of all the data points of the corresponding cluster. 95 | 5.Repeat steps 2-4 until new centroid values for all the clusters are different from the previous centroid values.''' 96 | 97 | import numpy as np 98 | import pandas as pd 99 | from sklearn.cluster import KMeans 100 | import matplotlib.pyplot as plt 101 | 102 | #Customer Segmentation using K-Means Clustering 103 | '''In this project, you will see how to segment customers based on their incomes and past spending habits. 104 | You will then identify customers who have high incomes and higher spending.''' 105 | 106 | dataset=pd.read_csv("Mall_Customers.csv") 107 | dataset.head() 108 | '''The output shows that the dataset contains 200 records and 5 cloumns. 109 | Plotting the histogram for the annual income column.''' 110 | import warnings 111 | warnings.filterwarnings("ignore") 112 | sns.distplot(dataset["Annual Income (k$)"],kde=False,bins=50) 113 | '''The output shows that most of the customers have incomes between 60 and 90K per year. 114 | Plotting the histogram for the spending score column.''' 115 | sns.distplot(dataset["Spending Score (1-100)"],kde=False,bins=50,color="red") 116 | '''The output shows that most of the customers have a spending score between 40 and 60. 
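A regression plot draws the scatter of individual points together with a fitted straight line, which makes it easy to see whether two variables tend to move together.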
117 | Plotting regression plot for annual income against spending score.''' 118 | sns.regplot(x="Annual Income (k$)",y="Spending Score (1-100)",data=dataset) 119 | '''There is no linear relationship between annual income and spending. 120 | Plotting regression plot for age and spending score''' 121 | sns.regplot(x="Age",y="Spending Score (1-100)",data=dataset) 122 | '''The output confirms an inverse linear relationship between age and spending score. 123 | Young people have higher spending compared to older people.''' 124 | dataset=dataset.filter(["Annual Income(k$)","Spending Score (1-100)"],axis=1) 125 | dataset.head() 126 | km_model=KMeans(n_clusters=4) 127 | km_model.fit(dataset) 128 | print(km_model.cluster_centers_) 129 | print(km_model.labels_) 130 | plt.scatter(dataset.values[:,0],dataset.values[:,1],c=km_model.labels_,cmap='rainbow') 131 | plt.scatter(km_model.cluster_centers_[:,0],km_model.cluster_centers_[:,1],s=100,c='black') 132 | #Elbow method to get the optimal number of cluaters 133 | loss=[] 134 | for i in range(1,11): 135 | km=KMeans(n_clusters=i).fit(dataset) 136 | loss.append(km.inertia_) 137 | plt.plot(range(1,11),loss) 138 | plt.title('Finding optimal number of vlusters via elbow method') 139 | plt.xlabel('Number of clusters') 140 | plt.ylabel('loss') 141 | plt.show() 142 | km_model=KMeans(n_clusters=5) 143 | km_model.fit(dataset) 144 | print(km_model.cluster_centers_) 145 | print(km_model.labels_) 146 | plt.scatter(dataset.values[:,0],dataset.values[:,1],c=km_model.labels_,cmap='rainbow') 147 | plt.scatter(km_model.cluster_centers_[:,0],km_model.cluster_centers_[:,1],s=100,c='black') 148 | #Filtering all records with cluster id 1 149 | cluster_map=pd.DataFrame() 150 | cluster_map['data_indx']=dataset.index.values 151 | cluster_map['cluster']=km_model.labels_ 152 | print(cluster_map) 153 | cluster_map=cluster_map[cluster_map.clusters==1] 154 | cluster_map.head() 155 | '''These are the customers who have high incomes and high spending and these customers should be targeted during marketing campaigns.''' 156 | -------------------------------------------------------------------------------- /project3onCNN.py: -------------------------------------------------------------------------------- 1 | #Project on Image Classification using Convolutional Neural Networks 2 | #Convolutional Neural Networks 3 | '''A Convolutional Neural Network (CNN) is a type of artificial neural network that is used in image recognition and processing that is specifically designed to process pixel data.''' 4 | #CNN Model on MNIST Dataset for written digit classification 5 | '''MNIST Dataset is the handwritten numbers taken as images. 
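The dataset contains 60,000 training images and 10,000 test images of handwritten digits 0-9, each 28x28 pixels.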
All images are grey scale.''' 6 | from keras.datasets import mnist 7 | #from keras.preprocessing.image import load_img, array_to_img 8 | from tensorflow.keras.utils import to_categorical 9 | from keras.models import Sequential 10 | from keras.layers import Dense 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | #Load the data 14 | (X_train,y_train),(X_test,y_test)=mnist.load_data() 15 | print(X_train.shape) 16 | print(y_train.shape) 17 | print(X_test.shape) 18 | print(y_test.shape) 19 | #Understand the image format 20 | X_train[0].shape 21 | plt.imshow(X_train[0],cmap="gray") 22 | y_train[0] 23 | #Preprocessing the image data 24 | image_height,image_width=28,28 25 | X_train=X_train.reshape(60000,image_height*image_width) 26 | X_test=X_test.reshape(10000,image_height*image_width) 27 | print(X_train.shape) 28 | print(X_test.shape) 29 | print(X_train[0]) 30 | X_train=X_train.astype('float32') 31 | X_test=X_test.astype('float32') 32 | X_train/=255.0 33 | X_test/=255.0 34 | print(X_train[0]) 35 | print(y_train.shape) 36 | print(y_test.shape) 37 | '''Converting the target value into 10 bins. So, we will see that the output from a model will then go into one of these bins.''' 38 | y_train=to_categorical(y_train,10) 39 | y_test=to_categorical(y_test,10) 40 | print(y_train.shape) 41 | print(y_test.shape) 42 | print(y_train[0]) 43 | #Building the model 44 | model=Sequential() 45 | model.add(Dense(512,activation='relu',input_shape=(784,))) 46 | model.add(Dense(512,activation='relu')) 47 | model.add(Dense(10,activation="softmax")) 48 | #Compile the model 49 | model.compile(optimizer="adam",loss='categorical_crossentropy',metrics=["accuracy"]) 50 | model.summary() 51 | history=model.fit(X_train,y_train,epochs=20,validation_data=(X_test,y_test)) 52 | plt.plot(history.history['accuracy']) 53 | #Evaluating the model 54 | score=model.evaluate(X_test,y_test) 55 | '''In neural networks, we only have fully connected layer, otherwise known as dense layer. 
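A Dense layer connects every input pixel to every neuron, so a 28x28 image already needs 784 weights per neuron; a convolution instead slides a small filter (for example 3x3, i.e. 9 shared weights) across the image, and each 2x2 max pooling step halves the spatial dimensions (28 -> 14 -> 7).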
With Convolutional Neural Networks, we have more operations such as the convolution operation, max pooling, flattening and also a fully connected layer.'''
56 | from keras.layers import Conv2D, MaxPooling2D,Flatten,Dense
57 | from keras.models import Sequential
58 | from keras.datasets import mnist
59 | from tensorflow.keras.utils import to_categorical
60 | (X_train,y_train),(X_test,y_test)=mnist.load_data()
61 | print(X_train.shape)
62 | print(X_test.shape)
63 | print(y_train.shape)
64 | print(y_test.shape)
65 | X_train=X_train.reshape(60000,28,28,1)
66 | X_test=X_test.reshape(10000,28,28,1)
67 | X_train=X_train.astype('float32')
68 | X_test=X_test.astype('float32')
69 | X_train/=255.0
70 | X_test/=255.0
71 | y_train=to_categorical(y_train,10)
72 | y_test=to_categorical(y_test,10)
73 | print(X_train.shape)
74 | print(X_test.shape)
75 | print(y_train.shape)
76 | print(y_test.shape)
77 | #CNN Model Development
78 | cnn=Sequential()
79 | cnn.add(Conv2D(32,kernel_size=(3,3),input_shape=(28,28,1),padding='same',activation='relu'))
80 | cnn.add(MaxPooling2D())
81 | cnn.add(Conv2D(32,kernel_size=(3,3),padding='same',activation='relu'))
82 | cnn.add(MaxPooling2D())
83 | cnn.add(Flatten())
84 | cnn.add(Dense(64,activation='relu'))
85 | cnn.add(Dense(10,activation='softmax'))
86 | cnn.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
87 | print(cnn.summary())
88 | history_cnn=cnn.fit(X_train,y_train,epochs=12,verbose=1,validation_data=(X_test,y_test))
89 | plt.plot(history_cnn.history['accuracy'])
90 | plt.plot(history_cnn.history['val_accuracy'])
--------------------------------------------------------------------------------
/project4onNLP.py:
--------------------------------------------------------------------------------
1 | #Project on Spam/Ham Classification using NLP
2 | #Natural Language Processing- NLP
3 | '''NLP is a field concerned with the ability of a computer to understand, analyze, manipulate and potentially generate human language.
4 | NLP is a broad umbrella that encompasses many topics. Some of them are sentiment analysis, topic modelling, text classification etc.
5 | NLTK:- Natural Language ToolKit: The NLTK is the most utilized package for handling natural language processing tasks. It is an open source library.'''
6 | #Spam/Ham Classification using Natural Language Processing
7 | #pip install nltk
8 | import nltk
9 | import pandas as pd
10 | import numpy as np
11 | dataset=pd.read_csv("SMSSpamCollection.tsv",sep="\t",header=None)
12 | dataset.columns=['label','body_text']
13 | dataset.head()
14 | dataset['body_text'][0]
15 | dataset['body_text'][1]
16 | #What is the shape of the data
17 | print("Input data has {} rows and {} columns".format(len(dataset),len(dataset.columns)))
18 | #How many Spam/Ham are there
19 | print("Out of {} rows,{} are spam and {} are ham".format(len(dataset),len(dataset[dataset['label']=='spam']),len(dataset[dataset['label']=='ham'])))
20 | #How much missing data is there
21 | print("Number of null in label: {}".format(dataset['label'].isnull().sum()))
22 | print("Number of null in text: {}".format(dataset['body_text'].isnull().sum()))
23 | '''Preprocessing text data:- Cleaning up the text data is necessary to highlight attributes that you are going to use in ML algorithms.
24 | Cleaning or preprocessing the data consists of a number of steps.
25 | Remove Punctuation 26 | Tokenization 27 | Remove Stopwords 28 | Lemmatize/Stemming''' 29 | import string 30 | string.punctuation 31 | def remove_punct(text): 32 | text_nopunct="".join([char for char in text if char not in string.punctuation]) 33 | return text_nopunct 34 | dataset['body_txt_clean']=dataset['body_text'].apply(lambda x:remove_punct(x)) 35 | dataset.head() 36 | #Tokenization 37 | '''Tokenizing is splitting some string or sentence into a list of words''' 38 | import re 39 | def tokenize(text): 40 | tokens=re.split('\W',text) 41 | return tokens 42 | dataset['body_text_tokenized']=dataset['body_text_clean'].apply(lambda x:tokenize(x.lower())) 43 | dataset.head() 44 | '''Remove Stopwords:- These are commonly used words like the, and, but,if that don't contribute much to the meaning of a sentence.''' 45 | stopwords=nltk.corpus.stopwords.words('english') 46 | def remove_stopwords(tokenized_list): 47 | text=[word for word in tokenized_list if word not in stopwords] 48 | return text 49 | dataset['body_text_nostop']=dataset['body_text_tokenized'].apply(lambda x:remove_stopwords(x)) 50 | dataset.head() 51 | '''Stemming:- Stemming is the process of reducing inflected or derived words to their stem or root.''' 52 | ps=nltk.PorterStemmer() 53 | def stemming(tokenized_text): 54 | text=[ps.stem(word) for word in tokenized_text] 55 | return text 56 | dataset['body_text_stemmed']=dataset['body_text_nostop'].apply(lambda x:stemming(x)) 57 | dataset.head() 58 | '''Lemmatization:- It is the process of grouping together the inflected forms of a word so they can be analysed as a single term, identified by the word's lemma.For e.g. type, typing and typed are forms of the same lemma type.''' 59 | wn=nltk.WordNetLemmatizer() 60 | def lemmatizing(tokenized_text): 61 | text=[wn.lemmatize(word) for word in tokenized_text] 62 | return text 63 | dataset['body_text_lemmatized']=dataset['body_text_nostop'].apply(lambda x:lemmatizing(x)) 64 | dataset.head() 65 | '''Vectorization:- This is defined as the process of encoding text as integers to create feature vectors. In out ontext we will be taking individual text messages and converting it to a numeric vector that represents that text message. 66 | Count Vectorization:- This creates a document-term matrix where the entry of each cell will be a count of the number of times that word occured in that document.''' 67 | from sklearn.feature_extraction.text import CountVectorizer 68 | def clean_text(text): 69 | text="".join([word.lower() for word in text if word not in string.punctuation]) 70 | tokens=re.split('\W',text) 71 | text=[ps.stem(word) for word in tokens if word not in stopwords] 72 | return text 73 | count_vect=CountVectorizer(analyzer=clean_text) 74 | X_count=count_vect.fit_transform(dataset['body_text']) 75 | print(X_count.shape) 76 | #Apply count vectorizer to a smaller sample 77 | data_sample=dataset[0:20] 78 | count_vect_sample=CountVectorizer(analyzer=clean_text) 79 | X_count_sample=count_vect_sample.fit_transform(data_sample['body_text']) 80 | print(X_count_sample.shape) 81 | '''Sparse Matrix:- A matrix in which most entries are 0. 
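For example, in the 20-message sample vectorized above, each row has non-zero counts only for the handful of distinct words that actually appear in that message, while every other vocabulary column is 0.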
In the interest of efficient storage, a sparse matrix will be stored by only storing the locations of the non-zero elements.'''
82 | print(X_count_sample)
83 | X_counts_df=pd.DataFrame(X_count_sample.toarray())
84 | print(X_counts_df)
85 | '''TF-IDF (Term Frequency, Inverse Document Frequency):- Creates a document term matrix where the columns represent single unique terms (unigrams) but the cell represents a weighting meant to represent how important a word is to a document.'''
86 | from sklearn.feature_extraction.text import TfidfVectorizer
87 | tfidf_vect=TfidfVectorizer(analyzer=clean_text)
88 | X_tfidf=tfidf_vect.fit_transform(dataset['body_text'])
89 | print(X_tfidf.shape)
90 | #Apply TfidfVectorizer to a smaller sample
91 | data_sample=dataset[0:20]
92 | tfidf_vect_sample=TfidfVectorizer(analyzer=clean_text)
93 | X_tfidf_sample=tfidf_vect_sample.fit_transform(data_sample['body_text'])
94 | print(X_tfidf_sample.shape)
95 | X_tfidf_df=pd.DataFrame(X_tfidf_sample.toarray())
96 | X_tfidf_df.columns=tfidf_vect_sample.get_feature_names()
97 | print(X_tfidf_df)
98 | 
99 | #Feature Engineering: Feature Creation
100 | dataset=pd.read_csv("SMSSpamCollection.tsv",sep="\t",header=None)
101 | dataset.columns=['label','body_text']
102 | dataset.head()
103 | #Create feature for text message length
104 | dataset['body_len']=dataset['body_text'].apply(lambda x:len(x)-x.count(" "))
105 | dataset.head()
106 | #Create feature for % of text that is punctuation
107 | def count_punct(text):
108 |     count=sum([1 for char in text if char in string.punctuation])
109 |     return round(count/(len(text)-text.count(" ")),3)*100
110 | dataset['punct%']=dataset['body_text'].apply(lambda x:count_punct(x))
111 | dataset.head()
112 | import matplotlib.pyplot as plt
113 | import numpy as np
114 | bins=np.linspace(0,200,40)
115 | plt.hist(dataset['body_len'],bins)
116 | plt.title('Body Length Distribution')
117 | plt.show()
118 | bins=np.linspace(0,50,40)
119 | plt.hist(dataset['punct%'],bins)
120 | plt.title('Punctuation % Distribution')
121 | plt.show()
122 | 
123 | #Building Machine Learning Classifiers using Random Forest Model
124 | import nltk
125 | import pandas as pd
126 | import re
127 | from sklearn.feature_extraction.text import TfidfVectorizer
128 | import string
129 | dataset=pd.read_csv("SMSSpamCollection.tsv",sep="\t",header=None)
130 | dataset.columns=['label','body_text']
131 | dataset.head()
132 | def count_punct(text):
133 |     count=sum([1 for char in text if char in string.punctuation])
134 |     return round(count/(len(text)-text.count(" ")),3)*100
135 | dataset['punct%']=dataset['body_text'].apply(lambda x:count_punct(x))
136 | dataset['body_len']=dataset['body_text'].apply(lambda x:len(x)-x.count(" "))
137 | dataset.head()
138 | def clean_text(text):
139 |     text="".join([word.lower() for word in text if word not in string.punctuation])
140 |     tokens=re.split(r'\W',text)
141 |     text=[ps.stem(word) for word in tokens if word not in stopwords]
142 |     return text
143 | tfidf_vect=TfidfVectorizer(analyzer=clean_text)
144 | X_tfidf=tfidf_vect.fit_transform(dataset['body_text'])
145 | X_features=pd.concat([dataset['body_len'],dataset['punct%'],pd.DataFrame(X_tfidf.toarray())],axis=1)
146 | X_features.head()
147 | 
148 | #Model using K-Fold cross validation
149 | from sklearn.ensemble import RandomForestClassifier
150 | from sklearn.model_selection import KFold, cross_val_score
151 | rf=RandomForestClassifier(n_jobs=1)
152 | k_fold=KFold(n_splits=5)
153 | 
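#With n_splits=5 the data is split into 5 folds; each fold is held out once for evaluation
#while the classifier is trained on the remaining 4, so the call below returns an array of
#5 accuracy scores (one per fold) rather than a single number.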
cross_val_score(rf,X_features,dataset['label'],cv=k_fold,scoring='accuracy',n_jobs=1) 154 | 155 | #Model using Train Test Split 156 | from sklearn.metrics import precision_recall_fscore_support as score 157 | from sklearn.model_selection import train_test_split 158 | X_train, X_test, y_train, y_test=train_test_split(X_features, dataset['label'],test_size=0.3,random_state=0) 159 | rf=RandomForestClassifier(n_estimators=500,max_depth=20,n_jobs=-1) 160 | rf_model=rf.fit(X_train,y_train) 161 | sorted(zip(rf_model.feature_importances_,X_train.columns),reverse=True)[0:10] 162 | y_pred=rf_model.predict(X_test) 163 | precision,recall,fscore,support=score(y_test,y_pred,pos_label='spam',average='binary') 164 | print('Precision {} / Recall {} /Acccuracy {}'.format(round(precision,3),round(recall,3),round((y_pred==y_test).sum()/len(y_pred),3))) 165 | -------------------------------------------------------------------------------- /project5onRecommendation.py: -------------------------------------------------------------------------------- 1 | #Movie Recommender System 2 | '''Recommender Systems, also labelled as recommendation systems, are statistical algorithms that recommend products to users based on similarities between the buying trends of various user or similarities between the products. 3 | 4 | Collaborative Filtering:- The process used to calculate similaritiies between the buying trends of various users or similarities between products is called collaborative filtering. 5 | 6 | User based collaborative filtering:- If two user X and Y, like products A and B and there is another user Z who likes product A, then the product B will also be recommended to user Z. 7 | 8 | Item-based collaborative filtering:- Inthis products are recommended based on similarities between themselves. For instance if a user likes product A and product A has properties X and Y will be recommended to the user.''' 9 | 10 | import numpy as np 11 | import pandas as pd 12 | import matplotlib.pyplot as plt 13 | import seaborn as sns 14 | '''The dataset contains around 100,000 movie reviews applied to 9,000 movies by 600 users.''' 15 | movie_ids_titles=pd.read_csv("movies.csv") 16 | movie_ids_titles.head() 17 | movie_ids_titles.shape 18 | movie_ids_ratings=pd.read_csv("ratings.csv") 19 | movie_ids_ratings.head() 20 | movie_ids_ratings.shape 21 | '''Data Preprocessing:- We need a dataframe that consists of userId, movieId, title and ratings''' 22 | movie_ids_titles.drop(['genres'],inplace=True,axis=1) 23 | movie_ids_titles.head() 24 | movie_ids_ratings.drop(["timestamp"],inplace=True,axis=1) 25 | movie_ids_ratings.head() 26 | merged_movie_df=pd.merge(movie_ids_ratings,movie_ids_titles,on='movieId') 27 | merged_movie_df.head() 28 | '''Data Visualization:- Let's first group the dataset by title and see what information we can get regarding the ratings of movies.''' 29 | merged_movie_df.groupby('title').describe() 30 | merged_movie_df.groupby('title')['rating'].mean().head() 31 | '''Let's sort the movie titles by the descending order of the average user ratings''' 32 | merged_movie_df.groupby('title')['rating'].mean().sort_values(ascending=False).head() 33 | '''Let's now print the movies in the descending order of their rating counts''' 34 | merged_movie_df.groupby('title')['rating'].count().sort_values(ascending=False).head() 35 | '''A movie which is rated by large number of people is usually a good movie. 
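Intuitively, a 5.0 average from a couple of ratings is less trustworthy than a 4.3 average from several hundred ratings, so we keep the rating count alongside the mean rating.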
36 | Let's create a dataframe that shows the title, mean rating and the rating counts.'''
37 | movie_rating_mean_count=pd.DataFrame(columns=['rating_mean','rating_count'])
38 | movie_rating_mean_count["rating_mean"]=merged_movie_df.groupby('title')['rating'].mean()
39 | movie_rating_mean_count["rating_count"]=merged_movie_df.groupby('title')['rating'].count()
40 | movie_rating_mean_count.head()
41 | '''The above dataframe contains the movie title, average rating (ratings mean) and the number of rating counts.
42 | We will plot a histogram to see how the average ratings are distributed.'''
43 | plt.figure(figsize=(10,8))
44 | sns.set_style("darkgrid")
45 | movie_rating_mean_count['rating_mean'].hist(bins=30,color='purple')
46 | #Distribution for rating counts
47 | plt.figure(figsize=(10,8))
48 | sns.set_style("darkgrid")
49 | movie_rating_mean_count['rating_count'].hist(bins=33,color='green')
50 | '''There are around 7000 movies with less than 10 rating counts. The number of movies decreases with an increase in rating counts. Movies with more than 50 ratings are very few.
51 | It is also interesting to see the relationship between mean ratings and rating counts of a movie.'''
52 | plt.figure(figsize=(10,8))
53 | sns.set_style("darkgrid")
54 | sns.regplot(x="rating_mean",y="rating_count",data=movie_rating_mean_count,color="brown")
55 | '''From the top right portion of the above graph, you can see that the movies with a higher number of rating counts tend to have higher mean ratings as well.
56 | Let's sort our dataset by rating counts and see the average ratings of the movies with the top 5 highest number of ratings.'''
57 | movie_rating_mean_count.sort_values("rating_count",ascending=False).head()
58 | 
59 | #Item Based Collaborative Filtering
60 | '''In item based collaborative filtering, products are recommended based on common characteristics.
61 | The first step is to create a dataframe where each movie is represented by a column and rows contain user ratings for movies.'''
62 | user_movie_rating_matrix=merged_movie_df.pivot_table(index="userId",columns="title",values="rating")
63 | print(user_movie_rating_matrix)
64 | user_movie_rating_matrix.shape
65 | '''The dataset contains 610 unique users and 9719 unique movies.
66 | Now we will find the movie recommendation based on a single movie and then based on multiple movies.
67 | Finding recommendations based on a single movie. Suppose we want to find the recommendation based on the movie Pulp Fiction.
68 | First we will filter the column that contains the user ratings for the movie.'''
69 | pulp_fiction_ratings=user_movie_rating_matrix["Pulp Fiction (1994)"]
70 | '''Next, we will find the correlation between the user ratings of all the movies and the user ratings for the movie Pulp Fiction.'''
71 | pulp_fiction_correlations=pd.DataFrame(user_movie_rating_matrix.corrwith(pulp_fiction_ratings),columns=["pf_corr"])
72 | pulp_fiction_correlations.sort_values("pf_corr",ascending=False).head(5)
73 | '''Correlation by itself does not give meaningful results; one solution to this problem can be that, in addition to the correlation between the movies, we also use the rating count of the correlated movie as a criterion for finding the best recommendation.'''
74 | pulp_fiction_correlations=pulp_fiction_correlations.join(movie_rating_mean_count["rating_count"])
75 | pulp_fiction_correlations.head()
76 | '''The pf_corr column contains some NaN values. This is because there can be movies that are rated by users who did not rate Pulp Fiction (1994).
In such cases, correlation will be null. 77 | We will remove all the movies with null correlation with Pulp Fiction (1994).''' 78 | pulp_fiction_correlations.dropna(inplace=True) 79 | pulp_fiction_correlations.sort_values("pf_corr",ascending=False).head() 80 | '''A better way is to find the movies with the rating counts of atleast 50 and having the highest correlation with Pulp Fiction (1994).''' 81 | pulp_fiction_correlations_50=pulp_fiction_correlations[pulp_fiction_correlations['rating_count']>50] 82 | pulp_fiction_correlation_50.sort_values("pf_corr",ascending=False).head() 83 | '''Finding the recommendation based on multiple movies. The first step is to create a dataframe, which contains a correlation between all the movies in our dataset in the form of a matrix.''' 84 | all_movie_correlations=user_movie_rating_matrix.corr(method="pearson",min_periods=50) 85 | all_movie_correlations.head() 86 | '''Now suppose a new user logs into the website. The user has already watched three movies and has given ratings to those movies.''' 87 | movie_data=[['Forrest Gump (1994)',4.0],['Fight Club (1999)',3.5],['Interstellar (2014)',4.0]] 88 | test_movies=pd.DataFrame(movie_data,columns=['Movie_Name','Movie_Rating']) 89 | test_movies.head() 90 | '''We will be recommending movies from our dataset based on the ratings by a new user for these three movies.''' 91 | print(test_movies['Movie_Name'][0]) 92 | print(test_movies['Movie_Rating'][0]) 93 | '''From all_movie_correlations dataframe, let's obtain correlation values for the movies related to Forrest Gump (1994)''' 94 | all_movie_correlations['Forrest Gump (1994)'].dropna() 95 | '''Next, we will iterate through the three movies in the test_movies dataframe, find the correlated movies, and then multiply the correlation of all the correlated movies with the ratings of the input movie. 96 | The correlated movies, along with the weighted correlation are appended to an empty series named recommended movies.''' 97 | recommended_movies=pd.Series() 98 | for i in range(0,2): 99 | movie=all_movie_correlations[test_movies['Movie_Name'][i]].dropna() 100 | movie=movie.map(lambda movie_corr:movie_corr*test_movies["Movie_Rating"][i]) 101 | recommended_movies=recommended_movies.append(movie) 102 | print(recommended_movies) 103 | '''To get a final recommendation, you can sort the movies in the descending order of the weighted correlation''' 104 | recommended_movies.sort_values(inplace=True,ascending=False) 105 | print(recommended_movies.head(10)) 106 | -------------------------------------------------------------------------------- /project6onImageClassification.py: -------------------------------------------------------------------------------- 1 | #Project on Image Classification/Recognition using CNN on CIFAR-10 Dataset 2 | '''In this project we will be using CIFAR-10 dataset. This dataset includes thousands of pictures of 10 different kinds of objects like airplanes, automobiles, birds and so on. 3 | Each image in the dataset includes a matching label so we know what kind of image it is. 
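The ten classes are airplane, automobile, bird, cat, deer, dog, frog, horse, ship and truck, with 50,000 training images and 10,000 test images in total.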
4 | The images in the CIFAR-10 dataset are only 32x32 pixels.'''
5 | import keras
6 | from keras.datasets import cifar10
7 | from keras.models import Sequential
8 | from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
9 | from pathlib import Path
10 | from tensorflow.keras.utils import to_categorical
11 | #Load the dataset
12 | (X_train,y_train),(X_test,y_test)=cifar10.load_data()
13 | #Normalize the data
14 | X_train=X_train.astype('float32')
15 | X_test=X_test.astype('float32')
16 | X_train/=255.0
17 | X_test/=255.0
18 | #Convert class vectors to binary class matrices
19 | y_train=to_categorical(y_train,10)
20 | y_test=to_categorical(y_test,10)
21 | model=Sequential()
22 | model.add(Conv2D(32,(3,3),padding='same',input_shape=(32,32,3),activation='relu'))
23 | model.add(Conv2D(32,(3,3),activation='relu'))
24 | model.add(MaxPooling2D(pool_size=(2,2)))
25 | model.add(Dropout(0.25))
26 | 
27 | model.add(Conv2D(64,(3,3),padding='same',activation='relu'))
28 | model.add(Conv2D(32,(3,3),activation='relu'))
29 | model.add(MaxPooling2D(pool_size=(2,2)))
30 | model.add(Dropout(0.25))
31 | 
32 | model.add(Flatten())
33 | model.add(Dense(512,activation='relu'))
34 | model.add(Dropout(0.5))
35 | model.add(Dense(10,activation='softmax'))
36 | 
37 | #Compile the model
38 | model.compile(
39 | loss='categorical_crossentropy',
40 | optimizer='adam',
41 | metrics=['accuracy'])
42 | model.summary()
43 | 
44 | #Train the model
45 | model.fit(
46 | X_train,
47 | y_train,
48 | batch_size=32,
49 | epochs=25,
50 | validation_data=(X_test,y_test),
51 | shuffle=True)
52 | 
53 | #Save the neural network architecture
54 | model_structure=model.to_json()
55 | f=Path("model_structure.json")
56 | f.write_text(model_structure)
57 | 
58 | #Save the trained neural network weights
59 | model.save_weights("model_weight.h5")
60 | 
61 | #Making Predictions on the images
62 | from keras.models import model_from_json
63 | from pathlib import Path
64 | from keras.preprocessing import image
65 | import numpy as np
66 | class_labels=["Plane","Car","Bird","Cat","Deer","Dog","Frog","Horse","Boat","Truck"]
67 | #Load the json file that contains the model structure
68 | f=Path("model_structure.json")
69 | model_structure=f.read_text()
70 | #Recreate the keras model object from the json data
71 | model=model_from_json(model_structure)
#Load the trained weights saved above so the recreated model can make meaningful predictions
model.load_weights("model_weight.h5")
72 | #Load an image file to test
73 | import matplotlib.pyplot as plt
74 | from tensorflow.keras.utils import load_img,img_to_array
75 | img=load_img("dog.png",target_size=(32,32))
76 | plt.imshow(img)
77 | #Convert the image to a numpy array
78 | from tensorflow.keras.utils import img_to_array
79 | image_to_test=img_to_array(img)
80 | list_of_images=np.expand_dims(image_to_test,axis=0)
81 | #Make predictions using the model
82 | results=model.predict(list_of_images)
83 | #Since we are only testing one image, we only need to check the first result
84 | single_result=results[0]
85 | #We will get a likelihood score for all 10 possible classes. Find out which class has the highest score
86 | most_likely_class_index=int(np.argmax(single_result))
87 | class_likelihood=single_result[most_likely_class_index]
88 | #Print the result
89 | print("This is an image of a {} - likelihood: {:.2f}".format(class_labels[most_likely_class_index],class_likelihood))
--------------------------------------------------------------------------------
/project7onNLPandChatbot.py:
--------------------------------------------------------------------------------
1 | #Project on Sentiment Analysis using NLP and Chatbot using NLP
2 | #Sentiment
Classification using NLP and Classification Algorithm 3 | '''Sentiment Analysis is a means to identify the view or emotion behind a situation. 4 | It basically means to analyze and find the emotion or intent behind a piece of text or speech or any model of communication. 5 | This burger has a very bad taste- negative review 6 | I ordered this pizza today- neutral sentiment/review 7 | I love this cheese sandwich, its so delicious- positive review''' 8 | import pandas as pd 9 | import matplotlib.pyplot as plt 10 | import seaborn as sns 11 | import re 12 | 13 | import nltk 14 | from nltk.corpus import stopwords 15 | from nltk.stem import WordNetLemmatizer 16 | 17 | from sklearn.feature_extraction.text import CountVectorizer 18 | from sklearn.model_selection import GridSearchCV 19 | from sklearn.ensemble import RandomForestClassifier 20 | 21 | from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_score,roc_curve 22 | from sklearn.metrics import classification_report, plot_confusion_matrix 23 | 24 | df_train=pd.read_csv("train.txt",delimiter=";",names=['text','label']) 25 | df_val=pd.read_csv("val.txt",delimiter=";",names=['text','label']) 26 | 27 | df=pd.concat([df_train,df_val]) 28 | df.reset_index(inplace=True,drop=True) 29 | print("Shape of the dataframe:",df.shape) 30 | df.sample(5) 31 | 32 | import warnings 33 | warnings.filterwarnings("ignore") 34 | sns.countplot(df.label) 35 | 36 | '''Positive Sentiment- joy,love,surprise 37 | Negative Sentiment- anger,sadness,fear 38 | Now we will create a custom encoder to convert categorical target labels to numerical i.e. 0 and 1''' 39 | 40 | def custom_encoder(df): 41 | df.replace(to_replace="surprise",value=1,inplace=True) 42 | df.replace(to_replace="love",value=1,inplace=True) 43 | df.replace(to_replace="joy",value=1,inplace=True) 44 | df.replace(to_replace="fear",value=0,inplace=True) 45 | df.replace(to_replace="anger",value=0,inplace=True) 46 | df.replace(to_replace="sadness",value=0,inplace=True) 47 | custom_encoder(df['label']) 48 | sns.countplot(df.label) 49 | '''Preprocessing Steps:- 50 | Get rid of any characters apart from alphabets 51 | Convert the string to lowercase because Python is case-sensitive 52 | 3 check and remove the stopwords 53 | Perform Lemmatization''' 54 | 55 | lm=WordNetLemmatizer() 56 | def text_transformation(df_col): 57 | corpus=[] 58 | for item in df_col: 59 | new_item=re.sub('[^a-zA-Z]',' ',str(item)) 60 | new_item=new_item.lower() 61 | new_item=new_item.split() 62 | new_item=[lm.lemmatize(word) for word in new_item if word not in set(stopwords.words('english'))] 63 | corpus.append(' '.join(str(x) for x in new_item)) 64 | return corpus 65 | corpus=text_transformation(df['text']) 66 | cv=CountVectorizer(ngram_range=(1,2)) 67 | traindata=cv.fit_transform(corpus) 68 | x=traindata 69 | y=df.label 70 | '''Now we will fit the data into grid search and view the best parameters using the best_params attribute''' 71 | parameters={'max_features':('auto','sqrt'),'n_estimators':[5,10],'max_depth':[10,None],'min_samples_leaf':[5],'min_samples_leaf':[1],'bootstrap':[True]} 72 | grid_search=GridSearchCV(RandomForestClassifier(),parameters,cv=5,return_train_score=True,n_jobs=-1) 73 | grid_search.fit(x,y) 74 | grid_search.best_params_ 75 | '''We can view all the models and their respective parameters,mean test score and rank as GridSearch CV''' 76 | for i in range(8): 77 | print('Parameters:',grid_search.cv_results_['params'][i]) 78 | print('Mean test 
Score:',grid_search.cv_results_[mean_test_score'][i]) 79 | print("Rank:",grid_search.cv_results_['rank_test_score']) 80 | 81 | '''Now we will choose the best parameter obtained from GridSearchCV and create a final random forest classifier model and then train our model.''' 82 | 83 | rfc=RandomForestClassifier(max_features=grid_search.best_params_['max_features'],max_depth=grid_search.best_params_['max_depth'],n_estimators=grid_search.best_params_['n_estimators'],min_samples_split=grid_search.best_params_['min_samples_split'],min_samples_leaf=grid_search.best_params_['min_samples_leaf'],bootstrap=grid_search.best_params_['bootstrap']) 84 | rfc.fit(x,y) 85 | 86 | #Test Data Transformation 87 | test_df=pd.read_csv('test.txt',delimiter=';',names=['text','label']) 88 | X_test,y_test=test_df.text,test_df.label 89 | #encode the labels into two classes 0 and 1 90 | test_df=custom_encoder(y_test) 91 | #preprocessing of text 92 | test_corpus=text_transformation(X_test) 93 | #convert the text data into vectors 94 | testdata=cv.transform(test_corpus) 95 | #predict the target 96 | predictions=rfc.predict(testdata) 97 | 98 | #Model Evaluation 99 | '''We will evaluate our model using various metrics such as accuracy score, recall score confusion matrix.''' 100 | 101 | acc_score=accuracy_score(y_test,predictions) 102 | pre_score=precision_score(y_test,predictions) 103 | rec_score=recall_score(y_test,predictions) 104 | print('Accuracy Score:',acc_score) 105 | print('Precision Score:',pre_score) 106 | print('Recall Score:',rec_score) 107 | print("-"*50) 108 | cr=classification_report(y_test,predictions) 109 | print(cr) 110 | '''ROC Curve- We will plot probability of the class using the predict_proba() method of random forest classifier 111 | and then we will plot the curve.''' 112 | predictions_probability=rfc.predict_proba(testdata) 113 | fpr,tpr,thresfolds=roc_curve(y_test,predictions_probability[:,1]) 114 | plt.plot(fpr,tpr) 115 | plt.plot([0,1]) 116 | plt.title('ROC Curve') 117 | plt.xlabel('False Positive Rate') 118 | plt.ylabel('True Positive Rate') 119 | plt.show() 120 | '''As we can see that our model performed very well in classifying the sentiments, with an accuracy score, precision score and recall score of approx 96% 121 | Now we will check for custom input as well and let our model identity the sentiment of the input statement.''' 122 | 123 | def expression_check(prediction_input): 124 | if prediction_input==0: 125 | print("Input statement has negative sentiment") 126 | elif prediction_input==1: 127 | print("Input statement has positive sentiment") 128 | else: 129 | print("Invalid Statement") 130 | '''Function to take the input statement and performs the same transformation as we did earlier''' 131 | def sentiment_predictor(input): 132 | input=text_transformation(input) 133 | transformed_input=cv.transform(input) 134 | predictions=rfc.predict(transformed_input) 135 | expression_check(prediction) 136 | input1=["Sometimes I just don't want to go out"] 137 | input2=["I bought a new phone and it's so good"] 138 | sentiment_predictor(input1) 139 | sentiment_predictor(input2) 140 | '''Input statement has negative sentiment 141 | Input statement has positive statement''' 142 | 143 | #Chatbot using NLP and Neural Networks in Python 144 | '''Tag means classes 145 | Patterns means what user is going to ask 146 | Response is chatbot reponse''' 147 | data={"intents":[{"tag":"greetings","patterns":["Hello","How are you?","Hi There","Hi", "What's up"],"responses":["Howdy Partner!","Hello","How are you 
doing?","Greetings!","How do you do"]},{"tag":"age","patterns":["how old are you","when is your birthday","when was you born"],"responses":["I am 24 years old","I was born in 1966","My birthday is July 3rd and I was born in 1996","03/07/1996"]},{"tag":"date","patterns":["what are you doing this weekend","do you want to hangout sometime?","what are your plans for this week"],"responses":["I am available this week","I don't have any plans","I am not busy"]},{"tag":"name","patterns":["what's your name","what are you called","who are you"],"responses":["My name is Kippi","I'm Kippi","Kippi"]},{"tag":"goodbye","patterns":["bye","g2g","see ya","adios","cya"],"responses":["It was nice speaking to you","See you later","Speak Soon"]},]} 148 | '''For each tag we created, we would specify patterns. Essentially this defines the different ways of how a user may pose a query to the chatbot. 149 | The chatbot would then take these patterns and use them as training data to determine what someone is asking and the chatbot reponse would be relevant to that question.''' 150 | import json 151 | import string 152 | import random 153 | import nltk 154 | import numpy as np 155 | from nltk.stem import WordNetLemmatizer 156 | import tensorflow as tf 157 | from tensorflow.keras import Sequential 158 | from tensorflow.keras.layers import Dense,Dropout 159 | nltk.download("punkt") 160 | nltk.download("wordnet") 161 | '''In order to create our training data below steps to be followed 162 | Create a vocabulary of all the words used in the patterns 163 | Create a list of the classes-tag of each intent 164 | Create a list of all the patterns within the intents file 165 | Create a list of all the associated tags to go with each patterns in the intents file. 166 | Initializing lemmatizer to get stem of words''' 167 | lemmatizer=OrdNetLemmatizer() 168 | words=[] 169 | classes=[] 170 | doc_x=[] 171 | doc_y=[] 172 | '''Loop through all the intents 173 | Tokenize each pattern and append token to words, the patterns and the associated tag to their associated list''' 174 | for intent in data["intents"]: 175 | for pattern in intent["patterns"]: 176 | tokens=nltk.word_tokenize(pattern) 177 | words.extend(tokens) 178 | doc_x.append(pattern) 179 | doc_y.append(intent["tag"]) 180 | if intent["tag"] not in classes: 181 | classes.append(intent["tag"]) 182 | #Lemmatize all the words in the vocab and convert them to lowercase 183 | words=[lemmatizer.lemmatize(word.lower()) for word in words if word not in string.punctuation] 184 | '''Sorting the vocab and classes in alphabetical order and taking the set to ensure no duplicates occur''' 185 | words=sorted(set(words) 186 | classes=sorted(set(classes)) 187 | print(words) 188 | print(classes) 189 | print(doc_x) 190 | print(doc_y) 191 | #List for training data 192 | training=[] 193 | out_empty=[0]*len(classes) 194 | #creating a bag of words model 195 | for idx,doc in enumerate(doc_x): 196 | bow=[] 197 | text=lemmmatizer.lemmatize(doc.lower()) 198 | for word in words: 199 | bow.append(1) if word in text else bow.append(0) 200 | output_row=list(out_empty) 201 | output_row[classes.index(doc_y[idx])]=1 202 | training.append([bow,output_row]) 203 | random.shuffle(training) 204 | training=np.array(training,dtype=object) 205 | train_X=np.array(list(training[:,0])) 206 | train_y=np.array(list(training[:,1])) 207 | '''The model will look at the features and predict the tag associated with the features and then will select an appropriate message/response from the tag.''' 208 | 
input_shape=(len(train_X[0]),) 209 | output_shape=len(train_y[0]) 210 | epochs=500 211 | from tensorflow.keras.models import Sequential 212 | from tensorflow.keras.layers import Dense,Dropout 213 | #Create a Sequential model 214 | model=Sequential() 215 | model.add(Dense(128,input_shape=input_shape,activation='relu')) 216 | model.add(Dropout(0.5)) 217 | model.add(Dense(64,activation='relu')) 218 | model.add(Dropout(0.3)) 219 | model.add(Dense(output_shape,activation='softmax')) 220 | #Create the Adam optimizer with a specified learning rate 221 | adam=tf.keras.optimizers.Adam(learning_rate=0.01) 222 | #compile the model using the Adam optimizer 223 | model.compile(loss='categorical_crossentropy',optimizer=adam,metrics=['accuracy']) 224 | print(model.summary()) 225 | model.fit(x=train_X,y=train_y,epochs=500,verbose=1) 226 | def clean_text(text): 227 | tokens=nltk.word_tokenize(text) 228 | tokens=[lemmatizer.lemmatize(word) for word in tokens] 229 | return tokens 230 | def bag_of_words(text,vocab): 231 | tokens=clean_text(text) 232 | bow=[0]*len(vocab) 233 | for w in tokens: 234 | for idx,word in enumerate(vocab): 235 | if word==w: 236 | bow[idx]=1 237 | return np.array(bow) 238 | def pred_class(text,vocab,labels): 239 | bow=bag_of_words(text,vocab) 240 | result=model.predict(np.array([bow]))[0] 241 | thresh=0.2 242 | y_pred=[[idx,res] for idx,res in enumerate(result) if res>thresh] 243 | y_pred.sort(key=lambda x:x[1],reverse=True) 244 | return_list=[] 245 | for r in y_pred: 246 | return_list.append(labels[r[0]]) 247 | return return_list 248 | def get_response(intents_list,intent_json): 249 | tag=intents_list[0] 250 | list_of_intents=intents_json["intents"] 251 | for i in list_of_intents: 252 | if i["tag"]==tag: 253 | result=random.choice(i["responses"]) 254 | break 255 | return result 256 | #Running the chatbot 257 | while True: 258 | message=input("") 259 | intents=pred_class(message,words,classes) 260 | result=get_response(intents,data) 261 | print(result) -------------------------------------------------------------------------------- /pyjokes.py: -------------------------------------------------------------------------------- 1 | import pyjokes 2 | 3 | print("Printing Jokes...") 4 | 5 | # This prints a random joke 6 | joke = pyjokes.get_joke() 7 | print(joke) -------------------------------------------------------------------------------- /pyramid.py: -------------------------------------------------------------------------------- 1 | ''' 2 | For n = 3 3 | * 4 | *** 5 | ***** 6 | 7 | For n = 5 8 | * 9 | *** 10 | ***** 11 | ******** 12 | ********** 13 | 14 | ''' 15 | 16 | n = int(input("Enter the number: ")) 17 | for i in range(1, n+1): 18 | print(" "* (n-i), end="") 19 | print("*"* (2*i-1), end="") 20 | print("") 21 | -------------------------------------------------------------------------------- /queueUsingList.py: -------------------------------------------------------------------------------- 1 | il=[] 2 | while True: 3 | c=int(input(''' 4 | 1 Enqueue 5 | 2 Dequeue 6 | 3 Front Elements 7 | 4 Rear Elements 8 | 5 Display Elements 9 | 6 Exit 10 | ''')) 11 | if c==1: 12 | n=input("Enter The Value:") 13 | l.append(n) 14 | print(l) 15 | elif c==2: 16 | if len(l)==0: 17 | print("Empty Queue") 18 | else: 19 | del l[0] 20 | print(l) 21 | elif c==3: 22 | if len(l)==0: 23 | print("Empty Queue") 24 | else: 25 | print("Front Queue Value=>",l[0]) 26 | elif c==4: 27 | if len(l)==0: 28 | print("Empty Queue") 29 | else: 30 | print("Rear Queue Value=>",l[-1]) 31 | elif c==5: 32 | print("Display 
Queue=>",l) 33 | elif c==6: 34 | break 35 | else: 36 | print("Invalid Operation") 37 | -------------------------------------------------------------------------------- /randomModule.py: -------------------------------------------------------------------------------- 1 | from random import randint 2 | 3 | class Train: 4 | 5 | def __init__(self, trainNo): 6 | self.trainNo = trainNo 7 | 8 | def book(self, fro, to): 9 | print(f"Ticket is booked in train no: {self.trainNo} from {fro} to {to}") 10 | 11 | def getStatus(self): 12 | print(f"Train no: {self.trainNo} is running on time") 13 | 14 | def getFare(self, fro, to): 15 | print(f"Ticket fare in train no: {self.trainNo} from {fro} to {to} is {randint(222, 5555)}") 16 | 17 | 18 | t = Train(12399) 19 | t.book("Rampur", "Delhi") 20 | t.getStatus() 21 | t.getFare("Rampur", "Delhi") 22 | -------------------------------------------------------------------------------- /replace.py: -------------------------------------------------------------------------------- 1 | name = "Hariram is a good boy and Geeta is a good girl." 2 | 3 | print(name.replace(" ", " ")) 4 | print(name) # Strings are immutable which means that you cannot change them by running functions on them 5 | -------------------------------------------------------------------------------- /rockPaperScissor.py: -------------------------------------------------------------------------------- 1 | import random 2 | l=["rock","scissor","paper"] 3 | ''' 4 | rock vs paper => paper wins 5 | rock vs scissor => rock wins 6 | paper vs scissor => scissor wins 7 | 8 | ''' 9 | while True: 10 | ocount=0 11 | ucount=0 12 | uc=int(input(''' 13 | Game Start..... 14 | 1 Yes 15 | 2 No | Exit 16 | ''')) 17 | if uc==1: 18 | for a in range(1,6): 19 | userInput= int(input(''' 20 | 1 Rock 21 | 2 Scissor 22 | 3 Paper 23 | ''' )) 24 | if userInput==1: 25 | uchoice="rock" 26 | elif userInput==2: 27 | uchoice="scissor" 28 | elif userInput==3: 29 | uchoice="paper" 30 | Ochoice=random.choice(l) 31 | if Ochoice==uchoice: 32 | print("Opponent choice",Ochoice) 33 | print("User choice",uchoice) 34 | print("Game Draw") 35 | ucount=ucount+1 36 | ocount=ocount+1 37 | elif(uchoice=="rock" and Ochoice=="scissor") or (uchoice=="paper" and Ochoice=="rock") or (uchoice=="scissor" and Ochoice=="paper"): 38 | print("Opponent Choice",Ochoice) 39 | print("User choice",uchoice) 40 | print("You Win") 41 | ucount=ucount+1 42 | else: 43 | print("Opponent Choice",Ochoice) 44 | print("User choice",uchoice) 45 | print("Opponent Win") 46 | ocount=ocount+1 47 | if ucount==ocount: 48 | print("Final Game Draw....." ) 49 | print("User Score",ucount ) 50 | print("Opponent Score",ocount ) 51 | elif ucount>ocount: 52 | print("Final You Win The Game....." ) 53 | print("User Score",ucount ) 54 | print("Opponent Score",ocount ) 55 | else: 56 | print("Final Opponent Win The Game....." 
) 57 | print("User Score",ucount ) 58 | print("Opponent Score",ocount ) 59 | else: 60 | break 61 |
-------------------------------------------------------------------------------- /sets.py: -------------------------------------------------------------------------------- 1 | s={10,20,30,40} 2 | print(s) 3 | for a in s: 4 | print(a) 5 | l=[10,20,30,40] 6 | s=set(l) 7 | print(s) 8 | s={10,20,30,40,50} 9 | s.remove(20) 10 | print(s) 11 | s.discard(50) 12 | print(s) 13 | print(s.pop()) 14 | print(s) 15 | s.clear() 16 | print(s) 17 | l=[10,80,90] 18 | s={10,20,30,40,50} 19 | s.add(60) 20 | print(s) 21 | s.update(l) 22 | print(s) 23 |
-------------------------------------------------------------------------------- /slicing_concat.py: -------------------------------------------------------------------------------- 1 | import pandas as pd # type: ignore 2 | 3 | # Initializing the nested list with Data set 4 | player_list = [['M.S.Dhoni', 36, 75, 5428000], 5 | ['A.B.D Villers', 38, 74, 3428000], 6 | ['V.Kohli', 31, 70, 8428000], 7 | ['S.Smith', 34, 80, 4428000], 8 | ['C.Gayle', 40, 100, 4528000], 9 | ['J.Root', 33, 72, 7028000], 10 | ['K.Peterson', 42, 85, 2528000]] 11 | 12 | 13 | # creating a pandas dataframe 14 | df = pd.DataFrame(player_list, columns=['Name', 'Age', 'Weight', 'Salary']) 15 | print(df) # data frame before slicing 16 |
-------------------------------------------------------------------------------- /sorting.py: -------------------------------------------------------------------------------- 1 | list1 = [(1,2),(3,3),(1,1)] 2 | list1.sort() 3 | print(list1) 4 | list1.sort(reverse=True) 5 | print(list1) 6 | # Original list of strings 7 | words = ["apple", "banana", "kiwi", "orange", "grape"] 8 | words.sort() 9 | print("Sorted in alphabetical order:",words) 10 | # Sorting by length using the len() function as the key 11 | words.sort(key=len) 12 | # Displaying the sorted list 13 | print("Sorted by Length:", words) 14 | # Original list of tuples 15 | people = [("Alice", 25), ("Bob", 30), ("Charlie", 22), ("David", 28)] 16 | # Sorting by the second element of each tuple (age) 17 | people.sort(key=lambda x: x[1]) 18 | # Displaying the sorted list 19 | print("Sorted by Age in tuple:", people) 20 | # Original list of dictionaries 21 | students = [ 22 | {"name": "Alice", "age": 25}, 23 | {"name": "Bob", "age": 30}, 24 | {"name": "Charlie", "age": 22}, 25 | {"name": "David", "age": 28}] 26 | # Sorting by the 'age' key in each dictionary 27 | students.sort(key=lambda x: x["age"]) 28 | # Displaying the sorted list 29 | print("Sorted by Age in dictionary:", students)
-------------------------------------------------------------------------------- /stackUsingList.py: -------------------------------------------------------------------------------- 1 | l=[] 2 | while True: 3 | c=int(input(''' 4 | 1 Push Elements 5 | 2 Pop Elements 6 | 3 Peek Elements 7 | 4 Display Elements 8 | 5 Exit 9 | ''')) 10 | if c==1: 11 | n=input("Enter The Value:") 12 | l.append(n) 13 | print(l) 14 | elif c==2: 15 | if len(l)==0: 16 | print("Empty Stack") 17 | else: 18 | p=l.pop() 19 | print(p) 20 | print(l) 21 | elif c==3: 22 | if len(l)==0: 23 | print("Empty Stack") 24 | else: 25 | print("Last Stack Value=>",l[-1]) 26 | elif c==4: 27 | print("Display Stack=>",l) 28 | elif c==5: 29 | break 30 | else: 31 | print("Invalid Operation") 32 |
-------------------------------------------------------------------------------- /startswith.py: -------------------------------------------------------------------------------- 1 | l = ["Ram", "Soham", "Sachin", "Rahul"] 2 | 3 | for name in l: 4 | if(name.startswith("S")): 5 | print(f"Hello {name}") 6 |
-------------------------------------------------------------------------------- /staticMethodINclass.py: -------------------------------------------------------------------------------- 1 | class Calculator: 2 | def __init__(self, n): 3 | self.n = n 4 | 5 | def square(self): 6 | print(f"The square is {self.n*self.n}") 7 | 8 | def cube(self): 9 | print(f"The cube is {self.n*self.n*self.n}") 10 | 11 | def squareroot(self): 12 | print(f"The squareroot is {self.n**0.5}") 13 | 14 | @staticmethod 15 | def hello(): 16 | print("Hello there!") 17 | 18 | a = Calculator(4) 19 | a.hello() 20 | a.square() 21 | a.cube() 22 | a.squareroot() 23 |
-------------------------------------------------------------------------------- /statistics.py: -------------------------------------------------------------------------------- 1 | '''Write a program to compute summary statistics such as mean, median, mode, standard 2 | deviation and variance of the given different types of data.''' # type: ignore 3 | import numpy as np 4 | a=np.array([[1,23,78],[98,60,75],[79,25,48]]) 5 | print("Entered array:",a) 6 | #Minimum function 7 | print("Minimum=",np.amin(a)) 8 | #Maximum Function 9 | print("Maximum=",np.amax(a)) 10 | #Mean Function 11 | print("Mean=",np.mean(a)) 12 | #Median Function 13 | print("Median=",np.median(a)) 14 | #std Function 15 | print("Standard Deviation=",np.std(a)) 16 | #var Function 17 | print("Variance=",np.var(a))
-------------------------------------------------------------------------------- /stringFormatting.py: -------------------------------------------------------------------------------- 1 | #String Formatting 2 | #named indexes: 3 | txt1="Welcome to {fname} {lname}".format(fname="AI",lname="World !!!") 4 | #numbered indexes: 5 | txt2="Welcome to {0} {1}".format("AI","World !!!") 6 | #empty placeholders 7 | txt3="Welcome to {} {}".format("AI","World !!!") 8 | txt4="Welcome to {b:10} {a}".format(a="AI",b="World !!!") 9 | ''' ^ ---- use it for center 10 | < ---- use it for left align 11 | > ---- use it for right align''' 12 | txt5="Welcome to {a:^10} {b}".format(a="AI",b="World !!!") 13 | print(txt1) 14 | print(txt2) 15 | print(txt3) 16 | print(txt4) 17 | print(txt5) 18 |
-------------------------------------------------------------------------------- /stringManipulations.py: -------------------------------------------------------------------------------- 1 | #data aggregation & grouping operations, Visualisation using Matplotlib 2 | #String Manipulations 3 | import numpy as np; import pandas as pd #used by the data wrangling and plotting sections below 4 | val="a,b,,guido" 5 | pieces=[x.strip() for x in val.split(",")] 6 | print(pieces) 7 | first,second,third,fourth=pieces 8 | first+"::"+second+"::"+third 9 | "::".join(pieces) 10 | 11 | #Data Wrangling 12 | data=pd.Series(np.random.uniform(size=9),index=[["a","a","a","b","b","c","c","d","d"],[1,2,3,1,3,1,2,2,3]]) 13 | print(data) 14 | data.index 15 | data['b'] 16 | data['b'][3] 17 | data["b":"c"] 18 | data.loc[["b","d"]] 19 | data.unstack() 20 | frame=pd.DataFrame(np.arange(12).reshape((4,3)),index=[["a","a","b","b"],[1,2,1,2]],columns=[["Ohio","Ohio","Colorado"],["Green","Red","Green"]]) 21 | print(frame) 22 | frame.index.names=["key1","key2"] 23 | frame.columns.names=["state","color"] 24 | frame.index.nlevels 25 | 26 | #combining and merging datasets 27 | '''pandas.merge-> 28 | Connect rows in DataFrames based on one or more keys 29 | pandas.concat-> 30 | Concatenate or stack objects together along an axis 31 | combine_first-> 32 | Splice together overlapping data to 
fill in missing values in one object with values from another''' 33 | df1=pd.DataFrame({"key":["b","b","a","c","a","a","b"],"data1":pd.Series(range(7),dtype="Int64")}) 34 | df2=pd.DataFrame({"key":["a","b","d"],"data2":pd.Series(range(3),dtype="Int64")}) 35 | print(df1) 36 | print(df2) 37 | pd.merge(df1,df2) 38 | df3=pd.DataFrame({"lkey":["b","b","a","c","a","a","b"],"data1":pd.Series(range(7),dtype="Int64")}) 39 | df4=pd.DataFrame({"rkey":["a","b","d"],"data2":pd.Series(range(3),dtype="Int64")}) 40 | pd.merge(df3,df4,left_on="lkey",right_on="rkey") 41 | pd.merge(df1,df2,how="outer") 42 | pd.merge(df3,df4,left_on="lkey",right_on="rkey",how="outer") 43 | df1=pd.DataFrame({"key":["b","b","a","c","a","b"],"data1":pd.Series(range(6),dtype="Int64")}) 44 | df2=pd.DataFrame({"key":["a","b","a","b","d"],"data2":pd.Series(range(5),dtype="Int64")}) 45 | print(df1) 46 | print(df2) 47 | pd.merge(df1,df2,on="key",how="left") 48 | pd.merge(df1,df2,how="inner") 49 | left=pd.DataFrame({"key1":["foo","foo","bar"],"key2":["one","two","three"],"lval":pd.Series([1,2,3],dtype="Int64")}) 50 | right=pd.DataFrame({"key1":["foo","foo","bar","bar"],"key2":["one","one","one","two"],"rval":pd.Series([4,5,6,7],dtype="Int64")}) 51 | print(left) 52 | print(right) 53 | pd.merge(left,right,on=["key1","key2"],how="outer") 54 | left1=pd.DataFrame({"key":["a","b","a","a","b","c"],"value":pd.Series(range(6),dtype="Int64")}) 55 | right1=pd.DataFrame({"group_val":[3.5,7]},index=["a","b"]) 56 | print(left1) 57 | print(right1) 58 | pd.merge(left1,right1,left_on="key",right_index=True) 59 | 60 | #Concatenating along an axis 61 | arr=np.arange(12).reshape((3,4)) 62 | print(arr) 63 | np.concatenate([arr,arr],axis=1) 64 | np.concatenate([arr,arr]) 65 | s1=pd.Series([0,1],index=["a","b"],dtype="Int64") 66 | s2=pd.Series([2,3,4],index=["c","d","e"],dtype="Int64") 67 | s3=pd.Series([5,6],index=["f","g"],dtype="Int64") 68 | pd.concat([s1,s2,s3]) 69 | pd.concat([s1,s2,s3],axis="columns") 70 | a=pd.Series([np.nan,2.5,0.0,3.5,4.5,np.nan],index=["f","e","d","c","b","a"]) 71 | b=pd.Series([0.,np.nan,2.,np.nan,np.nan,5.],index=["a","b","c","d","e","f"]) 72 | print(a) 73 | print(b) 74 | np.where(pd.isna(a),b,a) 75 | a.combine_first(b) 76 | 77 | #Plotting and Visualisation 78 | import matplotlib.pyplot as plt 79 | data=np.arange(10) 80 | print(data) 81 | plt.plot(data) 82 | 83 | #Plots in Matplotlib reside within a figure object 84 | fig=plt.figure() 85 | ax1=fig.add_subplot(2,2,1) 86 | ax1.hist(np.random.standard_normal(100),bins=20,color="black",alpha=0.6) 87 | ax2=fig.add_subplot(2,2,2) 88 | ax2.scatter(np.arange(30),np.arange(30)+3*np.random.standard_normal(30)) 89 | ax3=fig.add_subplot(2,2,3) 90 | ax3.plot(np.random.standard_normal(50).cumsum(),color="black",linestyle="dashed") 91 | ax4=fig.add_subplot(2,2,4) 92 | fig,axes=plt.subplots(2,2,sharex=True,sharey=True) 93 | for i in range(2): 94 | for j in range(2): 95 | axes[i,j].hist(np.random.standard_normal(500),bins=50,color="black",alpha=0.5) 96 | fig.subplots_adjust(wspace=0,hspace=0) 97 | fig=plt.figure() 98 | ax=fig.add_subplot() 99 | ax.plot(np.random.standard_normal(30).cumsum(),color="black",linestyle="dashed",marker="s") 100 | #customising x-axis ticks, labels and title 101 | fig,ax=plt.subplots() 102 | ax.plot(np.random.standard_normal(1000).cumsum()) 103 | ticks=ax.set_xticks([0,250,500,750,1000]) 104 | labels=ax.set_xticklabels(["one","two","three","four","five"],rotation=30,fontsize=10) 105 | ax.set_xlabel("Stages") 106 | ax.set_title("Matplotlib Plot") 107 | #plotting multiple series with a legend 108 | fig,ax=plt.subplots()
109 | ax.plot(np.random.randn(1000).cumsum(),color="black",label="one") 110 | ax.plot(np.random.randn(1000).cumsum(),color="blue",linestyle="dashed",label="two") 111 | ax.plot(np.random.randn(1000).cumsum(),color="red",linestyle="dotted",label="three") 112 | ax.legend() 113 | #bar plots from a pandas Series and DataFrame 114 | fig,axes=plt.subplots(2,1) 115 | data=pd.Series(np.random.uniform(size=16),index=list("abcdefghijklmnop")) 116 | data.plot.bar(ax=axes[0],color="red",alpha=0.7) 117 | data.plot.barh(ax=axes[1],color="purple",alpha=0.5) 118 | df=pd.DataFrame(np.random.uniform(size=(6,4)),index=["one","two","three","four","five","six"],columns=pd.Index(["A","B","C","D"],name="Genius")) 119 | print(df) 120 | df.plot.bar() 121 | df.plot.barh(stacked=True,alpha=0.5) 122 | df.plot.bar(stacked=True,alpha=0.5) 123 | plt.show() #display all the figures created above
-------------------------------------------------------------------------------- /stringToList.py: -------------------------------------------------------------------------------- 1 | n=input("Enter The Value:") 2 | print(n) 3 | l=n.split() 4 | print(l) 5 | l=[] 6 | for a in range(1,4): 7 | n=input("Enter The Value"+str(a)+"=") 8 | l.append(n) 9 | print(l) 10 |
-------------------------------------------------------------------------------- /table.py: -------------------------------------------------------------------------------- 1 | n = int(input("Enter a number: ")) 2 | 3 | for i in range(1, 11): 4 | print(f"{n} X {i} = {n * i}") 5 |
-------------------------------------------------------------------------------- /textTOspeech.py: -------------------------------------------------------------------------------- 1 | import pyttsx3 2 | engine = pyttsx3.init() 3 | engine.say("Hey I am good") 4 | engine.runAndWait() 5 |
-------------------------------------------------------------------------------- /tuple.py: -------------------------------------------------------------------------------- 1 | t=(10,20,30,40,50) 2 | print(type(t)) 3 | print(t) 4 | n=t[3] 5 | print(n) 6 | l=len(t) 7 | for a in range(l): 8 | print(t[a]) 9 | for a in t: 10 | print(a) 11 | print(min(t)) 12 | print(max(t)) 13 | print(t.count(10)) 14 | print(t.index(50)) 15 | print(sum(t)) 16 | print(sum(t,10)) 17 |
-------------------------------------------------------------------------------- /tuples.py: -------------------------------------------------------------------------------- 1 | #Write a program to demonstrate working with tuples in python 2 | #creating an empty tuple 3 | empty_tup=() 4 | print("Empty tuple=",empty_tup) 5 | #creating single element tuple 6 | single_tup=(10,) 7 | print("Single element tuple=",single_tup) 8 | #creating a tuple with multiple elements 9 | my_tup=(10,3.7,'program','a') 10 | print("Tuple with multiple elements is:",my_tup) 11 | print("Length of the tuple is:",len(my_tup)) 12 | T1=(10,20,30,40,70.5,33.3) 13 | print("Maximum value of the tuple T1 is:",max(T1)) 14 | print("Minimum value of the tuple T1 is:",min(T1)) 15 | str1='tuple' 16 | T=tuple(str1) #converting string into tuple 17 | print("After converting a string into tuple,the new tuple is:",T) 18 | L=[2,4,6,7,8] 19 | T2=tuple(L) #converting list into tuple 20 | print("After converting a list into tuple,the new tuple is:",T2)
-------------------------------------------------------------------------------- /usingListFun.py: -------------------------------------------------------------------------------- 1 | #creating an empty list 2 | empty_list=[] 3 | print("Empty List is :",empty_list) 4 | #creating a list with elements 5 | my_list= [10,507,"python"] 6 | print("Created list 
is:",my_list) 7 | #Inserting new elements using append( 8 | my_list.append(20) 9 | my_list.append("program") 10 | my_list.append([3,7]) 11 | print("After deleting elements the new list is:",my_list) 12 | #deleting elements using pop() 13 | my_list.pop() 14 | my_list.pop(2) 15 | #deleting elements using remove 16 | my_list.remove(10) 17 | print("After deleting elements the new list is:",my_list) -------------------------------------------------------------------------------- /wishing.py: -------------------------------------------------------------------------------- 1 | name = input("Enter your name: ") 2 | 3 | print(f"Good Afternoon, {name} ") 4 | -------------------------------------------------------------------------------- /youtubeTranscriptSummarizer.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from dotenv import load_dotenv 3 | load_dotenv()##load all the environment variables 4 | import os 5 | import google.generativeai as genai 6 | from youtube_transcript_api import YouTubeTranscriptApi 7 | genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) 8 | prompt="""You are Youtube Video summarizer. You will be taking the transcript text and summarizing the entire video and providing the important summary in points within 250 words. Please provide the summary of the text given here:""" 9 | ## getting the transcript data from yt videos 10 | def extract_transcript_details(youtube_video_url): 11 | try: 12 | video_id=youtube_video_url.split("=")[1] 13 | transcript_text=YouTubeTranscriptApi.get_transcript(video_id) 14 | transcript="" 15 | for i in transcript_text: 16 | transcript_text+=" "+i["text"] 17 | return transcript 18 | except Exception as e: 19 | raise e 20 | ## getting the summmary based on Prompt from Google Gemini Pro 21 | def generate_gemini_content(transcript_text,prompt): 22 | model=genai.GenerativeModel("gemini-pro") 23 | response=model.generate_context(prompt+transcript_text) 24 | return response.text 25 | st.title("YouTube Transcript to Detailed Notes Converter") 26 | youtube_link=st.text_input("Enter YouTube Video Link:") 27 | if youtube_link: 28 | video_id=youtube_link.split("=")[1] 29 | print(video_id) 30 | st.image(f"http://img.youtube.com/vi/{video_id}/0.jpg",use_column_width=True) 31 | if st.button("Get Detailed Notes"): 32 | transcript_text=extract_transcript_details(youtube_link) 33 | if transcript_text: 34 | summary=generative_gemini_content(transcript_text,prompt) 35 | st.markdown("## Detailed Notes:") 36 | st.write(summary) 37 | #streamlit run app.py 38 | -------------------------------------------------------------------------------- /zipFunction.py: -------------------------------------------------------------------------------- 1 | l=[10,20,40,50] 2 | l1=[3,4,77,88] 3 | t=len(l) 4 | for a,b in zip(l,l1): 5 | print(a,b) 6 | for h in range(t): 7 | print(l[h],l1[h]) 8 | --------------------------------------------------------------------------------