├── README.md ├── Data Science File ├── data.csv ├── Untitled4.ipynb └── 5_ExploratoryDataAnalysis.ipynb ├── simple.py ├── Assignment 3 ├── Question F.py ├── question b.py ├── Question G.py ├── Question E.py ├── question c.py ├── Question d.py ├── Question H.py ├── question i.py └── Algerian_forest_Modm.csv ├── Assignment 2 ├── Ven Diagram.py ├── Data Sheet.py └── Ven Diagram 2.py ├── Misccode.py ├── Question 1 ├── First Code.py ├── Statistics CT.py └── my file.py /README.md: -------------------------------------------------------------------------------- 1 | # Data-Science 2 | Reference by Dr Dr Ajit Kumar Majumdar, Daffodil International University, Dept. of Computer Science and Engineering. 3 | -------------------------------------------------------------------------------- /Data Science File/data.csv: -------------------------------------------------------------------------------- 1 | Student ID,Student Name,CGPA,Age,Semester,Gender 2 | 111,A,3.22,18,F22,male 3 | 222,B,,20,S22,female 4 | 333,C,3.62,,F23,female 5 | 444,D,4,20,F23,male 6 | 555,E,3.68,21,S23,male 7 | 666,F,3.89,22,S23,male 8 | 777,O,3.91,30,F23,other 9 | -------------------------------------------------------------------------------- /simple.py: -------------------------------------------------------------------------------- 1 | #Calculate mean, median, mode, standard deviation, variance, Minimum, Maximum of 5, 6, 7, 10, 12 2 | #Data Science Code 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | AA=[5,6,7,10,12] 8 | A=pd.DataFrame(AA) 9 | 10 | np.mean(A) 11 | np.median(A) 12 | np.min(A) 13 | np.std(A) 14 | np.var(A) 15 | -------------------------------------------------------------------------------- /Assignment 3/Question F.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import scipy.stats as stats 3 | # Read the CSV data 4 | data = pd.read_csv("../Assignment 3/Algerian_forest_Modm.csv") 5 | # Select a random sample of size 
180 with seed value 5364 6 | sample_data = data['Ws'].sample(n=180, random_state=5364) 7 | 8 | # f) Determine the coefficient of skewness and kurtosis 9 | skewness = stats.skew(sample_data) 10 | kurtosis = stats.kurtosis(sample_data) 11 | print("\nSkewness:", skewness) 12 | print("Kurtosis:", kurtosis) -------------------------------------------------------------------------------- /Assignment 3/question b.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | # Read the CSV data 4 | data = pd.read_csv("Algerian_forest_Modm.csv") 5 | # Select a random sample of size 180 with seed value 5364 6 | sample_data = data['Ws'].sample(n=180, random_state=5364) 7 | 8 | # b) Construct a histogram 9 | plt.figure(figsize=(10, 6)) 10 | plt.hist(sample_data, bins=20, edgecolor='black') 11 | plt.title('Histogram of Ws Values') 12 | plt.xlabel('Ws') 13 | plt.ylabel('Frequency') 14 | plt.grid(True) 15 | plt.show() -------------------------------------------------------------------------------- /Assignment 3/Question G.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | # Read the CSV data 4 | data = pd.read_csv("../Assignment 3/Algerian_forest_Modm.csv") 5 | # Select a random sample of size 180 with seed value 5364 6 | sample_data = data['Ws'].sample(n=180, random_state=5364) 7 | 8 | # g) Determine covariance and correlation matrices 9 | covariance_matrix = np.cov(sample_data) 10 | correlation_matrix = np.corrcoef(sample_data) 11 | print("\nCovariance Matrix:") 12 | print(covariance_matrix) 13 | print("\nCorrelation Matrix:") 14 | print(correlation_matrix) -------------------------------------------------------------------------------- /Assignment 3/Question E.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import statistics 3 | data = 
pd.read_csv("../Assignment 3/Algerian_forest_Modm.csv") 4 | # Select a random sample of size 180 with seed value 5364 5 | sample_data = data['Ws'].sample(n=180, random_state=5364) 6 | 7 | 8 | # e) Determine population standard deviation and population coefficient of variation 9 | mean_Ws = statistics.mean(sample_data) 10 | population_std_dev = statistics.pstdev(sample_data) 11 | population_coeff_var = (population_std_dev / mean_Ws) * 100 12 | print("\nPopulation Standard Deviation:", population_std_dev) 13 | print("Population Coefficient of Variation:", population_coeff_var) -------------------------------------------------------------------------------- /Assignment 2/Ven Diagram.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | # Create Drew Conway's Venn diagram 5 | plt.figure(figsize=(6, 4)) 6 | plt.title("Drew Conway's Venn Diagram") 7 | plt.text(0.5, 0.5, "Data Science", horizontalalignment='center', verticalalignment='center', fontsize=15) 8 | plt.text(0.2, 0.6, "Hacking Skills", horizontalalignment='center', verticalalignment='center', fontsize=12) 9 | plt.text(0.8, 0.6, "Math And Statistics", horizontalalignment='center', verticalalignment='center', fontsize=12) 10 | plt.text(0.5, 0.3, "Domain Expert", horizontalalignment='center', verticalalignment='center', fontsize=12) 11 | plt.axis('off') 12 | plt.show() 13 | -------------------------------------------------------------------------------- /Misccode.py: -------------------------------------------------------------------------------- 1 | #Misccode.py 2 | wt=[21.4,19.7,19.7,20.6,20.8,20.1,19.7,20.3,20.9] 3 | import scipy.stats as sc 4 | res=sc.describe(wt) 5 | 6 | import numpy as np 7 | m=res[2] 8 | v=res[3] 9 | s=np.sqrt(v) 10 | 11 | cv=(s/m)*100;cv 12 | 13 | sc.gmean(wt) 14 | sc.variation(wt) 15 | #cv=(std/mean)%100 16 | 17 | 18 | m1=100; sd1=15; 19 | m2=15; sd2=10; 20 | 21 | cv1=(sd1/m1)*100 22 | 
cv2=(sd2/m2)*100 23 | 24 | cv1 25 | cv2 26 | 27 | 28 | sc.variation(wt) 29 | 30 | sc.pmean(wt,1) 31 | 32 | x1=[20,29,12,45,12,34,23] 33 | x2=[13,5,19,39,56,78,22,34,2,5] 34 | x3=[4,5,8,22,55,34,67,12] 35 | 36 | cv1=sc.variation(x1);cv1 37 | 38 | import scipy.stats as sc 39 | def cv(xx): 40 | re=sc.variation(xx) 41 | return(re) 42 | 43 | cv(x1) 44 | cv(x2) 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /Assignment 3/question c.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import statistics 3 | import scipy.stats as stats 4 | # Read the CSV data 5 | data = pd.read_csv("Algerian_forest_Modm.csv") 6 | # Select a random sample of size 180 with seed value 5364 7 | sample_data = data['Ws'].sample(n=180, random_state=5364) 8 | 9 | 10 | # c) Calculate mean, median, mode, geometric mean, and harmonic mean 11 | mean_Ws = statistics.mean(sample_data) 12 | median_Ws = statistics.median(sample_data) 13 | mode_Ws = stats.mode(sample_data) 14 | geometric_mean_Ws = stats.gmean(sample_data) 15 | harmonic_mean_Ws = statistics.harmonic_mean(sample_data) 16 | print("Mean:", mean_Ws) 17 | print("Median:", median_Ws) 18 | print("Mode:", mode_Ws) 19 | print("Geometric Mean:", geometric_mean_Ws) 20 | print("Harmonic Mean:", harmonic_mean_Ws) -------------------------------------------------------------------------------- /Question 1: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import pearsonr 3 | 4 | # Given temperature data 5 | temperatures = [22, 25, 29, 30, 25, 25, 24, 25, 26, 28, 22, 26, 26, 25, 27, 31, 30, 28, 25, 27, 33, 30, 29, 30, 30, 29, 28, 30, 29, 27, 32, 33, 30, 29, 29, 32, 31, 32, 29, 34, 35, 35, 30, 28, 31, 36, 29, 32, 28, 31] 6 | 7 | # Calculate correlation coefficients for each pair of consecutive readings 8 | correlation_coefficients = [] 9 | for i in 
range(len(temperatures) - 1): 10 | corr, _ = pearsonr([temperatures[i]], [temperatures[i+1]]) 11 | correlation_coefficients.append(corr) 12 | 13 | # Identify the highest and lowest correlation coefficient values 14 | max_corr = max(correlation_coefficients) 15 | min_corr = min(correlation_coefficients) 16 | 17 | print("Highest correlation coefficient:", max_corr) 18 | print("Lowest correlation coefficient:", min_corr) 19 | -------------------------------------------------------------------------------- /Assignment 2/Data Sheet.py: -------------------------------------------------------------------------------- 1 | # Question no i solve problem ( fire / not fire ) code 2 | import numpy as np 3 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis 4 | 5 | # Given data points for FWI and RH 6 | RH = np.array([79, 67, 63, 56]) 7 | FWI = np.array([0.21, 2.5, 10.2, 6.1]) 8 | 9 | # Estimate Fisher Linear Discriminant Function 10 | X = np.column_stack((RH, FWI)) 11 | y = np.array([1, 1, 2, 2]) # Assuming 1 for "fire" and 2 for "not fire" 12 | 13 | lda = LinearDiscriminantAnalysis() 14 | lda.fit(X, y) 15 | 16 | # Classify the given points 17 | new_data = np.array([[79, 0.21], [67, 2.5], [63, 10.2], [56, 6.1]]) 18 | predicted_classes = lda.predict(new_data) 19 | 20 | for i in range(len(new_data)): 21 | print(f"FWI: {new_data[i][1]}, RH: {new_data[i][0]}, Classified as: {'fire' if predicted_classes[i] == 1 else 'not fire'}") 22 | -------------------------------------------------------------------------------- /Assignment 3/Question d.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import statistics 4 | import numpy as np 5 | import scipy.stats as stats 6 | # Read the CSV data 7 | data = pd.read_csv("../Assignment 3/Algerian_forest_Modm.csv") 8 | # Select a random sample of size 180 with seed value 5364 9 | sample_data = data['Ws'].sample(n=180, 
random_state=5364) 10 | 11 | # d) Report five summary measures and construct a box plot 12 | summary_measures = { 13 | 'Minimum': np.min(sample_data), 14 | '1st Quartile (Q1)': np.percentile(sample_data, 25), 15 | 'Median': np.median(sample_data), 16 | '3rd Quartile (Q3)': np.percentile(sample_data, 75), 17 | 'Maximum': np.max(sample_data) 18 | } 19 | print("\nSummary Measures:") 20 | for measure, value in summary_measures.items(): 21 | print(f"{measure}: {value}") 22 | plt.boxplot(sample_data) 23 | plt.title('Box Plot of Ws Values') 24 | plt.ylabel('Ws') 25 | plt.show() -------------------------------------------------------------------------------- /Assignment 3/Question H.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import scipy.stats as stats 3 | # Read the CSV data 4 | data = pd.read_csv("../Assignment 3/Algerian_forest_Modm.csv") 5 | # Select a random sample of size 180 with seed value 5364 6 | sample_data = data['Ws'].sample(n=180, random_state=5364) 7 | 8 | # h) Estimate the regression equation of Temperature on RH 9 | # Assuming RH as independent variable (X) and Temperature as dependentvariable (Y) 10 | RH = data['RH'].sample(n=180, random_state=5364) 11 | Temperature = data['Temperature'].sample(n=180, random_state=5364) 12 | RH_reshaped = RH.values.reshape(-1, 1) 13 | # Fit the linear regression model 14 | reg_model = stats.linregress(Temperature,RH) 15 | # Extract the regression coefficients 16 | slope = reg_model.slope 17 | intercept = reg_model.intercept 18 | print("\nRegression Equation:") 19 | print(f"RH = {slope:.4f} * Temperature + {intercept:.4f}") 20 | # Estimate RH when Temperature is 21.5% 21 | predicted_rh = slope * 21.5 + intercept 22 | print("Estimated RH when Temperature is 21.5%:", predicted_rh) -------------------------------------------------------------------------------- /Assignment 2/Ven Diagram 2.py: 
-------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from matplotlib_venn import venn3 3 | 4 | # Define the sizes of each group 5 | sizes = { 6 | '100': 20, # Math and Statistics 7 | '010': 25, # Domain Experts 8 | '001': 30, # Hacking Skills 9 | '110': 15, # Math and Statistics & Domain Experts 10 | '101': 10, # Math and Statistics & Hacking Skills 11 | '011': 18, # Domain Experts & Hacking Skills 12 | '111': 7 # Math and Statistics & Domain Experts & Hacking Skills 13 | } 14 | # Create the Venn diagram 15 | venn = venn3(subsets=sizes, set_labels=('Math and Statistics', 'Domain Experts', 'Hacking Skills')) 16 | 17 | # Label each subset 18 | venn.get_label_by_id('100').set_text('Math and Statistics') 19 | venn.get_label_by_id('010').set_text('Domain Experts') 20 | venn.get_label_by_id('001').set_text('Hacking Skills') 21 | venn.get_label_by_id('110').set_text('Research') 22 | venn.get_label_by_id('101').set_text('Machine Learning') 23 | venn.get_label_by_id('011').set_text('Danger Zone') 24 | venn.get_label_by_id('111').set_text('Common Data Science') 25 | 26 | # Show the plot 27 | plt.title("Data Science Venn Diagram") 28 | plt.show() 29 | -------------------------------------------------------------------------------- /First Code.py: -------------------------------------------------------------------------------- 1 | #DIU_315_1.py.docx 2 | import numpy as np 3 | import pandas as pd 4 | import scipy.linalg as slin 5 | 6 | dob=pd.read_csv("C:/Users/HP/Desktop/315/obli.csv") 7 | dnonc=pd.read_csv("C:/Users/HP/Desktop/315/nonc.csv") 8 | 9 | Z2=np.transpose(dob[["activity","antigen"]]) 10 | Z1=np.transpose(dnonc[["activity","antigen"]]) 11 | 12 | S1=np.cov(Z1,bias=False);S1 13 | S2=np.cov(Z2,bias=False);S2 14 | 15 | X1b=np.mean(Z1,axis=1);X1b 16 | X2b=np.mean(Z2,axis=1);X2b 17 | 18 | n2=len(dob); n1=len(dnonc) 19 | 20 | Sp=((n1-1)/((n1-1)+(n2-1)))*S1+((n2-1)/((n2-1)+(n2-1)))*S2;Sp 21 | 
#Spp=(29/73)*S1+(44/73)*S2;Spp 22 | 23 | SpI=np.linalg.inv(Sp); 24 | 25 | X12=X1b-X2b;X12 26 | yh=np.dot(X12,SpI);yh 27 | 28 | U=X1b+X2b 29 | mhat=np.dot((1/2)*yh,U) 30 | 31 | x0=[-.0867,-0.07786] 32 | 33 | #x0=[-.1744,.1892] 34 | 35 | #v1 = float(input("Please provide 1st value: "));v2 = float(input("Please provide 2nd value: "));#x0=[v1,v2] 36 | 37 | rule=np.dot(yh,x0)-mhat;rule 38 | 39 | if rule>0: 40 | print("Subject/individual belongs to Goup 1") 41 | else: 42 | print("Subject/individual belongs to Goup 2") 43 | 44 | #--------------------------------------# 45 | 46 | 47 | ## 48 | np.array([[ 64.96, 33.2 , -24.44], 49 | [ 33.2 , 56.4 , -24.1 ], 50 | [-24.44, -24.1 , 75.56]]) 51 | 52 | 53 | import numpy as np 54 | 55 | A = [45, 37, 42, 35, 39] 56 | B = [38, 31, 26, 28, 33] 57 | C = [10, 15, 17, 21, 12] 58 | 59 | 60 | 61 | ## 62 | -------------------------------------------------------------------------------- /Assignment 3/question i.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from matplotlib_venn import venn3 3 | 4 | # Define the sizes of each group 5 | sizes = { 6 | '100': 20, # Math and Statistics 7 | '010': 25, # Domain Experts 8 | '001': 30, # Hacking Skills 9 | '110': 15, # Math and Statistics & Domain Experts 10 | '101': 10, # Math and Statistics & Hacking Skills 11 | '011': 18, # Domain Experts & Hacking Skills 12 | '111': 7 # Math and Statistics & Domain Experts & Hacking Skills 13 | } 14 | # Create the Venn diagram 15 | venn = venn3(subsets=sizes, set_labels=('Math and Statistics', 'Domain Experts', 'Hacking Skills')) 16 | 17 | # Label each subset 18 | venn.get_label_by_id('100').set_text('Math and Statistics') 19 | venn.get_label_by_id('010').set_text('Domain Experts') 20 | venn.get_label_by_id('001').set_text('Hacking Skills') 21 | venn.get_label_by_id('110').set_text('Research') 22 | venn.get_label_by_id('101').set_text('Machine Learning') 23 | 
venn.get_label_by_id('011').set_text('Danger Zone') 24 | venn.get_label_by_id('111').set_text('Common Data Science') 25 | 26 | # Show the plot 27 | plt.title("Data Science Venn Diagram") 28 | plt.show() 29 | 30 | 31 | import numpy as np 32 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis 33 | # Given data points for FWI and RH 34 | RH = np.array([79, 67, 63, 56]) 35 | FWI = np.array([0.21, 2.5, 10.2, 6.1]) 36 | # Estimate Fisher Linear Discriminant Function 37 | X = np.column_stack((RH, FWI)) 38 | y = np.array([1, 1, 2, 2]) # Assuming 1 for "fire" and 2 for "not fire" 39 | lda = LinearDiscriminantAnalysis() 40 | lda.fit(X, y) 41 | # Classify the given points 42 | new_data = np.array([[79, 0.21], [67, 2.5], [63, 10.2], [56, 6.1]]) 43 | predicted_classes = lda.predict(new_data) 44 | 45 | for i in range(len(new_data)): 46 | print(f"FWI: {new_data[i][1]}, RH: {new_data[i][0]}, Classified as: {'fire' if predicted_classes[i] == 1 else 'not fire'}") -------------------------------------------------------------------------------- /Statistics CT.py: -------------------------------------------------------------------------------- 1 | #StatisticsCT_24Feb.py 2 | # Data Set used in correlation and regression 3 | x=[9,7,11,12,8,7,8,11,10,12,6,6] 4 | y=[8.1,6,3.6,4,5,10,7.6,8,8,6,8.6,8] 5 | 6 | #Descriptive statistics USING pandas 7 | import pandas as pd 8 | xd=pd.DataFrame(x) 9 | xd.describe() 10 | xd.mean() 11 | xd.median() 12 | xd.mode() 13 | xd.var() 14 | xd.skew() 15 | xd.kurt() 16 | 17 | #descriptive statistics USING statistics 18 | import statistics 19 | statistics.harmonic_mean(x) 20 | statistics.geometric_mean(x) 21 | statistics.mean(x) 22 | statistics.median(x) 23 | statistics.mode(x) 24 | statistics.pstdev(x) 25 | statistics.stdev(x) 26 | statistics.pvariance(x) 27 | statistics.variance(x) 28 | 29 | # COVARIANCE, CORRELATION, AND REGRESSION 30 | #DATA 31 | x=[9,7,11,12,8,7,8,11,10,12,6,6] 32 | y=[8.1,6,3.6,4,5,10,7.6,8,8,6,8.6,8] 33 | 34 | import 
numpy as np 35 | r = np.corrcoef(x, y) #creating correlation matrix 36 | np.cov(x,y) # Calculates variance-Covariance Matrix 37 | r 38 | r[0,1] 39 | r[1,0] 40 | 41 | #USING pandas 42 | import pandas as pd 43 | xd=pd.DataFrame(x) 44 | yd=pd.DataFrame(y) 45 | xd=pd.Series(x) 46 | yd=pd.Series(y) 47 | 48 | xd.corr(yd) # Calculates correlation coefficient between x and y 49 | yd.corr(xd) # Calculates correlation coefficient between x and y 50 | 51 | yd.cov(xd) # Calculates Covariance between x and y 52 | 53 | 54 | #SCATTER PLOT 55 | import matplotlib.pyplot as plt 56 | plt.scatter(x, y) 57 | plt.show() 58 | 59 | # Add title and axis labels 60 | plt.title("Figure: Scatter plot between car's ages and selling prices") 61 | plt.xlabel("Age of Car in years (X)") 62 | plt.ylabel("Selling Price of Car (Y)") 63 | plt.scatter(x, y) 64 | plt.show() 65 | 66 | 67 | #Histogram 68 | plt.hist(x) 69 | plt.show() 70 | 71 | 72 | # b, a, r, pvalue, standard_error 73 | x=[9,7,11,12,8,7,8,11,10,12,6,6] 74 | y=[8.1,6,3.6,4,5,10,7.6,8,8,6,8.6,8] 75 | from scipy import stats 76 | slope, intercept, r, p, std_err = stats.linregress(x, y) 77 | 78 | print(r) 79 | slope 80 | intercept 81 | stats.linregress(x, y) # Returns b, a, r, pvalue, standard_error 82 | -------------------------------------------------------------------------------- /my file.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import statistics 5 | from scipy import stats 6 | 7 | # Import the CSV file 8 | data = pd.read_csv("weather_data.csv") 9 | 10 | # Extract FFMC values 11 | ffmc_values = data['FFMC'] 12 | 13 | # b) Construct a histogram 14 | plt.figure(figsize=(10, 6)) 15 | plt.hist(ffmc_values, bins=20, edgecolor='black') 16 | plt.title('Histogram of FFMC Values') 17 | plt.xlabel('FFMC') 18 | plt.ylabel('Frequency') 19 | plt.grid(True) 20 | plt.show() 21 | 22 | # c) Calculate mean, median, mode, 
geometric mean, and harmonic mean 23 | mean_ffmc = np.mean(ffmc_values) 24 | median_ffmc = np.median(ffmc_values) 25 | mode_ffmc = statistics.mode(ffmc_values) 26 | geometric_mean_ffmc = stats.gmean(ffmc_values) 27 | harmonic_mean_ffmc = statistics.harmonic_mean(ffmc_values) 28 | 29 | print("Mean:", mean_ffmc) 30 | print("Median:", median_ffmc) 31 | print("Mode:", mode_ffmc) 32 | print("Geometric Mean:", geometric_mean_ffmc) 33 | print("Harmonic Mean:", harmonic_mean_ffmc) 34 | 35 | # d) Report five summary measures and construct a box plot 36 | q1_ffmc = np.percentile(ffmc_values, 25) 37 | q3_ffmc = np.percentile(ffmc_values, 75) 38 | iqr_ffmc = q3_ffmc - q1_ffmc 39 | minimum_ffmc = min(ffmc_values) 40 | maximum_ffmc = max(ffmc_values) 41 | 42 | plt.figure(figsize=(8, 6)) 43 | plt.boxplot(ffmc_values, vert=False) 44 | plt.title('Box Plot of FFMC Values') 45 | plt.xlabel('FFMC') 46 | plt.grid(True) 47 | plt.show() 48 | 49 | print("Minimum:", minimum_ffmc) 50 | print("1st Quartile:", q1_ffmc) 51 | print("Median:", median_ffmc) 52 | print("3rd Quartile:", q3_ffmc) 53 | print("Maximum:", maximum_ffmc) 54 | 55 | # e) Determine population standard deviation and population coefficient of variation 56 | population_std_dev_ffmc = statistics.pstdev(ffmc_values) 57 | population_coeff_var_ffmc = (population_std_dev_ffmc / mean_ffmc) * 100 58 | 59 | print("Population Standard Deviation:", population_std_dev_ffmc) 60 | print("Population Coefficient of Variation:", population_coeff_var_ffmc) 61 | 62 | # f) Determine the coefficient of skewness and kurtosis 63 | skewness_ffmc = statistics.skew(ffmc_values) 64 | kurtosis_ffmc = statistics.kurtosis(ffmc_values) 65 | 66 | print("Coefficient of Skewness:", skewness_ffmc) 67 | print("Kurtosis:", kurtosis_ffmc) 68 | 69 | # g) Covariance and correlation matrices 70 | covariance_matrix = data[['Temp', 'RH', 'Ws', 'Rain', 'FFMC', 'DMC', 'DC', 'ISI', 'BUI', 'FWI']].cov() 71 | correlation_matrix = data[['Temp', 'RH', 'Ws', 'Rain', 'FFMC', 
'DMC', 'DC', 'ISI', 'BUI', 'FWI']].corr() 72 | 73 | print("Covariance Matrix:") 74 | print(covariance_matrix) 75 | print("\nCorrelation Matrix:") 76 | print(correlation_matrix) 77 | 78 | # h) Regression equation of FWI on RH 79 | fwi_values = data['FWI'] 80 | rh_values = data['RH'] 81 | 82 | slope, intercept, r_value, p_value, std_err = stats.linregress(rh_values, fwi_values) 83 | 84 | print("Regression Equation: FWI = {:.2f} * RH + {:.2f}".format(slope, intercept)) 85 | # Estimate FWI when RH is 21.5% 86 | rh = 21.5 87 | estimated_fwi = slope * rh + intercept 88 | print("Estimated FWI when RH is 21.5%:", estimated_fwi) 89 | -------------------------------------------------------------------------------- /Data Science File/Untitled4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "42ab3c93-7b47-43eb-af5d-8fa258234dcb", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import matplotlib.pyplot as plt\n", 11 | "import seaborn as sns\n", 12 | "import numpy as np\n", 13 | "import pandas as pd\n", 14 | "dt = pd.read_csv(\"data.csv\")\n", 15 | "dt" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "id": "e1ed0469-cf9a-4a47-9707-fdb1f023fe7a", 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/plain": [ 27 | "(7, 6)" 28 | ] 29 | }, 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "output_type": "execute_result" 33 | } 34 | ], 35 | "source": [ 36 | "dt.shape" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "id": "24ce1bf5-8b79-4bc4-bc6c-2d85342ce72e", 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "dt.isnull()" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "id": "65393823-1eae-4d10-8e74-e479f11b246c", 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "Full_fill_data 
=dt.fillna(0)\n", 57 | "Full_fill_data" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "id": "7c295e67-6d18-449a-9f51-572edc68c86c", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "Full_fill_data.describe()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 6, 73 | "id": "4eeb347c-4950-44be-a488-97d849e320c4", 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/html": [ 79 | "
\n", 80 | "\n", 93 | "\n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | "
Student IDStudent NameCGPAAgeSemesterGender
0111A3.2218.0F22male
3444D4.0020.0F23male
4555E3.6821.0S23male
5666F3.8922.0S23male
6777O3.9130.0F23other
\n", 153 | "
" 154 | ], 155 | "text/plain": [ 156 | " Student ID Student Name CGPA Age Semester Gender\n", 157 | "0 111 A 3.22 18.0 F22 male\n", 158 | "3 444 D 4.00 20.0 F23 male\n", 159 | "4 555 E 3.68 21.0 S23 male\n", 160 | "5 666 F 3.89 22.0 S23 male\n", 161 | "6 777 O 3.91 30.0 F23 other" 162 | ] 163 | }, 164 | "execution_count": 6, 165 | "metadata": {}, 166 | "output_type": "execute_result" 167 | } 168 | ], 169 | "source": [ 170 | "delete_data =dt.dropna(inplace=False)\n", 171 | "delete_data" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "id": "4ff77eff-732f-4f83-bc72-32cac26f9aea", 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "from sklearn.preprocessing import LabelEncoder\n", 182 | "le = LabelEncoder() \n", 183 | "dt['Gender']= le.fit_transform(dt['Gender']) \n", 184 | "dt " 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "id": "381f38eb-4dc8-47cd-a42f-54791c1ebbd8", 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "Delete_data=dt.dropna(inplace=False)\n", 195 | "Delete_data" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "id": "7890adbe-bc44-4989-ae7d-457644f4d220", 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "dt1 = pd.get_dummies(dt, columns=['Gender'])\n", 206 | "dt1" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 10, 212 | "id": "7824c539-d0af-4e3d-ad30-12e04fb2181d", 213 | "metadata": {}, 214 | "outputs": [ 215 | { 216 | "data": { 217 | "text/html": [ 218 | "
\n", 219 | "\n", 232 | "\n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | "
Student IDStudent NameCGPAAgeSemesterGender
\n", 247 | "
" 248 | ], 249 | "text/plain": [ 250 | "Empty DataFrame\n", 251 | "Columns: [Student ID, Student Name, CGPA, Age, Semester, Gender]\n", 252 | "Index: []" 253 | ] 254 | }, 255 | "execution_count": 10, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | } 259 | ], 260 | "source": [ 261 | "duplicates = dt[dt.duplicated()]\n", 262 | "duplicates" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "id": "0aae7aef-4446-44b8-89d4-813ecfcf4363", 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "dt.head()" 273 | ] 274 | } 275 | ], 276 | "metadata": { 277 | "kernelspec": { 278 | "display_name": "Python 3 (ipykernel)", 279 | "language": "python", 280 | "name": "python3" 281 | }, 282 | "language_info": { 283 | "codemirror_mode": { 284 | "name": "ipython", 285 | "version": 3 286 | }, 287 | "file_extension": ".py", 288 | "mimetype": "text/x-python", 289 | "name": "python", 290 | "nbconvert_exporter": "python", 291 | "pygments_lexer": "ipython3", 292 | "version": "3.11.7" 293 | } 294 | }, 295 | "nbformat": 4, 296 | "nbformat_minor": 5 297 | } 298 | -------------------------------------------------------------------------------- /Assignment 3/Algerian_forest_Modm.csv: -------------------------------------------------------------------------------- 1 | day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Region 2 | 2,9,2012,22,86,15,10.1,30.5,0.7,7,0,1.1,0,not_fire,1 3 | 4,6,2012,25,89,13,2.5,28.6,1.3,6.9,0,1.7,0,not_fire,1 4 | 16,6,2012,29,89,13,0.7,36.1,1.7,7.6,0,2.2,0,not_fire,1 5 | 17,6,2012,30,89,16,0.6,37.3,1.1,7.8,0,1.6,0,not_fire,1 6 | 3,9,2012,25,78,15,3.8,42.6,1.2,7.5,0.1,1.7,0,not_fire,1 7 | 13,9,2012,25,86,21,4.6,40.9,1.3,7.5,0.1,1.8,0,not_fire,1 8 | 15,9,2012,24,82,15,0.4,44.9,0.9,7.3,0.2,1.4,0,not_fire,1 9 | 1,9,2012,25,76,17,7.2,46,1.3,7.5,0.2,1.8,0.1,not_fire,1 10 | 3,6,2012,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,not_fire,1 11 | 
15,6,2012,28,80,17,3.1,49.4,3,7.4,0.4,3,0.1,not_fire,1 12 | 14,9,2012,22,76,26,8.3,47.4,1.1,7,0.4,1.6,0.1,not_fire,1 13 | 25,9,2012,26,81,21,5.8,48.6,3,7.7,0.4,3,0.1,not_fire,1 14 | 29,9,2012,26,80,16,1.8,47.4,2.9,7.7,0.3,3,0.1,not_fire,1 15 | 30,9,2012,25,78,14,1.4,45,1.9,7.5,0.2,2.4,0.1,not_fire,1 16 | 13,6,2012,27,84,21,1.2,50,6.7,17,0.5,6.7,0.2,not_fire,1 17 | 18,6,2012,31,78,14,0.3,56.9,1.9,8,0.7,2.4,0.2,not_fire,1 18 | 9,9,2012,30,77,15,1,56.1,2.1,8.4,0.7,2.6,0.2,not_fire,1 19 | 31,8,2012,28,80,21,16.8,52.5,8.7,8.7,0.6,8.3,0.3,not_fire,1 20 | 9,6,2012,25,88,13,0.2,52.9,7.9,38.8,0.4,10.5,0.3,not_fire,1 21 | 2,7,2012,27,75,19,1.2,55.7,2.4,8.3,0.8,2.8,0.3,not_fire,1 22 | 10,9,2012,33,73,12,1.8,59.9,2.2,8.9,0.7,2.7,0.3,not_fire,1 23 | 11,9,2012,30,77,21,1.8,58.5,1.9,8.4,1.1,2.4,0.3,not_fire,1 24 | 2,6,2012,29,61,13,1.3,64.4,4.1,7.6,1,3.9,0.4,not_fire,1 25 | 14,6,2012,30,78,20,0.5,59,4.6,7.8,1,4.4,0.4,not_fire,1 26 | 20,6,2012,30,80,16,0.4,59.8,3.4,27.1,0.9,5.1,0.4,not_fire,1 27 | 1,7,2012,29,68,19,1,59.9,2.5,8.6,1.1,2.9,0.4,not_fire,1 28 | 16,7,2012,28,76,21,0,72.6,7,25.5,0.7,8.3,0.4,not_fire,1 29 | 8,9,2012,30,73,17,0.9,62,2.6,8.4,1.1,3,0.4,not_fire,1 30 | 1,6,2012,29,57,18,0,65.7,3.4,7.6,1.3,3.4,0.5,not_fire,1 31 | 5,6,2012,27,77,16,0,64.8,3,14.2,1.2,3.9,0.5,not_fire,1 32 | 3,7,2012,32,76,20,0.7,63.1,2.6,9.2,1.3,3,0.5,not_fire,1 33 | 10,7,2012,33,69,13,0.7,66.6,6,9.3,1.1,5.8,0.5,not_fire,1 34 | 15,7,2012,30,80,19,0.4,60.7,5.2,17,1.1,5.9,0.5,not_fire,1 35 | 4,9,2012,29,73,17,0.1,68.4,1.9,15.7,1.4,2.9,0.5,not_fire,1 36 | 12,9,2012,29,88,13,0,71,2.6,16.6,1.2,3.7,0.5,not_fire,1 37 | 9,7,2012,32,68,14,1.4,66.6,7.7,9.2,1.1,7.4,0.6,not_fire,1 38 | 7,9,2012,31,71,17,0.3,69.6,3.2,30.1,1.5,5.1,0.6,not_fire,1 39 | 23,9,2012,32,54,11,0.5,73.7,7.9,30.4,1.2,9.6,0.7,not_fire,1 40 | 24,9,2012,29,65,19,0.6,68.3,5.5,15.2,1.5,5.8,0.7,not_fire,1 41 | 14,7,2012,34,61,13,0.6,73.9,7.8,22.9,1.4,8.4,0.8,not_fire,1 42 | 2,8,2012,35,55,12,0.4,78,5.8,10,1.7,5.5,0.8,not_fire,1 43 | 
3,8,2012,35,63,14,0.3,76.6,5.7,10,1.7,5.5,0.8,not_fire,1 44 | 16,9,2012,30,65,14,0,78.1,3.2,15.7,1.9,4.2,0.8,not_fire,1 45 | 10,6,2012,28,79,12,0,73.2,9.5,46.3,1.3,12.6,0.9,not_fire,1 46 | 12,7,2012,31,75,13,0.1,75.1,7.9,27.7,1.5,9.2,0.9,not_fire,1 47 | 1,8,2012,36,45,14,0,78.8,4.8,10.2,2,4.7,0.9,not_fire,1 48 | 6,9,2012,29,74,19,0.1,75.8,3.6,32.2,2.1,5.6,0.9,not_fire,1 49 | 28,9,2012,32,47,14,0.7,77.5,7.1,8.8,1.8,6.8,0.9,not_fire,1 50 | 24,7,2012,28,78,16,0.1,70,9.6,79.7,1.4,14.7,1.3,not_fire,1 51 | 19,6,2012,31,55,16,0.1,79.9,4.5,16,2.5,5.3,1.4,not_fire,1 52 | 5,9,2012,29,75,16,0,80.8,3.4,24,2.8,5.1,1.7,fire,1 53 | 4,7,2012,33,78,17,0,80.1,4.6,18.5,2.7,5.7,1.7,not_fire,1 54 | 26,9,2012,31,54,11,0,82,6,16.3,2.5,6.2,1.7,not_fire,1 55 | 23,7,2012,27,66,22,0.4,68.2,10.5,71.3,1.8,15.4,2.1,not_fire,1 56 | 21,6,2012,30,78,14,0,81,6.3,31.6,2.6,8.4,2.2,fire,1 57 | 11,7,2012,33,76,14,0,81.1,8.1,18.7,2.6,8.1,2.2,not_fire,1 58 | 22,6,2012,31,67,17,0.1,79.1,7,39.5,2.4,9.7,2.3,not_fire,1 59 | 6,6,2012,31,67,14,0,82.6,5.8,22.2,3.1,7,2.5,fire,1 60 | 8,8,2012,32,60,18,0.3,77.1,11.3,47,2.2,14.1,2.6,not_fire,1 61 | 22,7,2012,28,79,18,0.1,73.4,16.4,79.9,1.8,21.7,2.8,not_fire,1 62 | 22,9,2012,31,50,19,0.6,77.8,10.6,41.4,2.4,12.9,2.8,not_fire,1 63 | 7,7,2012,35,64,18,0.2,80,9.7,40.4,2.8,12.1,3.2,not_fire,1 64 | 13,7,2012,34,81,15,0,81.8,9.7,37.2,3,11.7,3.4,not_fire,1 65 | 17,7,2012,29,70,14,0,82.8,9.4,34.1,3.2,11.1,3.6,fire,1 66 | 23,6,2012,32,62,18,0.1,81.4,8.2,47.7,3.3,11.5,3.8,fire,1 67 | 4,8,2012,34,69,13,0,85,8.2,19.8,4,8.2,3.9,fire,1 68 | 29,6,2012,32,47,13,0.3,79.9,18.4,84.4,2.2,23.8,3.9,not_fire,1 69 | 20,7,2012,33,65,15,0.1,81.4,12.3,62.1,2.8,16.5,4,fire,1 70 | 27,9,2012,31,66,11,0,85.7,8.3,24.9,4,9,4.1,fire,1 71 | 16,8,2012,36,61,18,0.3,80.2,11.7,90.4,2.8,17.6,4.2,fire,1 72 | 12,8,2012,35,51,13,0.3,81.3,15.6,75.1,2.5,20.7,4.2,not_fire,1 73 | 30,8,2012,35,70,17,0.8,72.7,25.2,180.4,1.7,37.4,4.2,not_fire,1 74 | 5,7,2012,33,66,14,0,85.9,7.6,27.9,4.8,9.1,4.9,fire,1 75 | 
11,6,2012,31,65,14,0,84.5,12.5,54.3,4,15.8,5.6,fire,1 76 | 17,9,2012,31,52,14,0,87.7,6.4,24.3,6.2,7.7,5.9,fire,1 77 | 18,7,2012,31,68,14,0,85.4,12.1,43.1,4.6,14.2,6,fire,1 78 | 5,8,2012,34,65,13,0,86.8,11.1,29.7,5.2,11.5,6.1,fire,1 79 | 20,9,2012,28,84,18,0,83.8,13.5,49.3,4.5,16,6.3,fire,1 80 | 15,8,2012,36,55,13,0.3,82.4,15.6,92.5,3.7,22,6.3,fire,1 81 | 6,7,2012,32,63,14,0,87,10.9,37,5.6,12.5,6.8,fire,1 82 | 6,8,2012,32,75,14,0,86.4,13,39.1,5.2,14.2,6.8,fire,1 83 | 8,6,2012,30,73,15,0,86.6,12.1,38.3,5.6,13.5,7.1,fire,1 84 | 12,6,2012,26,81,19,0,84,13.8,61.4,4.8,17.7,7.1,fire,1 85 | 7,6,2012,33,54,13,0,88.2,9.9,30.5,6.4,10.9,7.2,fire,1 86 | 25,7,2012,31,65,18,0,84.3,12.5,88.7,4.8,18.5,7.3,fire,1 87 | 24,6,2012,32,66,17,0,85.9,11.2,55.8,5.6,14.9,7.5,fire,1 88 | 18,9,2012,32,49,11,0,89.4,9.8,33.1,6.8,11.3,7.7,fire,1 89 | 8,7,2012,33,68,19,0,85.6,12.5,49.8,6,15.4,8,fire,1 90 | 7,8,2012,32,69,16,0,86.5,15.5,48.6,5.5,17.2,8,fire,1 91 | 21,9,2012,31,55,11,0,87.8,16.5,57.9,5.4,19.2,8.3,fire,1 92 | 25,6,2012,31,64,15,0,86.7,14.2,63.8,5.7,18.3,8.4,fire,1 93 | 21,7,2012,33,70,17,0,85.4,18.5,71.5,5.2,22.4,8.8,fire,1 94 | 19,9,2012,29,57,14,0,89.3,12.5,41.3,7.8,14.2,9.7,fire,1 95 | 9,8,2012,35,59,17,0,87.4,14.8,57,6.9,17.9,9.9,fire,1 96 | 13,8,2012,35,63,15,0,87,19,85.1,5.9,24.4,10.2,fire,1 97 | 26,6,2012,31,64,18,0,86.8,17.8,71.8,6.7,21.6,10.6,fire,1 98 | 14,8,2012,33,66,14,0,87,21.7,94.7,5.7,27.2,10.6,fire,1 99 | 30,7,2012,31,79,15,0,85.4,28.5,136,4.7,37.4,10.7,fire,1 100 | 19,7,2012,35,59,17,0,88.1,12,52.8,7.7,18.2,10.9,fire,1 101 | 28,7,2012,33,76,15,0,86.5,24.4,117.8,5.6,32.1,11.3,fire,1 102 | 10,8,2012,35,55,14,0,88.9,18.6,67,7.4,21.9,11.6,fire,1 103 | 29,7,2012,32,73,15,0,86.6,26.7,127,5.6,35,11.9,fire,1 104 | 11,8,2012,35,63,13,0,88.9,21.7,77,7.1,25.5,12.1,fire,1 105 | 30,6,2012,33,50,14,0,88.7,22.9,92.8,7.2,28.3,12.9,fire,1 106 | 27,8,2012,33,82,21,0,84.9,47,200.2,4.4,59.3,13.2,fire,1 107 | 26,8,2012,31,78,18,0,85.8,45.6,190.6,4.7,57.1,13.7,fire,1 108 | 
28,6,2012,32,55,14,0,89.1,25.5,88.5,7.6,29.7,13.9,fire,1 109 | 17,8,2012,37,52,18,0,89.3,16,100.7,9.7,22.9,14.6,fire,1 110 | 27,6,2012,34,53,18,0,89,21.6,80.3,9.2,25.8,15,fire,1 111 | 26,7,2012,36,53,19,0,89.2,17.1,98.6,10,23.9,15.3,fire,1 112 | 27,7,2012,36,48,13,0,90.3,22.2,108.5,8.7,29.4,15.3,fire,1 113 | 31,7,2012,35,64,17,0,87.2,31.9,145.7,6.8,41.2,15.7,fire,1 114 | 18,8,2012,36,54,18,0,89.4,20,110.9,9.7,27.5,16.1,fire,1 115 | 20,8,2012,35,68,19,0,88.3,25.9,130.6,8.8,34.7,16.8,fire,1 116 | 19,8,2012,35,62,19,0,89.4,23.2,120.9,9.7,31.3,17.2,fire,1 117 | 21,8,2012,36,58,19,0,88.6,29.6,141.1,9.2,38.8,18.4,fire,1 118 | 28,8,2012,34,64,16,0,89.4,50.2,210.4,7.3,62.9,19.9,fire,1 119 | 25,8,2012,35,60,15,0,88.9,43.9,181.3,8.2,54.7,20.3,fire,1 120 | 22,8,2012,36,55,18,0,89.1,33.5,151.3,9.9,43.1,20.4,fire,1 121 | 24,8,2012,34,64,14,0,88.9,40.5,171.3,9,50.9,20.9,fire,1 122 | 23,8,2012,36,53,16,0,89.5,37.6,161.5,10.4,47.5,22.3,fire,1 123 | 29,8,2012,35,48,18,0,90.1,54.2,220.4,12.5,67.4,30.2,fire,1 124 | 1,9,2012,29,86,16,0,37.9,0.9,8.2,0.1,1.4,0,not_fire,2 125 | 27,9,2012,28,87,15,4.4,41.1,6.5,8,0.1,6.2,0,not_fire,2 126 | 3,6,2012,29,80,14,2,48.7,2.2,7.6,0.3,2.6,0.1,not_fire,2 127 | 16,6,2012,29,87,15,0.4,47.4,4.2,8,0.2,4.1,0.1,not_fire,2 128 | 1,6,2012,32,71,12,0.7,57.1,2.5,8.2,0.6,2.8,0.2,not_fire,2 129 | 2,6,2012,30,73,13,4,55.7,2.7,7.8,0.6,2.9,0.2,not_fire,2 130 | 28,9,2012,27,87,29,0.5,45.9,3.5,7.9,0.4,3.4,0.2,not_fire,2 131 | 14,6,2012,27,79,16,0.7,53.4,6.4,7.3,0.5,6.1,0.3,not_fire,2 132 | 20,6,2012,31,72,14,0.2,60.2,3.8,8,0.8,3.7,0.3,not_fire,2 133 | 10,9,2012,29,74,15,1.1,59.5,4.7,8.2,0.8,4.6,0.3,not_fire,2 134 | 17,6,2012,31,69,17,4.7,62.2,3.9,8,1.1,3.8,0.4,not_fire,2 135 | 18,6,2012,33,62,10,8.7,65.5,4.6,8.3,0.9,4.4,0.4,not_fire,2 136 | 19,6,2012,32,67,14,4.5,64.6,4.4,8.2,1,4.2,0.4,not_fire,2 137 | 5,9,2012,30,58,12,4.1,66.1,4,8.4,1,3.9,0.4,not_fire,2 138 | 6,9,2012,34,71,14,6.5,64.5,3.3,9.1,1,3.5,0.4,not_fire,2 139 | 
24,9,2012,26,49,6,2,61.3,11.9,28.1,0.6,11.9,0.4,not_fire,2 140 | 1,7,2012,28,58,18,2.2,63.7,3.2,8.5,1.2,3.3,0.5,not_fire,2 141 | 30,9,2012,24,64,15,0.2,67.3,3.8,16.5,1.2,4.8,0.5,not_fire,2 142 | 15,6,2012,28,90,15,0,66.8,7.2,14.7,1.2,7.1,0.6,not_fire,2 143 | 7,8,2012,34,63,13,2.9,69.7,7.2,9.8,1.2,6.9,0.6,not_fire,2 144 | 25,7,2012,39,64,9,1.2,73.8,11.7,15.9,1.1,11.4,0.7,not_fire,2 145 | 4,9,2012,30,66,15,0.2,73.5,4.1,26.6,1.5,6,0.7,not_fire,2 146 | 29,9,2012,24,54,18,0.1,79.7,4.3,15.2,1.7,5.1,0.7,not_fire,2 147 | 8,6,2012,28,51,17,1.3,71.4,7.7,7.4,1.5,7.3,0.8,not_fire,2 148 | 23,6,2012,33,59,16,0.8,74.2,7,8.3,1.6,6.7,0.8,not_fire,2 149 | 11,7,2012,34,56,15,2.9,74.8,7.1,9.5,1.6,6.8,0.8,not_fire,2 150 | 1,8,2012,38,52,14,0,78.3,4.4,10.5,2,4.4,0.8,not_fire,2 151 | 6,8,2012,30,54,14,3.1,70.5,11,9.1,1.3,10.5,0.8,not_fire,2 152 | 2,9,2012,28,67,19,0,75.4,2.9,16.3,2,4,0.8,not_fire,2 153 | 5,6,2012,32,60,14,0.2,77.1,6,17.6,1.8,6.5,0.9,not_fire,2 154 | 13,6,2012,30,52,15,2,72.3,11.4,7.8,1.4,10.9,0.9,not_fire,2 155 | 4,6,2012,30,64,14,0,79.4,5.2,15.4,2.2,5.6,1,not_fire,2 156 | 26,7,2012,35,58,10,0.2,78.3,10.8,19.7,1.6,10.7,1,not_fire,2 157 | 22,6,2012,33,46,14,1.1,78.3,8.1,8.3,1.9,7.7,1.2,not_fire,2 158 | 11,9,2012,30,73,14,0,79.2,6.5,16.6,2.1,6.6,1.2,not_fire,2 159 | 10,7,2012,34,51,16,3.8,77.5,8,9.5,2,7.7,1.3,not_fire,2 160 | 9,6,2012,27,59,18,0.1,78.1,8.5,14.7,2.4,8.3,1.9,not_fire,2 161 | 30,6,2012,34,42,15,1.7,79.7,12,8.5,2.2,11.5,2.2,not_fire,2 162 | 3,9,2012,28,75,16,0,82.2,4.4,24.3,3.3,6,2.5,fire,2 163 | 24,7,2012,33,63,17,1.1,72.8,20.9,56.6,1.6,21.7,2.5,not_fire,2 164 | 8,9,2012,30,88,14,0,82.5,6.6,26.1,3,8.1,2.7,fire,2 165 | 8,7,2012,35,47,18,6,80.8,9.8,9.7,3.1,9.4,3,fire,2 166 | 25,9,2012,28,70,15,0,79.9,13.8,36.1,2.4,14.1,3,not_fire,2 167 | 6,6,2012,35,54,11,0.1,83.7,8.4,26.3,3.1,9.3,3.1,fire,2 168 | 9,7,2012,36,43,15,1.9,82.3,9.4,9.9,3.2,9,3.1,fire,2 169 | 7,9,2012,31,62,15,0,83.3,5.8,17.7,3.8,6.4,3.2,fire,2 170 | 
27,7,2012,29,87,18,0,80,11.8,28.3,2.8,11.8,3.2,not_fire,2 171 | 9,9,2012,30,80,15,0,83.1,7.9,34.5,3.5,10,3.7,fire,2 172 | 20,9,2012,34,58,13,0.2,79.5,18.7,88,2.1,24.4,3.8,not_fire,2 173 | 12,9,2012,31,72,14,0,84.2,8.3,25.2,3.8,9.1,3.9,fire,2 174 | 13,7,2012,39,45,13,0.6,85.2,11.3,10.4,4.2,10.9,4.7,fire,2 175 | 21,6,2012,32,55,14,0,86.2,8.3,18.4,5,8.2,4.9,fire,2 176 | 3,7,2012,34,56,17,0.1,84.7,9.7,27.3,4.7,10.3,5.2,fire,2 177 | 7,7,2012,38,43,13,0.5,85,13,35.4,4.1,13.7,5.2,fire,2 178 | 24,6,2012,35,68,16,0,85.3,10,17,4.9,9.9,5.3,fire,2 179 | 15,8,2012,35,46,13,0.3,83.9,16.9,54.2,3.5,19,5.5,fire,2 180 | 19,9,2012,29,41,8,0.1,83.9,24.9,86,2.7,28.9,5.6,fire,2 181 | 14,9,2012,28,81,15,0,84.6,12.6,41.5,4.3,14.3,5.7,fire,2 182 | 8,8,2012,37,56,11,0,87.4,11.2,20.2,5.2,11,5.9,fire,2 183 | 29,8,2012,35,53,17,0.5,80.2,20.7,149.2,2.7,30.6,5.9,fire,2 184 | 7,6,2012,35,44,17,0.2,85.6,9.9,28.9,5.4,10.7,6,fire,2 185 | 28,8,2012,35,56,14,0.4,79.2,37,166,2.1,30.6,6.1,not_fire,2 186 | 6,7,2012,35,42,15,0.3,84.7,15.5,45.1,4.3,16.7,6.3,fire,2 187 | 2,7,2012,33,48,16,0,87.6,7.9,17.8,6.8,7.8,6.4,fire,2 188 | 26,9,2012,30,65,14,0,85.4,16,44.5,4.5,16.9,6.5,fire,2 189 | 25,6,2012,34,70,16,0,86,12.8,25.6,5.4,12.7,6.7,fire,2 190 | 29,6,2012,37,36,13,0.6,86.2,17.9,36.7,4.8,17.8,7.2,fire,2 191 | 16,7,2012,31,83,17,0,84.5,19.4,33.1,4.7,19.2,7.3,fire,2 192 | 17,7,2012,32,81,17,0,84.6,21.1,42.3,4.7,20.9,7.7,fire,2 193 | 19,8,2012,35,66,15,0.1,82.7,32.7,96.8,3.3,35.5,7.7,fire,2 194 | 28,7,2012,33,57,16,0,87.5,15.7,37.6,6.7,15.7,9,fire,2 195 | 20,8,2012,36,81,15,0,83.7,34.4,107,3.8,38.1,9,fire,2 196 | 18,7,2012,33,68,15,0,86.1,23.9,51.6,5.2,23.9,9.1,fire,2 197 | 26,6,2012,36,62,16,0,87.8,16.5,34.5,7,16.4,9.5,fire,2 198 | 12,7,2012,36,44,13,0,90.1,12.6,19.4,8.3,12.5,9.6,fire,2 199 | 15,9,2012,32,51,13,0,88.7,16,50.2,6.9,17.8,9.8,fire,2 200 | 4,7,2012,34,58,18,0,88,13.6,36.8,8,14.1,9.9,fire,2 201 | 10,6,2012,30,41,15,0,89.4,13.3,22.5,8.4,13.1,10,fire,2 202 | 
13,9,2012,29,49,19,0,88.6,11.5,33.4,9.1,12.4,10.3,fire,2 203 | 14,7,2012,37,37,18,0.2,88.9,12.9,14.6,9,12.5,10.4,fire,2 204 | 13,8,2012,35,34,16,0.2,88.3,16.9,45.1,7.5,17.5,10.5,fire,2 205 | 10,8,2012,39,39,15,0.2,89.3,15.8,35.4,8.2,15.8,10.7,fire,2 206 | 29,7,2012,34,59,16,0,88.1,19.5,47.2,7.4,19.5,10.9,fire,2 207 | 27,6,2012,36,55,15,0,89.1,20.9,43.3,8,20.8,12,fire,2 208 | 21,8,2012,36,71,15,0,86,36.9,117.1,5.1,41.3,12.2,fire,2 209 | 5,8,2012,34,42,17,0.1,88.3,23.6,52.5,19,23.5,12.6,fire,2 210 | 9,8,2012,39,43,12,0,91.7,16.5,30.9,9.6,16.4,12.7,fire,2 211 | 12,6,2012,27,58,17,0,88.9,21.3,37.8,8.7,21.2,12.9,fire,2 212 | 19,7,2012,34,58,16,0,88.1,27.8,61.1,7.3,27.7,13,fire,2 213 | 30,7,2012,36,56,16,0,88.9,23.8,57.1,8.2,23.8,13.2,fire,2 214 | 2,8,2012,40,34,14,0,93.3,10.8,21.4,13.8,10.6,13.5,fire,2 215 | 22,9,2012,33,64,13,0,88.9,26.1,106.3,7.1,32.4,13.7,fire,2 216 | 15,7,2012,34,45,17,0,90.5,18,24.1,10.9,17.7,14.1,fire,2 217 | 31,7,2012,37,55,15,0,89.3,28.3,67.2,8.3,28.3,14.5,fire,2 218 | 16,8,2012,40,41,10,0.1,92,22.6,65.1,9.5,24.2,14.8,fire,2 219 | 5,7,2012,34,45,18,0,90.5,18.7,46.4,11.3,18.7,15,fire,2 220 | 23,9,2012,35,56,14,0,89,29.4,115.6,7.5,36,15.2,fire,2 221 | 18,9,2012,36,33,13,0.1,90.6,25.8,77.8,9,28.2,15.4,fire,2 222 | 14,8,2012,37,40,13,0,91.9,22.3,55.5,10.8,22.3,15.7,fire,2 223 | 30,8,2012,34,49,15,0,89.2,24.8,159.1,8.1,35.7,16,fire,2 224 | 23,7,2012,31,71,17,0,87.3,46.6,99,6.9,46.5,16.3,fire,2 225 | 11,6,2012,31,42,21,0,90.6,18.2,30.5,13.4,18,16.7,fire,2 226 | 20,7,2012,36,50,16,0,89.9,32.7,71,9.5,32.6,17.3,fire,2 227 | 17,9,2012,34,44,12,0,92.5,25.2,63.3,11.2,26.2,17.5,fire,2 228 | 22,8,2012,37,53,14,0,89.5,41.1,127.5,8,45.5,18.1,fire,2 229 | 28,6,2012,37,37,13,0,92.5,27.2,52.4,11.7,27.1,18.4,fire,2 230 | 12,8,2012,39,21,17,0.4,93,18.4,41.5,15.5,18.4,18.8,fire,2 231 | 16,9,2012,33,26,13,0,93.9,21.2,59.2,14.2,22.4,19.3,fire,2 232 | 31,8,2012,30,59,19,0,89.1,27.8,168.2,9.8,39.3,19.4,fire,2 233 | 3,8,2012,39,33,17,0,93.7,17.1,32.1,17.2,16.9,19.5,fire,2 
234 | 4,8,2012,38,35,15,0,93.8,23,42.7,15.7,22.9,20.9,fire,2 235 | 11,8,2012,40,31,15,0,94.2,22.5,46.3,16.6,22.4,21.6,fire,2 236 | 21,9,2012,35,34,17,0,92.2,23.6,97.3,13.8,29.4,21.6,fire,2 237 | 17,8,2012,42,24,9,0,96,30.3,76.4,15.7,30.4,24,fire,2 238 | 23,8,2012,36,43,16,0,91.2,46.1,137.7,11.5,50.2,24.5,fire,2 239 | 22,7,2012,32,48,18,0,91.5,44.2,90.1,13.2,44,25.4,fire,2 240 | 27,8,2012,36,54,14,0,91,65.9,177.3,10,68,26.1,fire,2 241 | 18,8,2012,37,37,14,0,94.3,35.9,86.8,16,35.9,26.3,fire,2 242 | 24,8,2012,35,38,15,0,92.1,51.3,147.7,12.2,54.9,26.9,fire,2 243 | 21,7,2012,36,29,18,0,93.9,39.6,80.6,18.5,39.5,30,fire,2 244 | 26,8,2012,33,37,16,0,92.2,61.3,167.2,13.1,64,30.3,fire,2 245 | 25,8,2012,34,40,18,0,92.1,56.3,157.5,14.3,59.5,31.1,fire,2 246 | -------------------------------------------------------------------------------- /Data Science File/5_ExploratoryDataAnalysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "id": "hg_XIstvlZfr" 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#importing libraries\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import matplotlib.mlab as mlab\n", 14 | "import seaborn as sns\n", 15 | "import pandas as pd\n", 16 | "import numpy as np" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": { 23 | "id": "prbQ_6Jvlf2Z" 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "url='/content/drive/MyDrive/Summer 2022/CSE 511 (MSc)/Week-01/data-dhaka-weather1953-2016.csv'\n", 28 | "df = pd.read_csv(url)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "colab": { 36 | "base_uri": "https://localhost:8080/" 37 | }, 38 | "id": "dOiSKy4R_jRT", 39 | "outputId": "fd5030ec-8ec2-418b-bf99-369c2bcf3309" 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "from google.colab import drive\n", 44 | 
"drive.mount('/content/drive')" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "colab": { 52 | "base_uri": "https://localhost:8080/", 53 | "height": 206 54 | }, 55 | "id": "6QlqxHJelnCY", 56 | "outputId": "813e6190-004b-4971-df05-0a7ec7f12121" 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "#Reading Data Head\n", 61 | "df.head()" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": { 68 | "colab": { 69 | "base_uri": "https://localhost:8080/" 70 | }, 71 | "id": "9BX7qdsg-p5g", 72 | "outputId": "ceabdf4f-0860-40f6-9889-1d260ec79dd0" 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "df['MaxTemp'].corr(df['Rainfall'])" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "colab": { 84 | "base_uri": "https://localhost:8080/", 85 | "height": 206 86 | }, 87 | "id": "3GeITfhV5rHW", 88 | "outputId": "bb7bc204-9337-49a9-f858-4c85c2740cec" 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "#Reading Data Tail\n", 93 | "df.tail()" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "colab": { 101 | "base_uri": "https://localhost:8080/" 102 | }, 103 | "id": "ftrUBlWV1I9k", 104 | "outputId": "cc753b08-d0d7-4051-8098-11752458b8ed" 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "#Check Null\n", 109 | "df.isnull().values.any()" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": { 116 | "colab": { 117 | "base_uri": "https://localhost:8080/" 118 | }, 119 | "id": "TkN6gRi2lp1J", 120 | "outputId": "df616965-068a-49c7-b265-2663322e72a9" 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "#Showing Columns\n", 125 | "df.columns" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": { 132 | "colab": { 133 | "base_uri": "https://localhost:8080/", 134 | "height": 284 135 | }, 136 | "id": 
"cltiMN1tlzp5", 137 | "outputId": "d30fdb46-d132-436f-b7f7-fa060b2aa401" 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "#Describing Data Set\n", 142 | "df.describe()" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "colab": { 150 | "base_uri": "https://localhost:8080/", 151 | "height": 34 152 | }, 153 | "id": "5Bmj2V8VmLFg", 154 | "outputId": "4c448a96-c560-44db-89b1-d419e6246e11" 155 | }, 156 | "outputs": [], 157 | "source": [ 158 | "#Data set Shape\n", 159 | "df.shape" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": { 166 | "colab": { 167 | "base_uri": "https://localhost:8080/", 168 | "height": 202 169 | }, 170 | "id": "MRSDKAREmRyJ", 171 | "outputId": "a3855f8f-210d-4c75-f7db-807d486a2fc7" 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "#Data set info\n", 176 | "df.info()" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "colab": { 184 | "base_uri": "https://localhost:8080/", 185 | "height": 134 186 | }, 187 | "id": "XzHZsv6PmWHY", 188 | "outputId": "6704ecc8-3dc4-495c-be14-2a03340fc31d" 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "#Dataset median\n", 193 | "df.median()" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": { 200 | "colab": { 201 | "base_uri": "https://localhost:8080/", 202 | "height": 134 203 | }, 204 | "id": "1T7KTZ_imczg", 205 | "outputId": "687e1e4e-2a21-4761-85ae-bea0a2febbef" 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "#Checking kurtosis\n", 210 | "df.kurtosis()" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": { 217 | "colab": { 218 | "base_uri": "https://localhost:8080/", 219 | "height": 134 220 | }, 221 | "id": "lQkIc3UHmmrR", 222 | "outputId": "ed9f491d-ffc6-4256-bc55-7f41fef53c62" 223 | }, 224 | "outputs": [], 225 | "source": [ 226 
| "#Checking skewness\n", 227 | "df.skew()" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": { 234 | "colab": { 235 | "base_uri": "https://localhost:8080/", 236 | "height": 225 237 | }, 238 | "id": "kIf-P6xwmo6I", 239 | "outputId": "96e9b924-38ad-46a9-d9c5-ab5770eb8f95" 240 | }, 241 | "outputs": [], 242 | "source": [ 243 | "#Checking correlation\n", 244 | "df.corr()" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": { 251 | "colab": { 252 | "base_uri": "https://localhost:8080/", 253 | "height": 235 254 | }, 255 | "id": "9cPIdy4Km270", 256 | "outputId": "167dd80e-65d2-4afc-f632-302ff1a69ec6" 257 | }, 258 | "outputs": [], 259 | "source": [ 260 | "#Checking covariance\n", 261 | "df.cov()" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": { 268 | "colab": { 269 | "base_uri": "https://localhost:8080/", 270 | "height": 505 271 | }, 272 | "id": "YALSUxxym9cx", 273 | "outputId": "94aed830-d941-4bb2-9e4b-fdcce85bdc54" 274 | }, 275 | "outputs": [], 276 | "source": [ 277 | "#Generating Heatmap\n", 278 | "correlation = df.corr()\n", 279 | "plt.figure(figsize=(16, 8))\n", 280 | "sns.heatmap(correlation, annot=True, linewidths=0, vmin=-1, cmap=\"RdBu_r\")\n", 281 | "plt.show()" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": { 288 | "colab": { 289 | "base_uri": "https://localhost:8080/", 290 | "height": 252 291 | }, 292 | "id": "D2zpMCwl1dEn", 293 | "outputId": "7b41745e-37b5-4362-d318-cc80c7925bea" 294 | }, 295 | "outputs": [], 296 | "source": [ 297 | "#month wise parameter checking\n", 298 | "rain=df.groupby('Month')['Rainfall'].mean()\n", 299 | "rain\n", 300 | "#sns.distplot(rain)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": { 307 | "colab": { 308 | "base_uri": "https://localhost:8080/", 309 | "height": 296 310 | }, 
311 | "id": "tQ0nsZn-3LWC", 312 | "outputId": "aef4574a-9834-4d02-858d-eb463c9a59b2" 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "#month wise distribution\n", 317 | "sns.distplot(rain)" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": { 324 | "colab": { 325 | "base_uri": "https://localhost:8080/", 326 | "height": 252 327 | }, 328 | "id": "bYir_BRYwvbr", 329 | "outputId": "4aa5931a-05fa-4c10-c24c-50e5cd71900b" 330 | }, 331 | "outputs": [], 332 | "source": [ 333 | "# Finding out most rainy months\n", 334 | "df.groupby('Month')['Rainfall'].mean().sort_values(ascending=False)\n" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": { 341 | "colab": { 342 | "base_uri": "https://localhost:8080/", 343 | "height": 252 344 | }, 345 | "id": "UC9Xed9W3CwD", 346 | "outputId": "7561d569-a198-46c5-810c-b2a13927853a" 347 | }, 348 | "outputs": [], 349 | "source": [ 350 | "#month wise parameter checking\n", 351 | "mxt=df.groupby('Month')['MaxTemp'].mean()\n", 352 | "mxt" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": { 359 | "colab": { 360 | "base_uri": "https://localhost:8080/", 361 | "height": 296 362 | }, 363 | "id": "sAV30V724NuH", 364 | "outputId": "2cd50309-2239-4830-ecad-1b2c78704bc1" 365 | }, 366 | "outputs": [], 367 | "source": [ 368 | "#month wise distribution\n", 369 | "sns.distplot(mxt)" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": { 376 | "colab": { 377 | "base_uri": "https://localhost:8080/", 378 | "height": 252 379 | }, 380 | "id": "_bd-dbxJxNGA", 381 | "outputId": "451d007f-818d-4684-c7fb-efce3fd163ec" 382 | }, 383 | "outputs": [], 384 | "source": [ 385 | "# Finding out most MaxTemp months\n", 386 | "df.groupby('Month')['MaxTemp'].mean().sort_values(ascending=True)" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 
| "metadata": { 393 | "colab": { 394 | "base_uri": "https://localhost:8080/", 395 | "height": 252 396 | }, 397 | "id": "vtxkK5Oo4bsI", 398 | "outputId": "38ed168e-7937-4c44-e277-b8d4665101ba" 399 | }, 400 | "outputs": [], 401 | "source": [ 402 | "#month wise parameter checking\n", 403 | "mt=df.groupby('Month')['MinTemp'].mean()\n", 404 | "mt" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": { 411 | "colab": { 412 | "base_uri": "https://localhost:8080/", 413 | "height": 296 414 | }, 415 | "id": "vU7eDAQt42ow", 416 | "outputId": "9f626447-1f33-4cbe-9641-ea7308274fd9" 417 | }, 418 | "outputs": [], 419 | "source": [ 420 | "#month wise distribution\n", 421 | "sns.distplot(mt)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": null, 427 | "metadata": { 428 | "colab": { 429 | "base_uri": "https://localhost:8080/", 430 | "height": 252 431 | }, 432 | "id": "vjA9TAgQxbsZ", 433 | "outputId": "d1c8f811-a486-49a2-de63-b35a9532c010" 434 | }, 435 | "outputs": [], 436 | "source": [ 437 | "# Finding out coldest months (lowest mean MinTemp first)\n", 438 | "df.groupby('Month')['MinTemp'].mean().sort_values(ascending=True)" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": { 445 | "colab": { 446 | "base_uri": "https://localhost:8080/", 447 | "height": 252 448 | }, 449 | "id": "g_A8G_v04-Ig", 450 | "outputId": "97de91d7-51a9-4e24-aace-6ecb4999d41c" 451 | }, 452 | "outputs": [], 453 | "source": [ 454 | "#month wise parameter checking\n", 455 | "hm=df.groupby('Month')['RelativeHumidity'].mean()\n", 456 | "hm" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "metadata": { 463 | "colab": { 464 | "base_uri": "https://localhost:8080/", 465 | "height": 252 466 | }, 467 | "id": "43JIreaoxu-g", 468 | "outputId": "e0bf23eb-6b17-4ee4-824c-7d2ef757b58f" 469 | }, 470 | "outputs": [], 471 | "source": [ 472 | "# Finding out most humid months\n", 473 | 
"df.groupby('Month')['RelativeHumidity'].mean().sort_values(ascending=False)" 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": null, 479 | "metadata": { 480 | "colab": { 481 | "base_uri": "https://localhost:8080/", 482 | "height": 296 483 | }, 484 | "id": "wUdl-8nt5IYv", 485 | "outputId": "662e0992-c5e0-44c6-d051-88c672fdfd3e" 486 | }, 487 | "outputs": [], 488 | "source": [ 489 | "#month wise distribution\n", 490 | "sns.distplot(hm)" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": { 497 | "colab": { 498 | "base_uri": "https://localhost:8080/", 499 | "height": 374 500 | }, 501 | "id": "3KO4DwvQnHip", 502 | "outputId": "7d550a77-85c2-4ee1-f177-c039e4e3e60a" 503 | }, 504 | "outputs": [], 505 | "source": [ 506 | "#Visualizing Boxplot\n", 507 | "plt.figure(figsize=(16, 6))\n", 508 | "ax = sns.boxplot(data=df, orient=\"h\", palette=\"Set2\")" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": null, 514 | "metadata": { 515 | "colab": { 516 | "base_uri": "https://localhost:8080/", 517 | "height": 279 518 | }, 519 | "id": "ijfCHWu1ngRh", 520 | "outputId": "82cc00b8-1596-4a08-ecc7-104d678b7ec8" 521 | }, 522 | "outputs": [], 523 | "source": [ 524 | "ax = sns.boxplot(x=df[\"Rainfall\"])" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": null, 530 | "metadata": { 531 | "colab": { 532 | "base_uri": "https://localhost:8080/", 533 | "height": 279 534 | }, 535 | "id": "RXQZveO9oZUZ", 536 | "outputId": "9c774efc-1fa7-48a7-c6f0-0530ec254a20" 537 | }, 538 | "outputs": [], 539 | "source": [ 540 | "ax = sns.boxplot(x=df[\"RelativeHumidity\"])" 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": null, 546 | "metadata": { 547 | "colab": { 548 | "base_uri": "https://localhost:8080/", 549 | "height": 388 550 | }, 551 | "id": "w5VQMGQPpQq7", 552 | "outputId": "0d672fa0-d6d6-4f9d-e52d-20b4043d27b2" 553 | }, 554 | "outputs": [], 555 | 
"source": [ 556 | "plt.figure(figsize=(16, 6))\n", 557 | "ax = sns.boxplot(x=\"RelativeHumidity\", y=\"Rainfall\", data=df)" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": null, 563 | "metadata": { 564 | "colab": { 565 | "base_uri": "https://localhost:8080/", 566 | "height": 386 567 | }, 568 | "id": "p4Prjty5uUuG", 569 | "outputId": "dff3b8cd-8197-49cb-d250-fb097d728ca9" 570 | }, 571 | "outputs": [], 572 | "source": [ 573 | "plt.figure(figsize=(16, 6))\n", 574 | "ax=sns.catplot(x=\"Month\", y=\"Rainfall\", kind=\"box\", data=df);" 575 | ] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": null, 580 | "metadata": { 581 | "colab": { 582 | "base_uri": "https://localhost:8080/", 583 | "height": 369 584 | }, 585 | "id": "esbAFkt7wm1u", 586 | "outputId": "45d65d29-ca52-439d-9344-b0e528de396d" 587 | }, 588 | "outputs": [], 589 | "source": [ 590 | "\n", 591 | "ax=sns.catplot(x=\"Month\", y=\"MinTemp\", kind=\"box\", data=df)" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": { 598 | "colab": { 599 | "base_uri": "https://localhost:8080/", 600 | "height": 369 601 | }, 602 | "id": "K5vjZVlEwuwi", 603 | "outputId": "84250ed5-f6ca-4101-ebb8-41281c20648c" 604 | }, 605 | "outputs": [], 606 | "source": [ 607 | "ax=sns.catplot(x=\"Month\", y=\"MaxTemp\", kind=\"box\", data=df)" 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "execution_count": null, 613 | "metadata": { 614 | "colab": { 615 | "base_uri": "https://localhost:8080/", 616 | "height": 408 617 | }, 618 | "id": "1uH2QOpRqP27", 619 | "outputId": "ddc517ce-8846-4edb-f774-9f12bc965597" 620 | }, 621 | "outputs": [], 622 | "source": [ 623 | "plt.figure(figsize=(50, 6))\n", 624 | "ax = sns.boxplot(x=\"MinTemp\", y=\"Rainfall\", data=df)" 625 | ] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "execution_count": null, 630 | "metadata": { 631 | "colab": { 632 | "base_uri": "https://localhost:8080/", 633 | "height": 398 
634 | }, 635 | "id": "cU2QOIfurMJy", 636 | "outputId": "dccd8fa7-198a-4a48-8fa1-a7874dedd4f3" 637 | }, 638 | "outputs": [], 639 | "source": [ 640 | "\n", 641 | "df.hist()" 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": null, 647 | "metadata": { 648 | "colab": { 649 | "base_uri": "https://localhost:8080/", 650 | "height": 296 651 | }, 652 | "id": "UmF8ueMOpeUJ", 653 | "outputId": "429cba96-2322-499c-d7f6-4c210dc58f5d" 654 | }, 655 | "outputs": [], 656 | "source": [ 657 | "#Distribution Plot\n", 658 | "sns.distplot(df['Rainfall'])" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": null, 664 | "metadata": { 665 | "colab": { 666 | "base_uri": "https://localhost:8080/", 667 | "height": 296 668 | }, 669 | "id": "SFcNmIJYrST5", 670 | "outputId": "82ac50f0-3882-4d5f-f804-de9997a1d4a2" 671 | }, 672 | "outputs": [], 673 | "source": [ 674 | "#Distribution Plot\n", 675 | "sns.distplot(df['RelativeHumidity'])" 676 | ] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": null, 681 | "metadata": { 682 | "colab": { 683 | "base_uri": "https://localhost:8080/", 684 | "height": 297 685 | }, 686 | "id": "q1V3FiXDsDRy", 687 | "outputId": "2c92411d-6f80-481d-c8e3-67722766f7d5" 688 | }, 689 | "outputs": [], 690 | "source": [ 691 | "#Distribution Plot\n", 692 | "sns.distplot(df['MinTemp'])" 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": null, 698 | "metadata": { 699 | "colab": { 700 | "base_uri": "https://localhost:8080/", 701 | "height": 1000 702 | }, 703 | "id": "suyyQdG9sHHr", 704 | "outputId": "4ccbac8f-ee8b-451d-bd33-7bfe9e6638dd" 705 | }, 706 | "outputs": [], 707 | "source": [ 708 | "#Drawing Pairplot\n", 709 | "sns.pairplot(df);" 710 | ] 711 | }, 712 | { 713 | "cell_type": "code", 714 | "execution_count": null, 715 | "metadata": { 716 | "colab": { 717 | "base_uri": "https://localhost:8080/", 718 | "height": 283 719 | }, 720 | "id": "ydQLF--RV97c", 721 | "outputId": 
"6d1ed1a7-cb10-4e96-bb29-7f36e11f1307" 722 | }, 723 | "outputs": [], 724 | "source": [ 725 | "plt.scatter(df.Rainfall,df.RelativeHumidity)\n" 726 | ] 727 | }, 728 | { 729 | "cell_type": "code", 730 | "execution_count": null, 731 | "metadata": { 732 | "colab": { 733 | "base_uri": "https://localhost:8080/", 734 | "height": 441 735 | }, 736 | "id": "g0UhaXTCsiVC", 737 | "outputId": "eda2fc55-838d-48f5-a94f-76cb8e946520" 738 | }, 739 | "outputs": [], 740 | "source": [ 741 | "sns.jointplot(x=\"Rainfall\", y=\"RelativeHumidity\", data=df, kind=\"reg\");" 742 | ] 743 | }, 744 | { 745 | "cell_type": "code", 746 | "execution_count": null, 747 | "metadata": { 748 | "colab": { 749 | "base_uri": "https://localhost:8080/", 750 | "height": 441 751 | }, 752 | "id": "wy2aTAlrxn7D", 753 | "outputId": "aedeae36-a3ac-4379-c45c-83678ebfc289" 754 | }, 755 | "outputs": [], 756 | "source": [ 757 | "sns.jointplot(x=\"Rainfall\", y=\"MinTemp\", data=df, kind=\"reg\");" 758 | ] 759 | }, 760 | { 761 | "cell_type": "code", 762 | "execution_count": null, 763 | "metadata": { 764 | "colab": { 765 | "base_uri": "https://localhost:8080/", 766 | "height": 441 767 | }, 768 | "id": "PZSMwMls5dKl", 769 | "outputId": "e506c214-c210-408c-9ea2-eb678f0c4df6" 770 | }, 771 | "outputs": [], 772 | "source": [ 773 | "sns.jointplot(x=\"Rainfall\", y=\"MaxTemp\", data=df, kind=\"reg\");" 774 | ] 775 | }, 776 | { 777 | "cell_type": "code", 778 | "execution_count": null, 779 | "metadata": { 780 | "colab": { 781 | "base_uri": "https://localhost:8080/", 782 | "height": 441 783 | }, 784 | "id": "dhrYcwEk5iF1", 785 | "outputId": "f3982e4d-a16f-47b6-c65c-0f7fac0ae821" 786 | }, 787 | "outputs": [], 788 | "source": [ 789 | "sns.jointplot(x=\"Rainfall\", y=\"Month\", data=df, kind=\"reg\");" 790 | ] 791 | }, 792 | { 793 | "cell_type": "code", 794 | "execution_count": null, 795 | "metadata": { 796 | "colab": { 797 | "base_uri": "https://localhost:8080/", 798 | "height": 204 799 | }, 800 | "id": "AVoMIxNB4Vn9", 801 | 
"outputId": "968760a2-619c-4418-a6cc-44f552621098" 802 | }, 803 | "outputs": [], 804 | "source": [ 805 | "df.head()" 806 | ] 807 | }, 808 | { 809 | "cell_type": "code", 810 | "execution_count": null, 811 | "metadata": { 812 | "id": "T9mnPPDa42vh" 813 | }, 814 | "outputs": [], 815 | "source": [ 816 | "#converting int to string\n", 817 | "df['YAER1'] = df.YEAR.astype(str)\n", 818 | "df['MONTH1'] = df.Month.astype(str)" 819 | ] 820 | }, 821 | { 822 | "cell_type": "code", 823 | "execution_count": null, 824 | "metadata": { 825 | "id": "rBfgQ7nW9yiH" 826 | }, 827 | "outputs": [], 828 | "source": [ 829 | "df.info()" 830 | ] 831 | }, 832 | { 833 | "cell_type": "code", 834 | "execution_count": null, 835 | "metadata": { 836 | "id": "0Xwe6jhu_KVV" 837 | }, 838 | "outputs": [], 839 | "source": [ 840 | "#Creating new columns for time series plot\n", 841 | "df['DateStamp'] = df['YAER1'] +'-'+ df['MONTH1']+'-30'" 842 | ] 843 | }, 844 | { 845 | "cell_type": "code", 846 | "execution_count": null, 847 | "metadata": { 848 | "id": "RHwfx7IcE5jT" 849 | }, 850 | "outputs": [], 851 | "source": [ 852 | "df1=df" 853 | ] 854 | }, 855 | { 856 | "cell_type": "code", 857 | "execution_count": null, 858 | "metadata": { 859 | "id": "q6lUtF62AOSD" 860 | }, 861 | "outputs": [], 862 | "source": [ 863 | "#Setting DateStamp as index\n", 864 | "df1=df1.set_index(\"DateStamp\", inplace = True)" 865 | ] 866 | }, 867 | { 868 | "cell_type": "code", 869 | "execution_count": null, 870 | "metadata": { 871 | "colab": { 872 | "base_uri": "https://localhost:8080/", 873 | "height": 173 874 | }, 875 | "id": "_JEOSPclBDVk", 876 | "outputId": "c37457d1-70c6-4219-d244-3baa5f835029" 877 | }, 878 | "outputs": [], 879 | "source": [ 880 | "df.head(3)" 881 | ] 882 | }, 883 | { 884 | "cell_type": "code", 885 | "execution_count": null, 886 | "metadata": { 887 | "colab": { 888 | "base_uri": "https://localhost:8080/", 889 | "height": 607 890 | }, 891 | "id": "zcdVRAIH_um4", 892 | "outputId": 
"1bc4aeb9-1af3-451f-d747-2b29a1435c39" 893 | }, 894 | "outputs": [], 895 | "source": [ 896 | "#Overall TS plot using matplotlib\n", 897 | "import matplotlib.pyplot as plt\n", 898 | "\n", 899 | "df.plot(figsize=(20,10))\n", 900 | "plt.title(\"Time series plot of Dhaka weather\")\n", 901 | "plt.show()" 902 | ] 903 | }, 904 | { 905 | "cell_type": "code", 906 | "execution_count": null, 907 | "metadata": { 908 | "colab": { 909 | "base_uri": "https://localhost:8080/", 910 | "height": 404 911 | }, 912 | "id": "-l0wuircDFYW", 913 | "outputId": "b6aaeb09-6faa-4cae-ea9e-041fc4704b44" 914 | }, 915 | "outputs": [], 916 | "source": [ 917 | "#MaxTemp TS plot using sns\n", 918 | "plt.figure(figsize=(16, 6))\n", 919 | "plt.title('Time series plot of MaxTemp')\n", 920 | "ax = sns.lineplot(x=\"YEAR\", y=\"MaxTemp\",err_style=\"bars\",label=\"MaxTemp\", ci=95,data=df)" 921 | ] 922 | }, 923 | { 924 | "cell_type": "code", 925 | "execution_count": null, 926 | "metadata": { 927 | "colab": { 928 | "base_uri": "https://localhost:8080/", 929 | "height": 404 930 | }, 931 | "id": "ncNG4EApGUZN", 932 | "outputId": "8647dcfb-9926-46d4-af53-45c2b82d6b92" 933 | }, 934 | "outputs": [], 935 | "source": [ 936 | "#MinTemp TS plot using sns\n", 937 | "plt.figure(figsize=(16, 6))\n", 938 | "plt.title('Time series plot of MinTemp')\n", 939 | "ax = sns.lineplot(x=\"YEAR\", y=\"MinTemp\",color=\"blue\", ci=90,label=\"MinTemp\",data=df)" 940 | ] 941 | }, 942 | { 943 | "cell_type": "code", 944 | "execution_count": null, 945 | "metadata": { 946 | "colab": { 947 | "base_uri": "https://localhost:8080/", 948 | "height": 424 949 | }, 950 | "id": "qjb1YKjZGlkV", 951 | "outputId": "065d3a1d-8103-46f3-8061-4e45e06e5544" 952 | }, 953 | "outputs": [], 954 | "source": [ 955 | "#Relative HumidityTS plot using sns\n", 956 | "plt.figure(figsize=(16, 6))\n", 957 | "plt.title('Time series plot of Relative Humidity')\n", 958 | "ax = sns.lineplot(x=\"YEAR\", y=\"RelativeHumidity\",label=\"Relative Humidity\",ci=80,data=df)" 
959 | ] 960 | }, 961 | { 962 | "cell_type": "code", 963 | "execution_count": null, 964 | "metadata": { 965 | "colab": { 966 | "base_uri": "https://localhost:8080/", 967 | "height": 424 968 | }, 969 | "id": "DLiZ98XkI9ri", 970 | "outputId": "c5d365e8-fab3-4249-b131-1ca747376703" 971 | }, 972 | "outputs": [], 973 | "source": [ 974 | "#Rainfall TS plot using sns\n", 975 | "plt.figure(figsize=(16, 6))\n", 976 | "plt.title('Time series plot of Rainfall')\n", 977 | "ax = sns.lineplot(x=\"YEAR\", y=\"Rainfall\",label=\"Rainfall\",ci=80,data=df)" 978 | ] 979 | }, 980 | { 981 | "cell_type": "code", 982 | "execution_count": null, 983 | "metadata": { 984 | "colab": { 985 | "base_uri": "https://localhost:8080/", 986 | "height": 238 987 | }, 988 | "id": "lwYb-gdeKQ9Z", 989 | "outputId": "d3fea4a6-bce4-43f0-8db3-fbe770f137f1" 990 | }, 991 | "outputs": [], 992 | "source": [ 993 | "#Yearly Min Temp Change\n", 994 | "min=df.groupby('YEAR')['MinTemp'].mean()\n" 995 | ] 996 | }, 997 | { 998 | "cell_type": "code", 999 | "execution_count": null, 1000 | "metadata": { 1001 | "colab": { 1002 | "base_uri": "https://localhost:8080/", 1003 | "height": 394 1004 | }, 1005 | "id": "V0PeG2qJKgub", 1006 | "outputId": "3eb11da0-4ca3-425a-89ee-42ca29c479b8" 1007 | }, 1008 | "outputs": [], 1009 | "source": [ 1010 | "#Yearly Min Temp Change plot\n", 1011 | "plt.figure(figsize=(16, 6))\n", 1012 | "ax = sns.lineplot( label=\"Yearly Mean Min Temp.\",data=min)" 1013 | ] 1014 | }, 1015 | { 1016 | "cell_type": "code", 1017 | "execution_count": null, 1018 | "metadata": { 1019 | "colab": { 1020 | "base_uri": "https://localhost:8080/", 1021 | "height": 394 1022 | }, 1023 | "id": "VQAcCW2xPi9o", 1024 | "outputId": "b5716d45-c852-4abc-c5e8-aa782f642cbd" 1025 | }, 1026 | "outputs": [], 1027 | "source": [ 1028 | "#Yearly Total Rainfall Change\n", 1029 | "rainfall=df.groupby('YEAR')['Rainfall'].sum()\n", 1030 | "plt.figure(figsize=(16, 6))\n", 1031 | "ax = sns.lineplot( label=\"Yearly Total 
Rainfall\",data=rainfall)" 1032 | ] 1033 | }, 1034 | { 1035 | "cell_type": "code", 1036 | "execution_count": null, 1037 | "metadata": { 1038 | "colab": { 1039 | "base_uri": "https://localhost:8080/", 1040 | "height": 374 1041 | }, 1042 | "id": "5D3FNO87P852", 1043 | "outputId": "b3b84dd5-af68-4ebb-a432-e7572cb20862" 1044 | }, 1045 | "outputs": [], 1046 | "source": [ 1047 | "#Yearly Mean Relative Humidity Change\n", 1048 | "rh=df.groupby('YEAR')['RelativeHumidity'].mean()\n", 1049 | "plt.figure(figsize=(16, 6))\n", 1050 | "ax = sns.lineplot( label=\"Yearly Mean Relative Humidity\",data=rh)" 1051 | ] 1052 | } 1053 | ], 1054 | "metadata": { 1055 | "colab": { 1056 | "provenance": [] 1057 | }, 1058 | "kernelspec": { 1059 | "display_name": "Python 3 (ipykernel)", 1060 | "language": "python", 1061 | "name": "python3" 1062 | }, 1063 | "language_info": { 1064 | "codemirror_mode": { 1065 | "name": "ipython", 1066 | "version": 3 1067 | }, 1068 | "file_extension": ".py", 1069 | "mimetype": "text/x-python", 1070 | "name": "python", 1071 | "nbconvert_exporter": "python", 1072 | "pygments_lexer": "ipython3", 1073 | "version": "3.11.7" 1074 | } 1075 | }, 1076 | "nbformat": 4, 1077 | "nbformat_minor": 4 1078 | } 1079 | --------------------------------------------------------------------------------