├── README.md
├── Data Science File
├── data.csv
├── Untitled4.ipynb
└── 5_ExploratoryDataAnalysis.ipynb
├── simple.py
├── Assignment 3
├── Question F.py
├── question b.py
├── Question G.py
├── Question E.py
├── question c.py
├── Question d.py
├── Question H.py
├── question i.py
└── Algerian_forest_Modm.csv
├── Assignment 2
├── Ven Diagram.py
├── Data Sheet.py
└── Ven Diagram 2.py
├── Misccode.py
├── Question 1
├── First Code.py
├── Statistics CT.py
└── my file.py
/README.md:
--------------------------------------------------------------------------------
1 | # Data-Science
2 | Reference: Dr. Ajit Kumar Majumdar, Department of Computer Science and Engineering, Daffodil International University.
3 |
--------------------------------------------------------------------------------
/Data Science File/data.csv:
--------------------------------------------------------------------------------
1 | Student ID,Student Name,CGPA,Age,Semester,Gender
2 | 111,A,3.22,18,F22,male
3 | 222,B,,20,S22,female
4 | 333,C,3.62,,F23,female
5 | 444,D,4,20,F23,male
6 | 555,E,3.68,21,S23,male
7 | 666,F,3.89,22,S23,male
8 | 777,O,3.91,30,F23,other
9 |
--------------------------------------------------------------------------------
/simple.py:
--------------------------------------------------------------------------------
# Calculate mean, median, mode, standard deviation, variance, minimum and
# maximum of 5, 6, 7, 10, 12
# Data Science Code

import numpy as np
import pandas as pd

AA = [5, 6, 7, 10, 12]
A = pd.DataFrame(AA)

# Work on a plain float array so every statistic below is a scalar.
values = np.asarray(AA, dtype=float)

mean_value = float(np.mean(values))
median_value = float(np.median(values))
# Series.mode() returns every value tied for the highest frequency; all five
# observations are distinct here, so each one is reported as a mode.
mode_values = pd.Series(AA).mode().tolist()
minimum_value = float(np.min(values))
maximum_value = float(np.max(values))
# ddof=0 is spelled out: these are population (divide-by-n) measures.
std_value = float(np.std(values, ddof=0))
variance_value = float(np.var(values, ddof=0))

# Print the results so the script produces output when run non-interactively.
print("Mean:", mean_value)
print("Median:", median_value)
print("Mode:", mode_values)
print("Minimum:", minimum_value)
print("Maximum:", maximum_value)
print("Standard deviation:", std_value)
print("Variance:", variance_value)
15 |
--------------------------------------------------------------------------------
/Assignment 3/Question F.py:
--------------------------------------------------------------------------------
import pandas as pd
import scipy.stats as stats

# Load the data set.
forest = pd.read_csv("../Assignment 3/Algerian_forest_Modm.csv")
# Reproducible random sample of 180 'Ws' observations (seed 5364).
ws_sample = forest['Ws'].sample(n=180, random_state=5364)

# f) Coefficient of skewness and kurtosis of the sampled 'Ws' values.
ws_skewness = stats.skew(ws_sample)
ws_kurtosis = stats.kurtosis(ws_sample)

print("\nSkewness:", ws_skewness)
print("Kurtosis:", ws_kurtosis)
--------------------------------------------------------------------------------
/Assignment 3/question b.py:
--------------------------------------------------------------------------------
import pandas as pd
import matplotlib.pyplot as plt

# Load the data set.
forest = pd.read_csv("Algerian_forest_Modm.csv")
# Reproducible random sample of 180 'Ws' observations (seed 5364).
ws_sample = forest['Ws'].sample(n=180, random_state=5364)

# b) Histogram of the sampled 'Ws' values, 20 bins with outlined bars.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(ws_sample, bins=20, edgecolor='black')
ax.set_title('Histogram of Ws Values')
ax.set_xlabel('Ws')
ax.set_ylabel('Frequency')
ax.grid(True)
plt.show()
--------------------------------------------------------------------------------
/Assignment 3/Question G.py:
--------------------------------------------------------------------------------
import pandas as pd
import numpy as np

# Measurement columns included in the matrices (all present in the CSV header).
NUMERIC_COLUMNS = ['Temperature', 'RH', 'Ws', 'Rain', 'FFMC',
                   'DMC', 'DC', 'ISI', 'BUI', 'FWI']

# Read the CSV data
data = pd.read_csv("../Assignment 3/Algerian_forest_Modm.csv")
# Select a random sample of 180 rows with seed value 5364.  Whole rows are
# sampled so that every variable is observed on the same 180 records.
sample_data = data.sample(n=180, random_state=5364)[NUMERIC_COLUMNS]

# g) Determine covariance and correlation matrices.
# A single 1-D column only yields its own variance and a trivial correlation
# of 1, so the matrices are computed across all measurement columns.
covariance_matrix = sample_data.cov()
correlation_matrix = sample_data.corr()

print("\nCovariance Matrix:")
print(covariance_matrix)
print("\nCorrelation Matrix:")
print(correlation_matrix)
--------------------------------------------------------------------------------
/Assignment 3/Question E.py:
--------------------------------------------------------------------------------
import pandas as pd
import statistics

# Load the data set.
forest = pd.read_csv("../Assignment 3/Algerian_forest_Modm.csv")
# Reproducible random sample of 180 'Ws' observations (seed 5364).
ws_sample = forest['Ws'].sample(n=180, random_state=5364)

# e) Population standard deviation and population coefficient of variation.
ws_mean = statistics.mean(ws_sample)
ws_pstdev = statistics.pstdev(ws_sample)
# Coefficient of variation expressed as a percentage of the mean.
ws_cv_percent = ws_pstdev / ws_mean * 100

print("\nPopulation Standard Deviation:", ws_pstdev)
print("Population Coefficient of Variation:", ws_cv_percent)
--------------------------------------------------------------------------------
/Assignment 2/Ven Diagram.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt

# (x, y, caption, font size) for every text label placed on the figure.
LABELS = [
    (0.5, 0.5, "Data Science", 15),
    (0.2, 0.6, "Hacking Skills", 12),
    (0.8, 0.6, "Math And Statistics", 12),
    (0.5, 0.3, "Domain Expert", 12),
]

# Text-only sketch of Drew Conway's Venn diagram: centred labels, no axes.
plt.figure(figsize=(6, 4))
plt.title("Drew Conway's Venn Diagram")
for x_pos, y_pos, caption, size in LABELS:
    plt.text(x_pos, y_pos, caption, ha='center', va='center', fontsize=size)
plt.axis('off')
plt.show()
13 |
--------------------------------------------------------------------------------
/Misccode.py:
--------------------------------------------------------------------------------
# Misccode.py
# Scratch calculations: coefficient of variation (CV) and related means.
# Bare expressions are kept so values echo when run line by line in a console.
import numpy as np
import scipy.stats as sc

wt = [21.4, 19.7, 19.7, 20.6, 20.8, 20.1, 19.7, 20.3, 20.9]

# describe() returns (nobs, minmax, mean, variance, skewness, kurtosis).
summary = sc.describe(wt)
m = summary.mean
v = summary.variance
s = np.sqrt(v)

# CV in percent: cv = (std / mean) * 100
cv_wt = (s / m) * 100
cv_wt

sc.gmean(wt)
sc.variation(wt)

# Two hypothetical groups given by (mean, standard deviation).
m1, sd1 = 100, 15
m2, sd2 = 15, 10

cv1 = (sd1 / m1) * 100
cv2 = (sd2 / m2) * 100

cv1
cv2

sc.variation(wt)

sc.pmean(wt, 1)

x1 = [20, 29, 12, 45, 12, 34, 23]
x2 = [13, 5, 19, 39, 56, 78, 22, 34, 2, 5]
x3 = [4, 5, 8, 22, 55, 34, 67, 12]

cv1 = sc.variation(x1)
cv1


def cv(xx):
    """Return the coefficient of variation of xx via scipy.stats.variation."""
    return sc.variation(xx)


cv(x1)
cv(x2)
45 |
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/Assignment 3/question c.py:
--------------------------------------------------------------------------------
import pandas as pd
import statistics
import scipy.stats as stats

# Load the data set.
forest = pd.read_csv("Algerian_forest_Modm.csv")
# Reproducible random sample of 180 'Ws' observations (seed 5364).
ws_sample = forest['Ws'].sample(n=180, random_state=5364)

# c) Mean, median, mode, geometric mean and harmonic mean of the sample.
# All measures are computed first, then reported in the same order.
central_measures = {
    "Mean": statistics.mean(ws_sample),
    "Median": statistics.median(ws_sample),
    "Mode": stats.mode(ws_sample),
    "Geometric Mean": stats.gmean(ws_sample),
    "Harmonic Mean": statistics.harmonic_mean(ws_sample),
}
for label, value in central_measures.items():
    print(f"{label}:", value)
--------------------------------------------------------------------------------
/Question 1:
--------------------------------------------------------------------------------
import numpy as np
from scipy.stats import pearsonr

# Given temperature data
temperatures = [22, 25, 29, 30, 25, 25, 24, 25, 26, 28, 22, 26, 26, 25, 27, 31, 30, 28, 25, 27, 33, 30, 29, 30, 30, 29, 28, 30, 29, 27, 32, 33, 30, 29, 29, 32, 31, 32, 29, 34, 35, 35, 30, 28, 31, 36, 29, 32, 28, 31]

# Pearson's r needs at least two observations per variable, so it cannot be
# computed from one reading and its successor alone (pearsonr raises
# ValueError for length-1 input).  Instead, each window of consecutive
# readings is correlated with the same window shifted forward by one reading.
WINDOW_SIZE = 5

correlation_coefficients = []
for start in range(len(temperatures) - WINDOW_SIZE):
    current = temperatures[start:start + WINDOW_SIZE]
    following = temperatures[start + 1:start + 1 + WINDOW_SIZE]
    # r is undefined when either window is constant; skip such windows
    # rather than letting NaN values poison max()/min() below.
    if len(set(current)) < 2 or len(set(following)) < 2:
        continue
    corr, _ = pearsonr(current, following)
    correlation_coefficients.append(corr)

if not correlation_coefficients:
    raise ValueError("not enough varying temperature readings to correlate")

# Identify the highest and lowest correlation coefficient values
max_corr = max(correlation_coefficients)
min_corr = min(correlation_coefficients)

print("Highest correlation coefficient:", max_corr)
print("Lowest correlation coefficient:", min_corr)
19 |
--------------------------------------------------------------------------------
/Assignment 2/Data Sheet.py:
--------------------------------------------------------------------------------
# Question (i): classify observations as fire / not fire.
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Given data points for RH and FWI.
RH = np.array([79, 67, 63, 56])
FWI = np.array([0.21, 2.5, 10.2, 6.1])

# Training matrix: one row per observation, columns are (RH, FWI).
X = np.column_stack((RH, FWI))
# Class labels: assuming 1 means "fire" and 2 means "not fire".
y = np.array([1, 1, 2, 2])

# Estimate the linear discriminant function from the four observations.
lda = LinearDiscriminantAnalysis()
lda.fit(X, y)

# Classify the given points (the same four observations used for fitting).
new_data = np.array([[79, 0.21], [67, 2.5], [63, 10.2], [56, 6.1]])
predicted_classes = lda.predict(new_data)

for (rh_value, fwi_value), label in zip(new_data, predicted_classes):
    verdict = 'fire' if label == 1 else 'not fire'
    print(f"FWI: {fwi_value}, RH: {rh_value}, Classified as: {verdict}")
22 |
--------------------------------------------------------------------------------
/Assignment 3/Question d.py:
--------------------------------------------------------------------------------
import pandas as pd
import matplotlib.pyplot as plt
import statistics
import numpy as np
import scipy.stats as stats

# Load the data set.
forest = pd.read_csv("../Assignment 3/Algerian_forest_Modm.csv")
# Reproducible random sample of 180 'Ws' observations (seed 5364).
ws_sample = forest['Ws'].sample(n=180, random_state=5364)

# d) Five-number summary followed by a box plot of the same sample.
first_quartile, third_quartile = np.percentile(ws_sample, [25, 75])
five_number_summary = [
    ('Minimum', np.min(ws_sample)),
    ('1st Quartile (Q1)', first_quartile),
    ('Median', np.median(ws_sample)),
    ('3rd Quartile (Q3)', third_quartile),
    ('Maximum', np.max(ws_sample)),
]

print("\nSummary Measures:")
for label, amount in five_number_summary:
    print(f"{label}: {amount}")

plt.boxplot(ws_sample)
plt.title('Box Plot of Ws Values')
plt.ylabel('Ws')
plt.show()
--------------------------------------------------------------------------------
/Assignment 3/Question H.py:
--------------------------------------------------------------------------------
import pandas as pd
import scipy.stats as stats

# Read the CSV data
data = pd.read_csv("../Assignment 3/Algerian_forest_Modm.csv")
# Select a random sample of 180 rows with seed value 5364.  Sampling whole
# rows keeps each RH value paired with the Temperature of the same record.
sample_rows = data.sample(n=180, random_state=5364)

# h) Estimate the regression equation of Temperature on RH:
#    RH is the independent variable (x), Temperature the dependent one (y).
RH = sample_rows['RH']
Temperature = sample_rows['Temperature']

# linregress(x, y) fits y = slope * x + intercept, so RH must come first.
reg_model = stats.linregress(RH, Temperature)
slope = reg_model.slope
intercept = reg_model.intercept

print("\nRegression Equation:")
print(f"Temperature = {slope:.4f} * RH + {intercept:.4f}")

# Estimate Temperature when RH is 21.5%
predicted_temperature = slope * 21.5 + intercept
print("Estimated Temperature when RH is 21.5%:", predicted_temperature)
--------------------------------------------------------------------------------
/Assignment 2/Ven Diagram 2.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
from matplotlib_venn import venn3

# Region sizes keyed by membership bits, in the order
# (Math and Statistics, Domain Experts, Hacking Skills).
region_sizes = {
    '100': 20,
    '010': 25,
    '001': 30,
    '110': 15,
    '101': 10,
    '011': 18,
    '111': 7,
}

# Caption shown inside each region, replacing the default size number.
region_captions = {
    '100': 'Math and Statistics',
    '010': 'Domain Experts',
    '001': 'Hacking Skills',
    '110': 'Research',
    '101': 'Machine Learning',
    '011': 'Danger Zone',
    '111': 'Common Data Science',
}

# Draw the three-set diagram, then relabel every region.
diagram = venn3(
    subsets=region_sizes,
    set_labels=('Math and Statistics', 'Domain Experts', 'Hacking Skills'),
)
for region_id, caption in region_captions.items():
    diagram.get_label_by_id(region_id).set_text(caption)

plt.title("Data Science Venn Diagram")
plt.show()
29 |
--------------------------------------------------------------------------------
/First Code.py:
--------------------------------------------------------------------------------
#DIU_315_1.py.docx
# Two-group linear discriminant rule built from the pooled covariance matrix
# of the "activity" and "antigen" measurements.
import numpy as np
import pandas as pd
import scipy.linalg as slin

dob = pd.read_csv("C:/Users/HP/Desktop/315/obli.csv")
dnonc = pd.read_csv("C:/Users/HP/Desktop/315/nonc.csv")

# Transpose so that rows are variables and columns are observations, the
# layout np.cov expects by default.
Z2 = np.transpose(dob[["activity", "antigen"]])
Z1 = np.transpose(dnonc[["activity", "antigen"]])

# Sample (unbiased) covariance matrix of each group.
S1 = np.cov(Z1, bias=False)
S2 = np.cov(Z2, bias=False)

# Mean vector of each group.
X1b = np.mean(Z1, axis=1)
X2b = np.mean(Z2, axis=1)

n2 = len(dob)
n1 = len(dnonc)

# Pooled covariance: each group's matrix is weighted by its share of the
# combined degrees of freedom (n1-1)+(n2-1), so the two weights sum to 1.
# e.g. Spp=(29/73)*S1+(44/73)*S2 when n1=30 and n2=45.
pooled_dof = (n1 - 1) + (n2 - 1)
Sp = ((n1 - 1) / pooled_dof) * S1 + ((n2 - 1) / pooled_dof) * S2

SpI = np.linalg.inv(Sp)

# Discriminant coefficients: (mean1 - mean2)' * Sp^-1.
X12 = X1b - X2b
yh = np.dot(X12, SpI)

# Cut-off: the discriminant evaluated at the midpoint of the two means.
U = X1b + X2b
mhat = np.dot((1/2)*yh, U)

# Observation to classify.
x0 = [-.0867, -0.07786]

#x0=[-.1744,.1892]

#v1 = float(input("Please provide 1st value: "));v2 = float(input("Please provide 2nd value: "));#x0=[v1,v2]

# Allocate to group 1 when the discriminant score exceeds the cut-off.
rule = np.dot(yh, x0) - mhat

if rule > 0:
    print("Subject/individual belongs to Goup 1")
else:
    print("Subject/individual belongs to Goup 2")

#--------------------------------------#


##
# Scratch values kept from the original notes (not used above).
np.array([[ 64.96,  33.2 , -24.44],
          [ 33.2 ,  56.4 , -24.1 ],
          [-24.44, -24.1 ,  75.56]])

A = [45, 37, 42, 35, 39]
B = [38, 31, 26, 28, 33]
C = [10, 15, 17, 21, 12]
58 |
59 |
60 |
61 | ##
62 |
--------------------------------------------------------------------------------
/Assignment 3/question i.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
from matplotlib_venn import venn3

# Region sizes keyed by membership bits, in the order
# (Math and Statistics, Domain Experts, Hacking Skills).
region_sizes = {
    '100': 20,
    '010': 25,
    '001': 30,
    '110': 15,
    '101': 10,
    '011': 18,
    '111': 7,
}

# Caption shown inside each region, replacing the default size number.
region_captions = {
    '100': 'Math and Statistics',
    '010': 'Domain Experts',
    '001': 'Hacking Skills',
    '110': 'Research',
    '101': 'Machine Learning',
    '011': 'Danger Zone',
    '111': 'Common Data Science',
}

# Draw the three-set diagram, then relabel every region.
diagram = venn3(
    subsets=region_sizes,
    set_labels=('Math and Statistics', 'Domain Experts', 'Hacking Skills'),
)
for region_id, caption in region_captions.items():
    diagram.get_label_by_id(region_id).set_text(caption)

plt.title("Data Science Venn Diagram")
plt.show()


import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Given data points for RH and FWI.
RH = np.array([79, 67, 63, 56])
FWI = np.array([0.21, 2.5, 10.2, 6.1])

# Training matrix: one row per observation, columns are (RH, FWI).
X = np.column_stack((RH, FWI))
# Class labels: assuming 1 means "fire" and 2 means "not fire".
y = np.array([1, 1, 2, 2])

# Estimate the linear discriminant function from the four observations.
lda = LinearDiscriminantAnalysis()
lda.fit(X, y)

# Classify the given points (the same four observations used for fitting).
new_data = np.array([[79, 0.21], [67, 2.5], [63, 10.2], [56, 6.1]])
predicted_classes = lda.predict(new_data)

for (rh_value, fwi_value), label in zip(new_data, predicted_classes):
    verdict = 'fire' if label == 1 else 'not fire'
    print(f"FWI: {fwi_value}, RH: {rh_value}, Classified as: {verdict}")
--------------------------------------------------------------------------------
/Statistics CT.py:
--------------------------------------------------------------------------------
# StatisticsCT_24Feb.py
# Walk-through of descriptive statistics, correlation and regression.
# Bare expressions are kept so values echo when run line by line in a console;
# run as a script, only print() calls and the plots produce output.

# Data set used in correlation and regression
x = [9, 7, 11, 12, 8, 7, 8, 11, 10, 12, 6, 6]
y = [8.1, 6, 3.6, 4, 5, 10, 7.6, 8, 8, 6, 8.6, 8]

# ---- Descriptive statistics using pandas ----
import pandas as pd

xd = pd.DataFrame(x)
xd.describe()
xd.mean()
xd.median()
xd.mode()
xd.var()
xd.skew()
xd.kurt()

# ---- Descriptive statistics using the statistics module ----
import statistics

statistics.harmonic_mean(x)
statistics.geometric_mean(x)
statistics.mean(x)
statistics.median(x)
statistics.mode(x)
statistics.pstdev(x)      # population standard deviation
statistics.stdev(x)       # sample standard deviation
statistics.pvariance(x)   # population variance
statistics.variance(x)    # sample variance

# ---- Covariance, correlation and regression ----
# Data (repeated so this section can be run on its own)
x = [9, 7, 11, 12, 8, 7, 8, 11, 10, 12, 6, 6]
y = [8.1, 6, 3.6, 4, 5, 10, 7.6, 8, 8, 6, 8.6, 8]

import numpy as np

corr_matrix = np.corrcoef(x, y)   # 2x2 correlation matrix
np.cov(x, y)                      # 2x2 variance-covariance matrix
corr_matrix
corr_matrix[0, 1]                 # off-diagonal entries hold r(x, y)
corr_matrix[1, 0]

# Using pandas
import pandas as pd

xd = pd.DataFrame(x)
yd = pd.DataFrame(y)
# Series (not DataFrame) objects are used for the pairwise calls below.
xd = pd.Series(x)
yd = pd.Series(y)

xd.corr(yd)   # correlation coefficient between x and y
yd.corr(xd)   # same value with the arguments swapped

yd.cov(xd)    # covariance between x and y

# ---- Scatter plot ----
import matplotlib.pyplot as plt

# Plain scatter plot first.
plt.scatter(x, y)
plt.show()

# Same plot again, this time with a title and axis labels.
plt.title("Figure: Scatter plot between car's ages and selling prices")
plt.xlabel("Age of Car in years (X)")
plt.ylabel("Selling Price of Car (Y)")
plt.scatter(x, y)
plt.show()

# ---- Histogram ----
plt.hist(x)
plt.show()

# ---- Simple linear regression: b, a, r, pvalue, standard_error ----
x = [9, 7, 11, 12, 8, 7, 8, 11, 10, 12, 6, 6]
y = [8.1, 6, 3.6, 4, 5, 10, 7.6, 8, 8, 6, 8.6, 8]
from scipy import stats

slope, intercept, r, p, std_err = stats.linregress(x, y)

print(r)
slope
intercept
stats.linregress(x, y)   # returns b, a, r, pvalue, standard_error
82 |
--------------------------------------------------------------------------------
/my file.py:
--------------------------------------------------------------------------------
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statistics
from scipy import stats

# Import the CSV file
data = pd.read_csv("weather_data.csv")

# Extract FFMC values
ffmc_values = data['FFMC']

# b) Construct a histogram
plt.figure(figsize=(10, 6))
plt.hist(ffmc_values, bins=20, edgecolor='black')
plt.title('Histogram of FFMC Values')
plt.xlabel('FFMC')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()

# c) Calculate mean, median, mode, geometric mean, and harmonic mean
mean_ffmc = np.mean(ffmc_values)
median_ffmc = np.median(ffmc_values)
mode_ffmc = statistics.mode(ffmc_values)
geometric_mean_ffmc = stats.gmean(ffmc_values)
harmonic_mean_ffmc = statistics.harmonic_mean(ffmc_values)

print("Mean:", mean_ffmc)
print("Median:", median_ffmc)
print("Mode:", mode_ffmc)
print("Geometric Mean:", geometric_mean_ffmc)
print("Harmonic Mean:", harmonic_mean_ffmc)

# d) Report five summary measures and construct a box plot
q1_ffmc = np.percentile(ffmc_values, 25)
q3_ffmc = np.percentile(ffmc_values, 75)
iqr_ffmc = q3_ffmc - q1_ffmc
minimum_ffmc = min(ffmc_values)
maximum_ffmc = max(ffmc_values)

plt.figure(figsize=(8, 6))
plt.boxplot(ffmc_values, vert=False)
plt.title('Box Plot of FFMC Values')
plt.xlabel('FFMC')
plt.grid(True)
plt.show()

print("Minimum:", minimum_ffmc)
print("1st Quartile:", q1_ffmc)
print("Median:", median_ffmc)
print("3rd Quartile:", q3_ffmc)
print("Maximum:", maximum_ffmc)

# e) Determine population standard deviation and population coefficient of variation
population_std_dev_ffmc = statistics.pstdev(ffmc_values)
population_coeff_var_ffmc = (population_std_dev_ffmc / mean_ffmc) * 100

print("Population Standard Deviation:", population_std_dev_ffmc)
print("Population Coefficient of Variation:", population_coeff_var_ffmc)

# f) Determine the coefficient of skewness and kurtosis
# The standard-library statistics module has no skew()/kurtosis(); both are
# provided by scipy.stats, which is already imported above as `stats`.
skewness_ffmc = stats.skew(ffmc_values)
kurtosis_ffmc = stats.kurtosis(ffmc_values)

print("Coefficient of Skewness:", skewness_ffmc)
print("Kurtosis:", kurtosis_ffmc)

# g) Covariance and correlation matrices
# NOTE(review): these column names must match the header of weather_data.csv
# (e.g. 'Temp' rather than 'Temperature') — confirm against the file.
matrix_columns = ['Temp', 'RH', 'Ws', 'Rain', 'FFMC', 'DMC', 'DC', 'ISI', 'BUI', 'FWI']
covariance_matrix = data[matrix_columns].cov()
correlation_matrix = data[matrix_columns].corr()

print("Covariance Matrix:")
print(covariance_matrix)
print("\nCorrelation Matrix:")
print(correlation_matrix)

# h) Regression equation of FWI on RH (RH is x, FWI is y)
fwi_values = data['FWI']
rh_values = data['RH']

slope, intercept, r_value, p_value, std_err = stats.linregress(rh_values, fwi_values)

print("Regression Equation: FWI = {:.2f} * RH + {:.2f}".format(slope, intercept))
# Estimate FWI when RH is 21.5%
rh = 21.5
estimated_fwi = slope * rh + intercept
print("Estimated FWI when RH is 21.5%:", estimated_fwi)
89 |
--------------------------------------------------------------------------------
/Data Science File/Untitled4.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "id": "42ab3c93-7b47-43eb-af5d-8fa258234dcb",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import matplotlib.pyplot as plt\n",
11 | "import seaborn as sns\n",
12 | "import numpy as np\n",
13 | "import pandas as pd\n",
14 | "dt = pd.read_csv(\"data.csv\")\n",
15 | "dt"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": 2,
21 | "id": "e1ed0469-cf9a-4a47-9707-fdb1f023fe7a",
22 | "metadata": {},
23 | "outputs": [
24 | {
25 | "data": {
26 | "text/plain": [
27 | "(7, 6)"
28 | ]
29 | },
30 | "execution_count": 2,
31 | "metadata": {},
32 | "output_type": "execute_result"
33 | }
34 | ],
35 | "source": [
36 | "dt.shape"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": null,
42 | "id": "24ce1bf5-8b79-4bc4-bc6c-2d85342ce72e",
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "dt.isnull()"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": null,
52 | "id": "65393823-1eae-4d10-8e74-e479f11b246c",
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "Full_fill_data =dt.fillna(0)\n",
57 | "Full_fill_data"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": null,
63 | "id": "7c295e67-6d18-449a-9f51-572edc68c86c",
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "Full_fill_data.describe()"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": 6,
73 | "id": "4eeb347c-4950-44be-a488-97d849e320c4",
74 | "metadata": {},
75 | "outputs": [
76 | {
77 | "data": {
78 | "text/html": [
79 | "
\n",
80 | "\n",
93 | "
\n",
94 | " \n",
95 | " \n",
96 | " | \n",
97 | " Student ID | \n",
98 | " Student Name | \n",
99 | " CGPA | \n",
100 | " Age | \n",
101 | " Semester | \n",
102 | " Gender | \n",
103 | "
\n",
104 | " \n",
105 | " \n",
106 | " \n",
107 | " | 0 | \n",
108 | " 111 | \n",
109 | " A | \n",
110 | " 3.22 | \n",
111 | " 18.0 | \n",
112 | " F22 | \n",
113 | " male | \n",
114 | "
\n",
115 | " \n",
116 | " | 3 | \n",
117 | " 444 | \n",
118 | " D | \n",
119 | " 4.00 | \n",
120 | " 20.0 | \n",
121 | " F23 | \n",
122 | " male | \n",
123 | "
\n",
124 | " \n",
125 | " | 4 | \n",
126 | " 555 | \n",
127 | " E | \n",
128 | " 3.68 | \n",
129 | " 21.0 | \n",
130 | " S23 | \n",
131 | " male | \n",
132 | "
\n",
133 | " \n",
134 | " | 5 | \n",
135 | " 666 | \n",
136 | " F | \n",
137 | " 3.89 | \n",
138 | " 22.0 | \n",
139 | " S23 | \n",
140 | " male | \n",
141 | "
\n",
142 | " \n",
143 | " | 6 | \n",
144 | " 777 | \n",
145 | " O | \n",
146 | " 3.91 | \n",
147 | " 30.0 | \n",
148 | " F23 | \n",
149 | " other | \n",
150 | "
\n",
151 | " \n",
152 | "
\n",
153 | "
"
154 | ],
155 | "text/plain": [
156 | " Student ID Student Name CGPA Age Semester Gender\n",
157 | "0 111 A 3.22 18.0 F22 male\n",
158 | "3 444 D 4.00 20.0 F23 male\n",
159 | "4 555 E 3.68 21.0 S23 male\n",
160 | "5 666 F 3.89 22.0 S23 male\n",
161 | "6 777 O 3.91 30.0 F23 other"
162 | ]
163 | },
164 | "execution_count": 6,
165 | "metadata": {},
166 | "output_type": "execute_result"
167 | }
168 | ],
169 | "source": [
170 | "delete_data =dt.dropna(inplace=False)\n",
171 | "delete_data"
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "execution_count": null,
177 | "id": "4ff77eff-732f-4f83-bc72-32cac26f9aea",
178 | "metadata": {},
179 | "outputs": [],
180 | "source": [
181 | "from sklearn.preprocessing import LabelEncoder\n",
182 | "le = LabelEncoder() \n",
183 | "dt['Gender']= le.fit_transform(dt['Gender']) \n",
184 | "dt "
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": null,
190 | "id": "381f38eb-4dc8-47cd-a42f-54791c1ebbd8",
191 | "metadata": {},
192 | "outputs": [],
193 | "source": [
194 | "Delete_data=dt.dropna(inplace=False)\n",
195 | "Delete_data"
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": null,
201 | "id": "7890adbe-bc44-4989-ae7d-457644f4d220",
202 | "metadata": {},
203 | "outputs": [],
204 | "source": [
205 | "dt1 = pd.get_dummies(dt, columns=['Gender'])\n",
206 | "dt1"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": 10,
212 | "id": "7824c539-d0af-4e3d-ad30-12e04fb2181d",
213 | "metadata": {},
214 | "outputs": [
215 | {
216 | "data": {
217 | "text/html": [
218 | "\n",
219 | "\n",
232 | "
\n",
233 | " \n",
234 | " \n",
235 | " | \n",
236 | " Student ID | \n",
237 | " Student Name | \n",
238 | " CGPA | \n",
239 | " Age | \n",
240 | " Semester | \n",
241 | " Gender | \n",
242 | "
\n",
243 | " \n",
244 | " \n",
245 | " \n",
246 | "
\n",
247 | "
"
248 | ],
249 | "text/plain": [
250 | "Empty DataFrame\n",
251 | "Columns: [Student ID, Student Name, CGPA, Age, Semester, Gender]\n",
252 | "Index: []"
253 | ]
254 | },
255 | "execution_count": 10,
256 | "metadata": {},
257 | "output_type": "execute_result"
258 | }
259 | ],
260 | "source": [
261 | "duplicates = dt[dt.duplicated()]\n",
262 | "duplicates"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": null,
268 | "id": "0aae7aef-4446-44b8-89d4-813ecfcf4363",
269 | "metadata": {},
270 | "outputs": [],
271 | "source": [
272 | "dt.head()"
273 | ]
274 | }
275 | ],
276 | "metadata": {
277 | "kernelspec": {
278 | "display_name": "Python 3 (ipykernel)",
279 | "language": "python",
280 | "name": "python3"
281 | },
282 | "language_info": {
283 | "codemirror_mode": {
284 | "name": "ipython",
285 | "version": 3
286 | },
287 | "file_extension": ".py",
288 | "mimetype": "text/x-python",
289 | "name": "python",
290 | "nbconvert_exporter": "python",
291 | "pygments_lexer": "ipython3",
292 | "version": "3.11.7"
293 | }
294 | },
295 | "nbformat": 4,
296 | "nbformat_minor": 5
297 | }
298 |
--------------------------------------------------------------------------------
/Assignment 3/Algerian_forest_Modm.csv:
--------------------------------------------------------------------------------
1 | day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Region
2 | 2,9,2012,22,86,15,10.1,30.5,0.7,7,0,1.1,0,not_fire,1
3 | 4,6,2012,25,89,13,2.5,28.6,1.3,6.9,0,1.7,0,not_fire,1
4 | 16,6,2012,29,89,13,0.7,36.1,1.7,7.6,0,2.2,0,not_fire,1
5 | 17,6,2012,30,89,16,0.6,37.3,1.1,7.8,0,1.6,0,not_fire,1
6 | 3,9,2012,25,78,15,3.8,42.6,1.2,7.5,0.1,1.7,0,not_fire,1
7 | 13,9,2012,25,86,21,4.6,40.9,1.3,7.5,0.1,1.8,0,not_fire,1
8 | 15,9,2012,24,82,15,0.4,44.9,0.9,7.3,0.2,1.4,0,not_fire,1
9 | 1,9,2012,25,76,17,7.2,46,1.3,7.5,0.2,1.8,0.1,not_fire,1
10 | 3,6,2012,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,not_fire,1
11 | 15,6,2012,28,80,17,3.1,49.4,3,7.4,0.4,3,0.1,not_fire,1
12 | 14,9,2012,22,76,26,8.3,47.4,1.1,7,0.4,1.6,0.1,not_fire,1
13 | 25,9,2012,26,81,21,5.8,48.6,3,7.7,0.4,3,0.1,not_fire,1
14 | 29,9,2012,26,80,16,1.8,47.4,2.9,7.7,0.3,3,0.1,not_fire,1
15 | 30,9,2012,25,78,14,1.4,45,1.9,7.5,0.2,2.4,0.1,not_fire,1
16 | 13,6,2012,27,84,21,1.2,50,6.7,17,0.5,6.7,0.2,not_fire,1
17 | 18,6,2012,31,78,14,0.3,56.9,1.9,8,0.7,2.4,0.2,not_fire,1
18 | 9,9,2012,30,77,15,1,56.1,2.1,8.4,0.7,2.6,0.2,not_fire,1
19 | 31,8,2012,28,80,21,16.8,52.5,8.7,8.7,0.6,8.3,0.3,not_fire,1
20 | 9,6,2012,25,88,13,0.2,52.9,7.9,38.8,0.4,10.5,0.3,not_fire,1
21 | 2,7,2012,27,75,19,1.2,55.7,2.4,8.3,0.8,2.8,0.3,not_fire,1
22 | 10,9,2012,33,73,12,1.8,59.9,2.2,8.9,0.7,2.7,0.3,not_fire,1
23 | 11,9,2012,30,77,21,1.8,58.5,1.9,8.4,1.1,2.4,0.3,not_fire,1
24 | 2,6,2012,29,61,13,1.3,64.4,4.1,7.6,1,3.9,0.4,not_fire,1
25 | 14,6,2012,30,78,20,0.5,59,4.6,7.8,1,4.4,0.4,not_fire,1
26 | 20,6,2012,30,80,16,0.4,59.8,3.4,27.1,0.9,5.1,0.4,not_fire,1
27 | 1,7,2012,29,68,19,1,59.9,2.5,8.6,1.1,2.9,0.4,not_fire,1
28 | 16,7,2012,28,76,21,0,72.6,7,25.5,0.7,8.3,0.4,not_fire,1
29 | 8,9,2012,30,73,17,0.9,62,2.6,8.4,1.1,3,0.4,not_fire,1
30 | 1,6,2012,29,57,18,0,65.7,3.4,7.6,1.3,3.4,0.5,not_fire,1
31 | 5,6,2012,27,77,16,0,64.8,3,14.2,1.2,3.9,0.5,not_fire,1
32 | 3,7,2012,32,76,20,0.7,63.1,2.6,9.2,1.3,3,0.5,not_fire,1
33 | 10,7,2012,33,69,13,0.7,66.6,6,9.3,1.1,5.8,0.5,not_fire,1
34 | 15,7,2012,30,80,19,0.4,60.7,5.2,17,1.1,5.9,0.5,not_fire,1
35 | 4,9,2012,29,73,17,0.1,68.4,1.9,15.7,1.4,2.9,0.5,not_fire,1
36 | 12,9,2012,29,88,13,0,71,2.6,16.6,1.2,3.7,0.5,not_fire,1
37 | 9,7,2012,32,68,14,1.4,66.6,7.7,9.2,1.1,7.4,0.6,not_fire,1
38 | 7,9,2012,31,71,17,0.3,69.6,3.2,30.1,1.5,5.1,0.6,not_fire,1
39 | 23,9,2012,32,54,11,0.5,73.7,7.9,30.4,1.2,9.6,0.7,not_fire,1
40 | 24,9,2012,29,65,19,0.6,68.3,5.5,15.2,1.5,5.8,0.7,not_fire,1
41 | 14,7,2012,34,61,13,0.6,73.9,7.8,22.9,1.4,8.4,0.8,not_fire,1
42 | 2,8,2012,35,55,12,0.4,78,5.8,10,1.7,5.5,0.8,not_fire,1
43 | 3,8,2012,35,63,14,0.3,76.6,5.7,10,1.7,5.5,0.8,not_fire,1
44 | 16,9,2012,30,65,14,0,78.1,3.2,15.7,1.9,4.2,0.8,not_fire,1
45 | 10,6,2012,28,79,12,0,73.2,9.5,46.3,1.3,12.6,0.9,not_fire,1
46 | 12,7,2012,31,75,13,0.1,75.1,7.9,27.7,1.5,9.2,0.9,not_fire,1
47 | 1,8,2012,36,45,14,0,78.8,4.8,10.2,2,4.7,0.9,not_fire,1
48 | 6,9,2012,29,74,19,0.1,75.8,3.6,32.2,2.1,5.6,0.9,not_fire,1
49 | 28,9,2012,32,47,14,0.7,77.5,7.1,8.8,1.8,6.8,0.9,not_fire,1
50 | 24,7,2012,28,78,16,0.1,70,9.6,79.7,1.4,14.7,1.3,not_fire,1
51 | 19,6,2012,31,55,16,0.1,79.9,4.5,16,2.5,5.3,1.4,not_fire,1
52 | 5,9,2012,29,75,16,0,80.8,3.4,24,2.8,5.1,1.7,fire,1
53 | 4,7,2012,33,78,17,0,80.1,4.6,18.5,2.7,5.7,1.7,not_fire,1
54 | 26,9,2012,31,54,11,0,82,6,16.3,2.5,6.2,1.7,not_fire,1
55 | 23,7,2012,27,66,22,0.4,68.2,10.5,71.3,1.8,15.4,2.1,not_fire,1
56 | 21,6,2012,30,78,14,0,81,6.3,31.6,2.6,8.4,2.2,fire,1
57 | 11,7,2012,33,76,14,0,81.1,8.1,18.7,2.6,8.1,2.2,not_fire,1
58 | 22,6,2012,31,67,17,0.1,79.1,7,39.5,2.4,9.7,2.3,not_fire,1
59 | 6,6,2012,31,67,14,0,82.6,5.8,22.2,3.1,7,2.5,fire,1
60 | 8,8,2012,32,60,18,0.3,77.1,11.3,47,2.2,14.1,2.6,not_fire,1
61 | 22,7,2012,28,79,18,0.1,73.4,16.4,79.9,1.8,21.7,2.8,not_fire,1
62 | 22,9,2012,31,50,19,0.6,77.8,10.6,41.4,2.4,12.9,2.8,not_fire,1
63 | 7,7,2012,35,64,18,0.2,80,9.7,40.4,2.8,12.1,3.2,not_fire,1
64 | 13,7,2012,34,81,15,0,81.8,9.7,37.2,3,11.7,3.4,not_fire,1
65 | 17,7,2012,29,70,14,0,82.8,9.4,34.1,3.2,11.1,3.6,fire,1
66 | 23,6,2012,32,62,18,0.1,81.4,8.2,47.7,3.3,11.5,3.8,fire,1
67 | 4,8,2012,34,69,13,0,85,8.2,19.8,4,8.2,3.9,fire,1
68 | 29,6,2012,32,47,13,0.3,79.9,18.4,84.4,2.2,23.8,3.9,not_fire,1
69 | 20,7,2012,33,65,15,0.1,81.4,12.3,62.1,2.8,16.5,4,fire,1
70 | 27,9,2012,31,66,11,0,85.7,8.3,24.9,4,9,4.1,fire,1
71 | 16,8,2012,36,61,18,0.3,80.2,11.7,90.4,2.8,17.6,4.2,fire,1
72 | 12,8,2012,35,51,13,0.3,81.3,15.6,75.1,2.5,20.7,4.2,not_fire,1
73 | 30,8,2012,35,70,17,0.8,72.7,25.2,180.4,1.7,37.4,4.2,not_fire,1
74 | 5,7,2012,33,66,14,0,85.9,7.6,27.9,4.8,9.1,4.9,fire,1
75 | 11,6,2012,31,65,14,0,84.5,12.5,54.3,4,15.8,5.6,fire,1
76 | 17,9,2012,31,52,14,0,87.7,6.4,24.3,6.2,7.7,5.9,fire,1
77 | 18,7,2012,31,68,14,0,85.4,12.1,43.1,4.6,14.2,6,fire,1
78 | 5,8,2012,34,65,13,0,86.8,11.1,29.7,5.2,11.5,6.1,fire,1
79 | 20,9,2012,28,84,18,0,83.8,13.5,49.3,4.5,16,6.3,fire,1
80 | 15,8,2012,36,55,13,0.3,82.4,15.6,92.5,3.7,22,6.3,fire,1
81 | 6,7,2012,32,63,14,0,87,10.9,37,5.6,12.5,6.8,fire,1
82 | 6,8,2012,32,75,14,0,86.4,13,39.1,5.2,14.2,6.8,fire,1
83 | 8,6,2012,30,73,15,0,86.6,12.1,38.3,5.6,13.5,7.1,fire,1
84 | 12,6,2012,26,81,19,0,84,13.8,61.4,4.8,17.7,7.1,fire,1
85 | 7,6,2012,33,54,13,0,88.2,9.9,30.5,6.4,10.9,7.2,fire,1
86 | 25,7,2012,31,65,18,0,84.3,12.5,88.7,4.8,18.5,7.3,fire,1
87 | 24,6,2012,32,66,17,0,85.9,11.2,55.8,5.6,14.9,7.5,fire,1
88 | 18,9,2012,32,49,11,0,89.4,9.8,33.1,6.8,11.3,7.7,fire,1
89 | 8,7,2012,33,68,19,0,85.6,12.5,49.8,6,15.4,8,fire,1
90 | 7,8,2012,32,69,16,0,86.5,15.5,48.6,5.5,17.2,8,fire,1
91 | 21,9,2012,31,55,11,0,87.8,16.5,57.9,5.4,19.2,8.3,fire,1
92 | 25,6,2012,31,64,15,0,86.7,14.2,63.8,5.7,18.3,8.4,fire,1
93 | 21,7,2012,33,70,17,0,85.4,18.5,71.5,5.2,22.4,8.8,fire,1
94 | 19,9,2012,29,57,14,0,89.3,12.5,41.3,7.8,14.2,9.7,fire,1
95 | 9,8,2012,35,59,17,0,87.4,14.8,57,6.9,17.9,9.9,fire,1
96 | 13,8,2012,35,63,15,0,87,19,85.1,5.9,24.4,10.2,fire,1
97 | 26,6,2012,31,64,18,0,86.8,17.8,71.8,6.7,21.6,10.6,fire,1
98 | 14,8,2012,33,66,14,0,87,21.7,94.7,5.7,27.2,10.6,fire,1
99 | 30,7,2012,31,79,15,0,85.4,28.5,136,4.7,37.4,10.7,fire,1
100 | 19,7,2012,35,59,17,0,88.1,12,52.8,7.7,18.2,10.9,fire,1
101 | 28,7,2012,33,76,15,0,86.5,24.4,117.8,5.6,32.1,11.3,fire,1
102 | 10,8,2012,35,55,14,0,88.9,18.6,67,7.4,21.9,11.6,fire,1
103 | 29,7,2012,32,73,15,0,86.6,26.7,127,5.6,35,11.9,fire,1
104 | 11,8,2012,35,63,13,0,88.9,21.7,77,7.1,25.5,12.1,fire,1
105 | 30,6,2012,33,50,14,0,88.7,22.9,92.8,7.2,28.3,12.9,fire,1
106 | 27,8,2012,33,82,21,0,84.9,47,200.2,4.4,59.3,13.2,fire,1
107 | 26,8,2012,31,78,18,0,85.8,45.6,190.6,4.7,57.1,13.7,fire,1
108 | 28,6,2012,32,55,14,0,89.1,25.5,88.5,7.6,29.7,13.9,fire,1
109 | 17,8,2012,37,52,18,0,89.3,16,100.7,9.7,22.9,14.6,fire,1
110 | 27,6,2012,34,53,18,0,89,21.6,80.3,9.2,25.8,15,fire,1
111 | 26,7,2012,36,53,19,0,89.2,17.1,98.6,10,23.9,15.3,fire,1
112 | 27,7,2012,36,48,13,0,90.3,22.2,108.5,8.7,29.4,15.3,fire,1
113 | 31,7,2012,35,64,17,0,87.2,31.9,145.7,6.8,41.2,15.7,fire,1
114 | 18,8,2012,36,54,18,0,89.4,20,110.9,9.7,27.5,16.1,fire,1
115 | 20,8,2012,35,68,19,0,88.3,25.9,130.6,8.8,34.7,16.8,fire,1
116 | 19,8,2012,35,62,19,0,89.4,23.2,120.9,9.7,31.3,17.2,fire,1
117 | 21,8,2012,36,58,19,0,88.6,29.6,141.1,9.2,38.8,18.4,fire,1
118 | 28,8,2012,34,64,16,0,89.4,50.2,210.4,7.3,62.9,19.9,fire,1
119 | 25,8,2012,35,60,15,0,88.9,43.9,181.3,8.2,54.7,20.3,fire,1
120 | 22,8,2012,36,55,18,0,89.1,33.5,151.3,9.9,43.1,20.4,fire,1
121 | 24,8,2012,34,64,14,0,88.9,40.5,171.3,9,50.9,20.9,fire,1
122 | 23,8,2012,36,53,16,0,89.5,37.6,161.5,10.4,47.5,22.3,fire,1
123 | 29,8,2012,35,48,18,0,90.1,54.2,220.4,12.5,67.4,30.2,fire,1
124 | 1,9,2012,29,86,16,0,37.9,0.9,8.2,0.1,1.4,0,not_fire,2
125 | 27,9,2012,28,87,15,4.4,41.1,6.5,8,0.1,6.2,0,not_fire,2
126 | 3,6,2012,29,80,14,2,48.7,2.2,7.6,0.3,2.6,0.1,not_fire,2
127 | 16,6,2012,29,87,15,0.4,47.4,4.2,8,0.2,4.1,0.1,not_fire,2
128 | 1,6,2012,32,71,12,0.7,57.1,2.5,8.2,0.6,2.8,0.2,not_fire,2
129 | 2,6,2012,30,73,13,4,55.7,2.7,7.8,0.6,2.9,0.2,not_fire,2
130 | 28,9,2012,27,87,29,0.5,45.9,3.5,7.9,0.4,3.4,0.2,not_fire,2
131 | 14,6,2012,27,79,16,0.7,53.4,6.4,7.3,0.5,6.1,0.3,not_fire,2
132 | 20,6,2012,31,72,14,0.2,60.2,3.8,8,0.8,3.7,0.3,not_fire,2
133 | 10,9,2012,29,74,15,1.1,59.5,4.7,8.2,0.8,4.6,0.3,not_fire,2
134 | 17,6,2012,31,69,17,4.7,62.2,3.9,8,1.1,3.8,0.4,not_fire,2
135 | 18,6,2012,33,62,10,8.7,65.5,4.6,8.3,0.9,4.4,0.4,not_fire,2
136 | 19,6,2012,32,67,14,4.5,64.6,4.4,8.2,1,4.2,0.4,not_fire,2
137 | 5,9,2012,30,58,12,4.1,66.1,4,8.4,1,3.9,0.4,not_fire,2
138 | 6,9,2012,34,71,14,6.5,64.5,3.3,9.1,1,3.5,0.4,not_fire,2
139 | 24,9,2012,26,49,6,2,61.3,11.9,28.1,0.6,11.9,0.4,not_fire,2
140 | 1,7,2012,28,58,18,2.2,63.7,3.2,8.5,1.2,3.3,0.5,not_fire,2
141 | 30,9,2012,24,64,15,0.2,67.3,3.8,16.5,1.2,4.8,0.5,not_fire,2
142 | 15,6,2012,28,90,15,0,66.8,7.2,14.7,1.2,7.1,0.6,not_fire,2
143 | 7,8,2012,34,63,13,2.9,69.7,7.2,9.8,1.2,6.9,0.6,not_fire,2
144 | 25,7,2012,39,64,9,1.2,73.8,11.7,15.9,1.1,11.4,0.7,not_fire,2
145 | 4,9,2012,30,66,15,0.2,73.5,4.1,26.6,1.5,6,0.7,not_fire,2
146 | 29,9,2012,24,54,18,0.1,79.7,4.3,15.2,1.7,5.1,0.7,not_fire,2
147 | 8,6,2012,28,51,17,1.3,71.4,7.7,7.4,1.5,7.3,0.8,not_fire,2
148 | 23,6,2012,33,59,16,0.8,74.2,7,8.3,1.6,6.7,0.8,not_fire,2
149 | 11,7,2012,34,56,15,2.9,74.8,7.1,9.5,1.6,6.8,0.8,not_fire,2
150 | 1,8,2012,38,52,14,0,78.3,4.4,10.5,2,4.4,0.8,not_fire,2
151 | 6,8,2012,30,54,14,3.1,70.5,11,9.1,1.3,10.5,0.8,not_fire,2
152 | 2,9,2012,28,67,19,0,75.4,2.9,16.3,2,4,0.8,not_fire,2
153 | 5,6,2012,32,60,14,0.2,77.1,6,17.6,1.8,6.5,0.9,not_fire,2
154 | 13,6,2012,30,52,15,2,72.3,11.4,7.8,1.4,10.9,0.9,not_fire,2
155 | 4,6,2012,30,64,14,0,79.4,5.2,15.4,2.2,5.6,1,not_fire,2
156 | 26,7,2012,35,58,10,0.2,78.3,10.8,19.7,1.6,10.7,1,not_fire,2
157 | 22,6,2012,33,46,14,1.1,78.3,8.1,8.3,1.9,7.7,1.2,not_fire,2
158 | 11,9,2012,30,73,14,0,79.2,6.5,16.6,2.1,6.6,1.2,not_fire,2
159 | 10,7,2012,34,51,16,3.8,77.5,8,9.5,2,7.7,1.3,not_fire,2
160 | 9,6,2012,27,59,18,0.1,78.1,8.5,14.7,2.4,8.3,1.9,not_fire,2
161 | 30,6,2012,34,42,15,1.7,79.7,12,8.5,2.2,11.5,2.2,not_fire,2
162 | 3,9,2012,28,75,16,0,82.2,4.4,24.3,3.3,6,2.5,fire,2
163 | 24,7,2012,33,63,17,1.1,72.8,20.9,56.6,1.6,21.7,2.5,not_fire,2
164 | 8,9,2012,30,88,14,0,82.5,6.6,26.1,3,8.1,2.7,fire,2
165 | 8,7,2012,35,47,18,6,80.8,9.8,9.7,3.1,9.4,3,fire,2
166 | 25,9,2012,28,70,15,0,79.9,13.8,36.1,2.4,14.1,3,not_fire,2
167 | 6,6,2012,35,54,11,0.1,83.7,8.4,26.3,3.1,9.3,3.1,fire,2
168 | 9,7,2012,36,43,15,1.9,82.3,9.4,9.9,3.2,9,3.1,fire,2
169 | 7,9,2012,31,62,15,0,83.3,5.8,17.7,3.8,6.4,3.2,fire,2
170 | 27,7,2012,29,87,18,0,80,11.8,28.3,2.8,11.8,3.2,not_fire,2
171 | 9,9,2012,30,80,15,0,83.1,7.9,34.5,3.5,10,3.7,fire,2
172 | 20,9,2012,34,58,13,0.2,79.5,18.7,88,2.1,24.4,3.8,not_fire,2
173 | 12,9,2012,31,72,14,0,84.2,8.3,25.2,3.8,9.1,3.9,fire,2
174 | 13,7,2012,39,45,13,0.6,85.2,11.3,10.4,4.2,10.9,4.7,fire,2
175 | 21,6,2012,32,55,14,0,86.2,8.3,18.4,5,8.2,4.9,fire,2
176 | 3,7,2012,34,56,17,0.1,84.7,9.7,27.3,4.7,10.3,5.2,fire,2
177 | 7,7,2012,38,43,13,0.5,85,13,35.4,4.1,13.7,5.2,fire,2
178 | 24,6,2012,35,68,16,0,85.3,10,17,4.9,9.9,5.3,fire,2
179 | 15,8,2012,35,46,13,0.3,83.9,16.9,54.2,3.5,19,5.5,fire,2
180 | 19,9,2012,29,41,8,0.1,83.9,24.9,86,2.7,28.9,5.6,fire,2
181 | 14,9,2012,28,81,15,0,84.6,12.6,41.5,4.3,14.3,5.7,fire,2
182 | 8,8,2012,37,56,11,0,87.4,11.2,20.2,5.2,11,5.9,fire,2
183 | 29,8,2012,35,53,17,0.5,80.2,20.7,149.2,2.7,30.6,5.9,fire,2
184 | 7,6,2012,35,44,17,0.2,85.6,9.9,28.9,5.4,10.7,6,fire,2
185 | 28,8,2012,35,56,14,0.4,79.2,37,166,2.1,30.6,6.1,not_fire,2
186 | 6,7,2012,35,42,15,0.3,84.7,15.5,45.1,4.3,16.7,6.3,fire,2
187 | 2,7,2012,33,48,16,0,87.6,7.9,17.8,6.8,7.8,6.4,fire,2
188 | 26,9,2012,30,65,14,0,85.4,16,44.5,4.5,16.9,6.5,fire,2
189 | 25,6,2012,34,70,16,0,86,12.8,25.6,5.4,12.7,6.7,fire,2
190 | 29,6,2012,37,36,13,0.6,86.2,17.9,36.7,4.8,17.8,7.2,fire,2
191 | 16,7,2012,31,83,17,0,84.5,19.4,33.1,4.7,19.2,7.3,fire,2
192 | 17,7,2012,32,81,17,0,84.6,21.1,42.3,4.7,20.9,7.7,fire,2
193 | 19,8,2012,35,66,15,0.1,82.7,32.7,96.8,3.3,35.5,7.7,fire,2
194 | 28,7,2012,33,57,16,0,87.5,15.7,37.6,6.7,15.7,9,fire,2
195 | 20,8,2012,36,81,15,0,83.7,34.4,107,3.8,38.1,9,fire,2
196 | 18,7,2012,33,68,15,0,86.1,23.9,51.6,5.2,23.9,9.1,fire,2
197 | 26,6,2012,36,62,16,0,87.8,16.5,34.5,7,16.4,9.5,fire,2
198 | 12,7,2012,36,44,13,0,90.1,12.6,19.4,8.3,12.5,9.6,fire,2
199 | 15,9,2012,32,51,13,0,88.7,16,50.2,6.9,17.8,9.8,fire,2
200 | 4,7,2012,34,58,18,0,88,13.6,36.8,8,14.1,9.9,fire,2
201 | 10,6,2012,30,41,15,0,89.4,13.3,22.5,8.4,13.1,10,fire,2
202 | 13,9,2012,29,49,19,0,88.6,11.5,33.4,9.1,12.4,10.3,fire,2
203 | 14,7,2012,37,37,18,0.2,88.9,12.9,14.6,9,12.5,10.4,fire,2
204 | 13,8,2012,35,34,16,0.2,88.3,16.9,45.1,7.5,17.5,10.5,fire,2
205 | 10,8,2012,39,39,15,0.2,89.3,15.8,35.4,8.2,15.8,10.7,fire,2
206 | 29,7,2012,34,59,16,0,88.1,19.5,47.2,7.4,19.5,10.9,fire,2
207 | 27,6,2012,36,55,15,0,89.1,20.9,43.3,8,20.8,12,fire,2
208 | 21,8,2012,36,71,15,0,86,36.9,117.1,5.1,41.3,12.2,fire,2
209 | 5,8,2012,34,42,17,0.1,88.3,23.6,52.5,19,23.5,12.6,fire,2
210 | 9,8,2012,39,43,12,0,91.7,16.5,30.9,9.6,16.4,12.7,fire,2
211 | 12,6,2012,27,58,17,0,88.9,21.3,37.8,8.7,21.2,12.9,fire,2
212 | 19,7,2012,34,58,16,0,88.1,27.8,61.1,7.3,27.7,13,fire,2
213 | 30,7,2012,36,56,16,0,88.9,23.8,57.1,8.2,23.8,13.2,fire,2
214 | 2,8,2012,40,34,14,0,93.3,10.8,21.4,13.8,10.6,13.5,fire,2
215 | 22,9,2012,33,64,13,0,88.9,26.1,106.3,7.1,32.4,13.7,fire,2
216 | 15,7,2012,34,45,17,0,90.5,18,24.1,10.9,17.7,14.1,fire,2
217 | 31,7,2012,37,55,15,0,89.3,28.3,67.2,8.3,28.3,14.5,fire,2
218 | 16,8,2012,40,41,10,0.1,92,22.6,65.1,9.5,24.2,14.8,fire,2
219 | 5,7,2012,34,45,18,0,90.5,18.7,46.4,11.3,18.7,15,fire,2
220 | 23,9,2012,35,56,14,0,89,29.4,115.6,7.5,36,15.2,fire,2
221 | 18,9,2012,36,33,13,0.1,90.6,25.8,77.8,9,28.2,15.4,fire,2
222 | 14,8,2012,37,40,13,0,91.9,22.3,55.5,10.8,22.3,15.7,fire,2
223 | 30,8,2012,34,49,15,0,89.2,24.8,159.1,8.1,35.7,16,fire,2
224 | 23,7,2012,31,71,17,0,87.3,46.6,99,6.9,46.5,16.3,fire,2
225 | 11,6,2012,31,42,21,0,90.6,18.2,30.5,13.4,18,16.7,fire,2
226 | 20,7,2012,36,50,16,0,89.9,32.7,71,9.5,32.6,17.3,fire,2
227 | 17,9,2012,34,44,12,0,92.5,25.2,63.3,11.2,26.2,17.5,fire,2
228 | 22,8,2012,37,53,14,0,89.5,41.1,127.5,8,45.5,18.1,fire,2
229 | 28,6,2012,37,37,13,0,92.5,27.2,52.4,11.7,27.1,18.4,fire,2
230 | 12,8,2012,39,21,17,0.4,93,18.4,41.5,15.5,18.4,18.8,fire,2
231 | 16,9,2012,33,26,13,0,93.9,21.2,59.2,14.2,22.4,19.3,fire,2
232 | 31,8,2012,30,59,19,0,89.1,27.8,168.2,9.8,39.3,19.4,fire,2
233 | 3,8,2012,39,33,17,0,93.7,17.1,32.1,17.2,16.9,19.5,fire,2
234 | 4,8,2012,38,35,15,0,93.8,23,42.7,15.7,22.9,20.9,fire,2
235 | 11,8,2012,40,31,15,0,94.2,22.5,46.3,16.6,22.4,21.6,fire,2
236 | 21,9,2012,35,34,17,0,92.2,23.6,97.3,13.8,29.4,21.6,fire,2
237 | 17,8,2012,42,24,9,0,96,30.3,76.4,15.7,30.4,24,fire,2
238 | 23,8,2012,36,43,16,0,91.2,46.1,137.7,11.5,50.2,24.5,fire,2
239 | 22,7,2012,32,48,18,0,91.5,44.2,90.1,13.2,44,25.4,fire,2
240 | 27,8,2012,36,54,14,0,91,65.9,177.3,10,68,26.1,fire,2
241 | 18,8,2012,37,37,14,0,94.3,35.9,86.8,16,35.9,26.3,fire,2
242 | 24,8,2012,35,38,15,0,92.1,51.3,147.7,12.2,54.9,26.9,fire,2
243 | 21,7,2012,36,29,18,0,93.9,39.6,80.6,18.5,39.5,30,fire,2
244 | 26,8,2012,33,37,16,0,92.2,61.3,167.2,13.1,64,30.3,fire,2
245 | 25,8,2012,34,40,18,0,92.1,56.3,157.5,14.3,59.5,31.1,fire,2
246 |
--------------------------------------------------------------------------------
/Data Science File/5_ExploratoryDataAnalysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "id": "hg_XIstvlZfr"
8 | },
9 | "outputs": [],
10 | "source": [
11 | "#importing libraries\n",
12 | "import matplotlib.pyplot as plt\n",
13 | "import matplotlib.mlab as mlab\n",
14 | "import seaborn as sns\n",
15 | "import pandas as pd\n",
16 | "import numpy as np"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "metadata": {
23 | "id": "prbQ_6Jvlf2Z"
24 | },
25 | "outputs": [],
26 | "source": [
27 | "url='/content/drive/MyDrive/Summer 2022/CSE 511 (MSc)/Week-01/data-dhaka-weather1953-2016.csv'\n",
28 | "df = pd.read_csv(url)"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": null,
34 | "metadata": {
35 | "colab": {
36 | "base_uri": "https://localhost:8080/"
37 | },
38 | "id": "dOiSKy4R_jRT",
39 | "outputId": "fd5030ec-8ec2-418b-bf99-369c2bcf3309"
40 | },
41 | "outputs": [],
42 | "source": [
43 | "from google.colab import drive\n",
44 | "drive.mount('/content/drive')"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": null,
50 | "metadata": {
51 | "colab": {
52 | "base_uri": "https://localhost:8080/",
53 | "height": 206
54 | },
55 | "id": "6QlqxHJelnCY",
56 | "outputId": "813e6190-004b-4971-df05-0a7ec7f12121"
57 | },
58 | "outputs": [],
59 | "source": [
60 | "#Reading Data Head\n",
61 | "df.head()"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": null,
67 | "metadata": {
68 | "colab": {
69 | "base_uri": "https://localhost:8080/"
70 | },
71 | "id": "9BX7qdsg-p5g",
72 | "outputId": "ceabdf4f-0860-40f6-9889-1d260ec79dd0"
73 | },
74 | "outputs": [],
75 | "source": [
76 | "df['MaxTemp'].corr(df['Rainfall'])"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": null,
82 | "metadata": {
83 | "colab": {
84 | "base_uri": "https://localhost:8080/",
85 | "height": 206
86 | },
87 | "id": "3GeITfhV5rHW",
88 | "outputId": "bb7bc204-9337-49a9-f858-4c85c2740cec"
89 | },
90 | "outputs": [],
91 | "source": [
92 | "#Reading Data Tail\n",
93 | "df.tail()"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": null,
99 | "metadata": {
100 | "colab": {
101 | "base_uri": "https://localhost:8080/"
102 | },
103 | "id": "ftrUBlWV1I9k",
104 | "outputId": "cc753b08-d0d7-4051-8098-11752458b8ed"
105 | },
106 | "outputs": [],
107 | "source": [
108 | "#Check Null\n",
109 | "df.isnull().values.any()"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {
116 | "colab": {
117 | "base_uri": "https://localhost:8080/"
118 | },
119 | "id": "TkN6gRi2lp1J",
120 | "outputId": "df616965-068a-49c7-b265-2663322e72a9"
121 | },
122 | "outputs": [],
123 | "source": [
124 | "#Showing Columns\n",
125 | "df.columns"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": null,
131 | "metadata": {
132 | "colab": {
133 | "base_uri": "https://localhost:8080/",
134 | "height": 284
135 | },
136 | "id": "cltiMN1tlzp5",
137 | "outputId": "d30fdb46-d132-436f-b7f7-fa060b2aa401"
138 | },
139 | "outputs": [],
140 | "source": [
141 | "#Describing Data Set\n",
142 | "df.describe()"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": null,
148 | "metadata": {
149 | "colab": {
150 | "base_uri": "https://localhost:8080/",
151 | "height": 34
152 | },
153 | "id": "5Bmj2V8VmLFg",
154 | "outputId": "4c448a96-c560-44db-89b1-d419e6246e11"
155 | },
156 | "outputs": [],
157 | "source": [
158 | "#Data set Shape\n",
159 | "df.shape"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": null,
165 | "metadata": {
166 | "colab": {
167 | "base_uri": "https://localhost:8080/",
168 | "height": 202
169 | },
170 | "id": "MRSDKAREmRyJ",
171 | "outputId": "a3855f8f-210d-4c75-f7db-807d486a2fc7"
172 | },
173 | "outputs": [],
174 | "source": [
175 | "#Data set info\n",
176 | "df.info()"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": null,
182 | "metadata": {
183 | "colab": {
184 | "base_uri": "https://localhost:8080/",
185 | "height": 134
186 | },
187 | "id": "XzHZsv6PmWHY",
188 | "outputId": "6704ecc8-3dc4-495c-be14-2a03340fc31d"
189 | },
190 | "outputs": [],
191 | "source": [
192 | "#Dataset median\n",
193 | "df.median()"
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": null,
199 | "metadata": {
200 | "colab": {
201 | "base_uri": "https://localhost:8080/",
202 | "height": 134
203 | },
204 | "id": "1T7KTZ_imczg",
205 | "outputId": "687e1e4e-2a21-4761-85ae-bea0a2febbef"
206 | },
207 | "outputs": [],
208 | "source": [
209 | "#Checking kurtosis\n",
210 | "df.kurtosis()"
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "execution_count": null,
216 | "metadata": {
217 | "colab": {
218 | "base_uri": "https://localhost:8080/",
219 | "height": 134
220 | },
221 | "id": "lQkIc3UHmmrR",
222 | "outputId": "ed9f491d-ffc6-4256-bc55-7f41fef53c62"
223 | },
224 | "outputs": [],
225 | "source": [
226 | "#Checking skewness\n",
227 | "df.skew()"
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": null,
233 | "metadata": {
234 | "colab": {
235 | "base_uri": "https://localhost:8080/",
236 | "height": 225
237 | },
238 | "id": "kIf-P6xwmo6I",
239 | "outputId": "96e9b924-38ad-46a9-d9c5-ab5770eb8f95"
240 | },
241 | "outputs": [],
242 | "source": [
243 | "#Checking correlation\n",
244 | "df.corr()"
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "execution_count": null,
250 | "metadata": {
251 | "colab": {
252 | "base_uri": "https://localhost:8080/",
253 | "height": 235
254 | },
255 | "id": "9cPIdy4Km270",
256 | "outputId": "167dd80e-65d2-4afc-f632-302ff1a69ec6"
257 | },
258 | "outputs": [],
259 | "source": [
260 | "#Checking covariance\n",
261 | "df.cov()"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": null,
267 | "metadata": {
268 | "colab": {
269 | "base_uri": "https://localhost:8080/",
270 | "height": 505
271 | },
272 | "id": "YALSUxxym9cx",
273 | "outputId": "94aed830-d941-4bb2-9e4b-fdcce85bdc54"
274 | },
275 | "outputs": [],
276 | "source": [
277 | "#Generating Heatmap\n",
278 | "correlation = df.corr()\n",
279 | "plt.figure(figsize=(16, 8))\n",
280 | "sns.heatmap(correlation, annot=True, linewidths=0, vmin=-1, cmap=\"RdBu_r\")\n",
281 | "plt.show()"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": null,
287 | "metadata": {
288 | "colab": {
289 | "base_uri": "https://localhost:8080/",
290 | "height": 252
291 | },
292 | "id": "D2zpMCwl1dEn",
293 | "outputId": "7b41745e-37b5-4362-d318-cc80c7925bea"
294 | },
295 | "outputs": [],
296 | "source": [
297 | "#month wise parameter checking\n",
298 | "rain=df.groupby('Month')['Rainfall'].mean()\n",
299 | "rain\n",
300 | "#sns.distplot(rain)"
301 | ]
302 | },
303 | {
304 | "cell_type": "code",
305 | "execution_count": null,
306 | "metadata": {
307 | "colab": {
308 | "base_uri": "https://localhost:8080/",
309 | "height": 296
310 | },
311 | "id": "tQ0nsZn-3LWC",
312 | "outputId": "aef4574a-9834-4d02-858d-eb463c9a59b2"
313 | },
314 | "outputs": [],
315 | "source": [
316 | "#month wise distribution\n",
317 | "sns.distplot(rain)"
318 | ]
319 | },
320 | {
321 | "cell_type": "code",
322 | "execution_count": null,
323 | "metadata": {
324 | "colab": {
325 | "base_uri": "https://localhost:8080/",
326 | "height": 252
327 | },
328 | "id": "bYir_BRYwvbr",
329 | "outputId": "4aa5931a-05fa-4c10-c24c-50e5cd71900b"
330 | },
331 | "outputs": [],
332 | "source": [
333 | "# Finding out most rainy months\n",
334 | "df.groupby('Month')['Rainfall'].mean().sort_values(ascending=False)\n"
335 | ]
336 | },
337 | {
338 | "cell_type": "code",
339 | "execution_count": null,
340 | "metadata": {
341 | "colab": {
342 | "base_uri": "https://localhost:8080/",
343 | "height": 252
344 | },
345 | "id": "UC9Xed9W3CwD",
346 | "outputId": "7561d569-a198-46c5-810c-b2a13927853a"
347 | },
348 | "outputs": [],
349 | "source": [
350 | "#month wise parameter checking\n",
351 | "mxt=df.groupby('Month')['MaxTemp'].mean()\n",
352 | "mxt"
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "execution_count": null,
358 | "metadata": {
359 | "colab": {
360 | "base_uri": "https://localhost:8080/",
361 | "height": 296
362 | },
363 | "id": "sAV30V724NuH",
364 | "outputId": "2cd50309-2239-4830-ecad-1b2c78704bc1"
365 | },
366 | "outputs": [],
367 | "source": [
368 | "#month wise distribution\n",
369 | "sns.distplot(mxt)"
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": null,
375 | "metadata": {
376 | "colab": {
377 | "base_uri": "https://localhost:8080/",
378 | "height": 252
379 | },
380 | "id": "_bd-dbxJxNGA",
381 | "outputId": "451d007f-818d-4684-c7fb-efce3fd163ec"
382 | },
383 | "outputs": [],
384 | "source": [
385 |     "# Ranking months by mean MaxTemp (ascending, hottest month listed last)\n",
386 | "df.groupby('Month')['MaxTemp'].mean().sort_values(ascending=True)"
387 | ]
388 | },
389 | {
390 | "cell_type": "code",
391 | "execution_count": null,
392 | "metadata": {
393 | "colab": {
394 | "base_uri": "https://localhost:8080/",
395 | "height": 252
396 | },
397 | "id": "vtxkK5Oo4bsI",
398 | "outputId": "38ed168e-7937-4c44-e277-b8d4665101ba"
399 | },
400 | "outputs": [],
401 | "source": [
402 | "#month wise parameter checking\n",
403 | "mt=df.groupby('Month')['MinTemp'].mean()\n",
404 | "mt"
405 | ]
406 | },
407 | {
408 | "cell_type": "code",
409 | "execution_count": null,
410 | "metadata": {
411 | "colab": {
412 | "base_uri": "https://localhost:8080/",
413 | "height": 296
414 | },
415 | "id": "vU7eDAQt42ow",
416 | "outputId": "9f626447-1f33-4cbe-9641-ea7308274fd9"
417 | },
418 | "outputs": [],
419 | "source": [
420 | "#month wise distribution\n",
421 | "sns.distplot(mt)"
422 | ]
423 | },
424 | {
425 | "cell_type": "code",
426 | "execution_count": null,
427 | "metadata": {
428 | "colab": {
429 | "base_uri": "https://localhost:8080/",
430 | "height": 252
431 | },
432 | "id": "vjA9TAgQxbsZ",
433 | "outputId": "d1c8f811-a486-49a2-de63-b35a9532c010"
434 | },
435 | "outputs": [],
436 | "source": [
437 |     "# Ranking months by mean MinTemp (ascending, coldest month listed first)\n",
438 | "df.groupby('Month')['MinTemp'].mean().sort_values(ascending=True)"
439 | ]
440 | },
441 | {
442 | "cell_type": "code",
443 | "execution_count": null,
444 | "metadata": {
445 | "colab": {
446 | "base_uri": "https://localhost:8080/",
447 | "height": 252
448 | },
449 | "id": "g_A8G_v04-Ig",
450 | "outputId": "97de91d7-51a9-4e24-aace-6ecb4999d41c"
451 | },
452 | "outputs": [],
453 | "source": [
454 | "#month wise parameter checking\n",
455 | "hm=df.groupby('Month')['RelativeHumidity'].mean()\n",
456 | "hm"
457 | ]
458 | },
459 | {
460 | "cell_type": "code",
461 | "execution_count": null,
462 | "metadata": {
463 | "colab": {
464 | "base_uri": "https://localhost:8080/",
465 | "height": 252
466 | },
467 | "id": "43JIreaoxu-g",
468 | "outputId": "e0bf23eb-6b17-4ee4-824c-7d2ef757b58f"
469 | },
470 | "outputs": [],
471 | "source": [
472 |     "# Ranking months by mean RelativeHumidity (descending, most humid month listed first)\n",
473 | "df.groupby('Month')['RelativeHumidity'].mean().sort_values(ascending=False)"
474 | ]
475 | },
476 | {
477 | "cell_type": "code",
478 | "execution_count": null,
479 | "metadata": {
480 | "colab": {
481 | "base_uri": "https://localhost:8080/",
482 | "height": 296
483 | },
484 | "id": "wUdl-8nt5IYv",
485 | "outputId": "662e0992-c5e0-44c6-d051-88c672fdfd3e"
486 | },
487 | "outputs": [],
488 | "source": [
489 | "#month wise distribution\n",
490 | "sns.distplot(hm)"
491 | ]
492 | },
493 | {
494 | "cell_type": "code",
495 | "execution_count": null,
496 | "metadata": {
497 | "colab": {
498 | "base_uri": "https://localhost:8080/",
499 | "height": 374
500 | },
501 | "id": "3KO4DwvQnHip",
502 | "outputId": "7d550a77-85c2-4ee1-f177-c039e4e3e60a"
503 | },
504 | "outputs": [],
505 | "source": [
506 | "#Visualizing Boxplot\n",
507 | "plt.figure(figsize=(16, 6))\n",
508 | "ax = sns.boxplot(data=df, orient=\"h\", palette=\"Set2\")"
509 | ]
510 | },
511 | {
512 | "cell_type": "code",
513 | "execution_count": null,
514 | "metadata": {
515 | "colab": {
516 | "base_uri": "https://localhost:8080/",
517 | "height": 279
518 | },
519 | "id": "ijfCHWu1ngRh",
520 | "outputId": "82cc00b8-1596-4a08-ecc7-104d678b7ec8"
521 | },
522 | "outputs": [],
523 | "source": [
524 | "ax = sns.boxplot(x=df[\"Rainfall\"])"
525 | ]
526 | },
527 | {
528 | "cell_type": "code",
529 | "execution_count": null,
530 | "metadata": {
531 | "colab": {
532 | "base_uri": "https://localhost:8080/",
533 | "height": 279
534 | },
535 | "id": "RXQZveO9oZUZ",
536 | "outputId": "9c774efc-1fa7-48a7-c6f0-0530ec254a20"
537 | },
538 | "outputs": [],
539 | "source": [
540 | "ax = sns.boxplot(x=df[\"RelativeHumidity\"])"
541 | ]
542 | },
543 | {
544 | "cell_type": "code",
545 | "execution_count": null,
546 | "metadata": {
547 | "colab": {
548 | "base_uri": "https://localhost:8080/",
549 | "height": 388
550 | },
551 | "id": "w5VQMGQPpQq7",
552 | "outputId": "0d672fa0-d6d6-4f9d-e52d-20b4043d27b2"
553 | },
554 | "outputs": [],
555 | "source": [
556 | "plt.figure(figsize=(16, 6))\n",
557 | "ax = sns.boxplot(x=\"RelativeHumidity\", y=\"Rainfall\", data=df)"
558 | ]
559 | },
560 | {
561 | "cell_type": "code",
562 | "execution_count": null,
563 | "metadata": {
564 | "colab": {
565 | "base_uri": "https://localhost:8080/",
566 | "height": 386
567 | },
568 | "id": "p4Prjty5uUuG",
569 | "outputId": "dff3b8cd-8197-49cb-d250-fb097d728ca9"
570 | },
571 | "outputs": [],
572 | "source": [
573 | "plt.figure(figsize=(16, 6))\n",
574 | "ax=sns.catplot(x=\"Month\", y=\"Rainfall\", kind=\"box\", data=df);"
575 | ]
576 | },
577 | {
578 | "cell_type": "code",
579 | "execution_count": null,
580 | "metadata": {
581 | "colab": {
582 | "base_uri": "https://localhost:8080/",
583 | "height": 369
584 | },
585 | "id": "esbAFkt7wm1u",
586 | "outputId": "45d65d29-ca52-439d-9344-b0e528de396d"
587 | },
588 | "outputs": [],
589 | "source": [
590 | "\n",
591 | "ax=sns.catplot(x=\"Month\", y=\"MinTemp\", kind=\"box\", data=df)"
592 | ]
593 | },
594 | {
595 | "cell_type": "code",
596 | "execution_count": null,
597 | "metadata": {
598 | "colab": {
599 | "base_uri": "https://localhost:8080/",
600 | "height": 369
601 | },
602 | "id": "K5vjZVlEwuwi",
603 | "outputId": "84250ed5-f6ca-4101-ebb8-41281c20648c"
604 | },
605 | "outputs": [],
606 | "source": [
607 | "ax=sns.catplot(x=\"Month\", y=\"MaxTemp\", kind=\"box\", data=df)"
608 | ]
609 | },
610 | {
611 | "cell_type": "code",
612 | "execution_count": null,
613 | "metadata": {
614 | "colab": {
615 | "base_uri": "https://localhost:8080/",
616 | "height": 408
617 | },
618 | "id": "1uH2QOpRqP27",
619 | "outputId": "ddc517ce-8846-4edb-f774-9f12bc965597"
620 | },
621 | "outputs": [],
622 | "source": [
623 | "plt.figure(figsize=(50, 6))\n",
624 | "ax = sns.boxplot(x=\"MinTemp\", y=\"Rainfall\", data=df)"
625 | ]
626 | },
627 | {
628 | "cell_type": "code",
629 | "execution_count": null,
630 | "metadata": {
631 | "colab": {
632 | "base_uri": "https://localhost:8080/",
633 | "height": 398
634 | },
635 | "id": "cU2QOIfurMJy",
636 | "outputId": "dccd8fa7-198a-4a48-8fa1-a7874dedd4f3"
637 | },
638 | "outputs": [],
639 | "source": [
640 | "\n",
641 | "df.hist()"
642 | ]
643 | },
644 | {
645 | "cell_type": "code",
646 | "execution_count": null,
647 | "metadata": {
648 | "colab": {
649 | "base_uri": "https://localhost:8080/",
650 | "height": 296
651 | },
652 | "id": "UmF8ueMOpeUJ",
653 | "outputId": "429cba96-2322-499c-d7f6-4c210dc58f5d"
654 | },
655 | "outputs": [],
656 | "source": [
657 | "#Distribution Plot\n",
658 | "sns.distplot(df['Rainfall'])"
659 | ]
660 | },
661 | {
662 | "cell_type": "code",
663 | "execution_count": null,
664 | "metadata": {
665 | "colab": {
666 | "base_uri": "https://localhost:8080/",
667 | "height": 296
668 | },
669 | "id": "SFcNmIJYrST5",
670 | "outputId": "82ac50f0-3882-4d5f-f804-de9997a1d4a2"
671 | },
672 | "outputs": [],
673 | "source": [
674 | "#Distribution Plot\n",
675 | "sns.distplot(df['RelativeHumidity'])"
676 | ]
677 | },
678 | {
679 | "cell_type": "code",
680 | "execution_count": null,
681 | "metadata": {
682 | "colab": {
683 | "base_uri": "https://localhost:8080/",
684 | "height": 297
685 | },
686 | "id": "q1V3FiXDsDRy",
687 | "outputId": "2c92411d-6f80-481d-c8e3-67722766f7d5"
688 | },
689 | "outputs": [],
690 | "source": [
691 | "#Distribution Plot\n",
692 | "sns.distplot(df['MinTemp'])"
693 | ]
694 | },
695 | {
696 | "cell_type": "code",
697 | "execution_count": null,
698 | "metadata": {
699 | "colab": {
700 | "base_uri": "https://localhost:8080/",
701 | "height": 1000
702 | },
703 | "id": "suyyQdG9sHHr",
704 | "outputId": "4ccbac8f-ee8b-451d-bd33-7bfe9e6638dd"
705 | },
706 | "outputs": [],
707 | "source": [
708 | "#Drawing Pairplot\n",
709 | "sns.pairplot(df);"
710 | ]
711 | },
712 | {
713 | "cell_type": "code",
714 | "execution_count": null,
715 | "metadata": {
716 | "colab": {
717 | "base_uri": "https://localhost:8080/",
718 | "height": 283
719 | },
720 | "id": "ydQLF--RV97c",
721 | "outputId": "6d1ed1a7-cb10-4e96-bb29-7f36e11f1307"
722 | },
723 | "outputs": [],
724 | "source": [
725 | "plt.scatter(df.Rainfall,df.RelativeHumidity)\n"
726 | ]
727 | },
728 | {
729 | "cell_type": "code",
730 | "execution_count": null,
731 | "metadata": {
732 | "colab": {
733 | "base_uri": "https://localhost:8080/",
734 | "height": 441
735 | },
736 | "id": "g0UhaXTCsiVC",
737 | "outputId": "eda2fc55-838d-48f5-a94f-76cb8e946520"
738 | },
739 | "outputs": [],
740 | "source": [
741 | "sns.jointplot(x=\"Rainfall\", y=\"RelativeHumidity\", data=df, kind=\"reg\");"
742 | ]
743 | },
744 | {
745 | "cell_type": "code",
746 | "execution_count": null,
747 | "metadata": {
748 | "colab": {
749 | "base_uri": "https://localhost:8080/",
750 | "height": 441
751 | },
752 | "id": "wy2aTAlrxn7D",
753 | "outputId": "aedeae36-a3ac-4379-c45c-83678ebfc289"
754 | },
755 | "outputs": [],
756 | "source": [
757 | "sns.jointplot(x=\"Rainfall\", y=\"MinTemp\", data=df, kind=\"reg\");"
758 | ]
759 | },
760 | {
761 | "cell_type": "code",
762 | "execution_count": null,
763 | "metadata": {
764 | "colab": {
765 | "base_uri": "https://localhost:8080/",
766 | "height": 441
767 | },
768 | "id": "PZSMwMls5dKl",
769 | "outputId": "e506c214-c210-408c-9ea2-eb678f0c4df6"
770 | },
771 | "outputs": [],
772 | "source": [
773 | "sns.jointplot(x=\"Rainfall\", y=\"MaxTemp\", data=df, kind=\"reg\");"
774 | ]
775 | },
776 | {
777 | "cell_type": "code",
778 | "execution_count": null,
779 | "metadata": {
780 | "colab": {
781 | "base_uri": "https://localhost:8080/",
782 | "height": 441
783 | },
784 | "id": "dhrYcwEk5iF1",
785 | "outputId": "f3982e4d-a16f-47b6-c65c-0f7fac0ae821"
786 | },
787 | "outputs": [],
788 | "source": [
789 | "sns.jointplot(x=\"Rainfall\", y=\"Month\", data=df, kind=\"reg\");"
790 | ]
791 | },
792 | {
793 | "cell_type": "code",
794 | "execution_count": null,
795 | "metadata": {
796 | "colab": {
797 | "base_uri": "https://localhost:8080/",
798 | "height": 204
799 | },
800 | "id": "AVoMIxNB4Vn9",
801 | "outputId": "968760a2-619c-4418-a6cc-44f552621098"
802 | },
803 | "outputs": [],
804 | "source": [
805 | "df.head()"
806 | ]
807 | },
808 | {
809 | "cell_type": "code",
810 | "execution_count": null,
811 | "metadata": {
812 | "id": "T9mnPPDa42vh"
813 | },
814 | "outputs": [],
815 | "source": [
816 | "#converting int to string\n",
817 | "df['YAER1'] = df.YEAR.astype(str)\n",
818 | "df['MONTH1'] = df.Month.astype(str)"
819 | ]
820 | },
821 | {
822 | "cell_type": "code",
823 | "execution_count": null,
824 | "metadata": {
825 | "id": "rBfgQ7nW9yiH"
826 | },
827 | "outputs": [],
828 | "source": [
829 | "df.info()"
830 | ]
831 | },
832 | {
833 | "cell_type": "code",
834 | "execution_count": null,
835 | "metadata": {
836 | "id": "0Xwe6jhu_KVV"
837 | },
838 | "outputs": [],
839 | "source": [
840 | "#Creating new columns for time series plot\n",
841 | "df['DateStamp'] = df['YAER1'] +'-'+ df['MONTH1']+'-30'"
842 | ]
843 | },
844 | {
845 | "cell_type": "code",
846 | "execution_count": null,
847 | "metadata": {
848 | "id": "RHwfx7IcE5jT"
849 | },
850 | "outputs": [],
851 | "source": [
852 | "df1=df"
853 | ]
854 | },
855 | {
856 | "cell_type": "code",
857 | "execution_count": null,
858 | "metadata": {
859 | "id": "q6lUtF62AOSD"
860 | },
861 | "outputs": [],
862 | "source": [
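863 | "#Setting DateStamp as index (df1 is the same object as df, so df is re-indexed in place; set_index with inplace=True returns None, so df1 becomes None)\n",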
864 | "df1=df1.set_index(\"DateStamp\", inplace = True)"
865 | ]
866 | },
867 | {
868 | "cell_type": "code",
869 | "execution_count": null,
870 | "metadata": {
871 | "colab": {
872 | "base_uri": "https://localhost:8080/",
873 | "height": 173
874 | },
875 | "id": "_JEOSPclBDVk",
876 | "outputId": "c37457d1-70c6-4219-d244-3baa5f835029"
877 | },
878 | "outputs": [],
879 | "source": [
880 | "df.head(3)"
881 | ]
882 | },
883 | {
884 | "cell_type": "code",
885 | "execution_count": null,
886 | "metadata": {
887 | "colab": {
888 | "base_uri": "https://localhost:8080/",
889 | "height": 607
890 | },
891 | "id": "zcdVRAIH_um4",
892 | "outputId": "1bc4aeb9-1af3-451f-d747-2b29a1435c39"
893 | },
894 | "outputs": [],
895 | "source": [
896 | "#Overall TS plot using matplotlib\n",
897 | "import matplotlib.pyplot as plt\n",
898 | "\n",
899 | "df.plot(figsize=(20,10))\n",
900 | "plt.title(\"Time series plot of Dhaka weather\")\n",
901 | "plt.show()"
902 | ]
903 | },
904 | {
905 | "cell_type": "code",
906 | "execution_count": null,
907 | "metadata": {
908 | "colab": {
909 | "base_uri": "https://localhost:8080/",
910 | "height": 404
911 | },
912 | "id": "-l0wuircDFYW",
913 | "outputId": "b6aaeb09-6faa-4cae-ea9e-041fc4704b44"
914 | },
915 | "outputs": [],
916 | "source": [
917 | "#MaxTemp TS plot using sns\n",
918 | "plt.figure(figsize=(16, 6))\n",
919 | "plt.title('Time series plot of MaxTemp')\n",
920 | "ax = sns.lineplot(x=\"YEAR\", y=\"MaxTemp\",err_style=\"bars\",label=\"MaxTemp\", ci=95,data=df)"
921 | ]
922 | },
923 | {
924 | "cell_type": "code",
925 | "execution_count": null,
926 | "metadata": {
927 | "colab": {
928 | "base_uri": "https://localhost:8080/",
929 | "height": 404
930 | },
931 | "id": "ncNG4EApGUZN",
932 | "outputId": "8647dcfb-9926-46d4-af53-45c2b82d6b92"
933 | },
934 | "outputs": [],
935 | "source": [
936 | "#MinTemp TS plot using sns\n",
937 | "plt.figure(figsize=(16, 6))\n",
938 | "plt.title('Time series plot of MinTemp')\n",
939 | "ax = sns.lineplot(x=\"YEAR\", y=\"MinTemp\",color=\"blue\", ci=90,label=\"MinTemp\",data=df)"
940 | ]
941 | },
942 | {
943 | "cell_type": "code",
944 | "execution_count": null,
945 | "metadata": {
946 | "colab": {
947 | "base_uri": "https://localhost:8080/",
948 | "height": 424
949 | },
950 | "id": "qjb1YKjZGlkV",
951 | "outputId": "065d3a1d-8103-46f3-8061-4e45e06e5544"
952 | },
953 | "outputs": [],
954 | "source": [
955 | "#Relative Humidity TS plot using sns\n",
956 | "plt.figure(figsize=(16, 6))\n",
957 | "plt.title('Time series plot of Relative Humidity')\n",
958 | "ax = sns.lineplot(x=\"YEAR\", y=\"RelativeHumidity\",label=\"Relative Humidity\",ci=80,data=df)"
959 | ]
960 | },
961 | {
962 | "cell_type": "code",
963 | "execution_count": null,
964 | "metadata": {
965 | "colab": {
966 | "base_uri": "https://localhost:8080/",
967 | "height": 424
968 | },
969 | "id": "DLiZ98XkI9ri",
970 | "outputId": "c5d365e8-fab3-4249-b131-1ca747376703"
971 | },
972 | "outputs": [],
973 | "source": [
974 | "#Rainfall TS plot using sns\n",
975 | "plt.figure(figsize=(16, 6))\n",
976 | "plt.title('Time series plot of Rainfall')\n",
977 | "ax = sns.lineplot(x=\"YEAR\", y=\"Rainfall\",label=\"Rainfall\",ci=80,data=df)"
978 | ]
979 | },
980 | {
981 | "cell_type": "code",
982 | "execution_count": null,
983 | "metadata": {
984 | "colab": {
985 | "base_uri": "https://localhost:8080/",
986 | "height": 238
987 | },
988 | "id": "lwYb-gdeKQ9Z",
989 | "outputId": "d3fea4a6-bce4-43f0-8db3-fbe770f137f1"
990 | },
991 | "outputs": [],
992 | "source": [
993 | "#Yearly Min Temp Change\n",
994 | "min=df.groupby('YEAR')['MinTemp'].mean()\n"
995 | ]
996 | },
997 | {
998 | "cell_type": "code",
999 | "execution_count": null,
1000 | "metadata": {
1001 | "colab": {
1002 | "base_uri": "https://localhost:8080/",
1003 | "height": 394
1004 | },
1005 | "id": "V0PeG2qJKgub",
1006 | "outputId": "3eb11da0-4ca3-425a-89ee-42ca29c479b8"
1007 | },
1008 | "outputs": [],
1009 | "source": [
1010 | "#Yearly Min Temp Change plot\n",
1011 | "plt.figure(figsize=(16, 6))\n",
1012 | "ax = sns.lineplot( label=\"Yearly Mean Min Temp.\",data=min)"
1013 | ]
1014 | },
1015 | {
1016 | "cell_type": "code",
1017 | "execution_count": null,
1018 | "metadata": {
1019 | "colab": {
1020 | "base_uri": "https://localhost:8080/",
1021 | "height": 394
1022 | },
1023 | "id": "VQAcCW2xPi9o",
1024 | "outputId": "b5716d45-c852-4abc-c5e8-aa782f642cbd"
1025 | },
1026 | "outputs": [],
1027 | "source": [
1028 | "#Yearly Total Rainfall Change\n",
1029 | "rainfall=df.groupby('YEAR')['Rainfall'].sum()\n",
1030 | "plt.figure(figsize=(16, 6))\n",
1031 | "ax = sns.lineplot( label=\"Yearly Total Rainfall\",data=rainfall)"
1032 | ]
1033 | },
1034 | {
1035 | "cell_type": "code",
1036 | "execution_count": null,
1037 | "metadata": {
1038 | "colab": {
1039 | "base_uri": "https://localhost:8080/",
1040 | "height": 374
1041 | },
1042 | "id": "5D3FNO87P852",
1043 | "outputId": "b3b84dd5-af68-4ebb-a432-e7572cb20862"
1044 | },
1045 | "outputs": [],
1046 | "source": [
1047 | "#Yearly Mean Relative Humidity Change\n",
1048 | "rh=df.groupby('YEAR')['RelativeHumidity'].mean()\n",
1049 | "plt.figure(figsize=(16, 6))\n",
1050 | "ax = sns.lineplot( label=\"Yearly Mean Relative Humidity\",data=rh)"
1051 | ]
1052 | }
1053 | ],
1054 | "metadata": {
1055 | "colab": {
1056 | "provenance": []
1057 | },
1058 | "kernelspec": {
1059 | "display_name": "Python 3 (ipykernel)",
1060 | "language": "python",
1061 | "name": "python3"
1062 | },
1063 | "language_info": {
1064 | "codemirror_mode": {
1065 | "name": "ipython",
1066 | "version": 3
1067 | },
1068 | "file_extension": ".py",
1069 | "mimetype": "text/x-python",
1070 | "name": "python",
1071 | "nbconvert_exporter": "python",
1072 | "pygments_lexer": "ipython3",
1073 | "version": "3.11.7"
1074 | }
1075 | },
1076 | "nbformat": 4,
1077 | "nbformat_minor": 4
1078 | }
1079 |
--------------------------------------------------------------------------------