├── Data_Structure ├── .gitkeep ├── Trie │ ├── Readme.md │ └── Sample_code.py └── PDF_Parsing │ ├── PDF_Fitz │ ├── Sample.pdf │ └── Sample_Fitz.py │ └── PDF_plumber │ ├── Sample.pdf │ └── Sample_pdf_plumber.py ├── MonteCarlo └── .gitkeep ├── EDA └── Preprocessing │ └── .gitkeep ├── LinearProgramming └── .gitkeep ├── Python ├── 07_lambda_filter_map │ ├── .gitkeep │ └── lambda_map_filter.py ├── 04-Seaborn │ ├── Lecture Code │ │ ├── data1.txt │ │ ├── 8-Scatterplot_Matrix.py │ │ ├── 9-Plotting_model_residuals.py │ │ ├── 1-Anscombe_quartet.py │ │ ├── 3-Annotated_heatmaps.py │ │ ├── 6-Scatterplot.py │ │ ├── 5-Faceted_logistic.py │ │ ├── 11-Plotting_large_distributions.py │ │ ├── 4-Grouped_violinplots.py │ │ ├── 10-Joint_kernel_density.py │ │ ├── 2-Distribution_plot.py │ │ ├── 7-Overlapping_densities.py │ │ └── Lecture4_3_Code.py │ └── Class Ex │ │ └── Class-Ex-Lecture4_3.py ├── 03-Matplotlib │ ├── Lecture Code │ │ ├── data1.txt │ │ ├── 5-Ploting_points.py │ │ ├── 1-First_Simple_Plot.py │ │ ├── 3-Plotting_multiple_curves.py │ │ ├── 12-Plotting_boxplots.py │ │ ├── 11-Pie_and_Histogram.py │ │ ├── 13-Plotting_triangulations.py │ │ ├── 8-Plotting_stacked_bar_charts.py │ │ ├── 10-Simple_Trick.py │ │ ├── 2-x_coordinate.py │ │ ├── 6-Plotting_Bar_Charts.py │ │ ├── 16-Color_list.py │ │ ├── 7-Plotting_multiple_bar_charts.py │ │ ├── 21- Marker_1.py │ │ ├── 9-Stacking_more_than_two.py │ │ ├── 24-Ticks.py │ │ ├── 15-Custom_Coloring.py │ │ ├── 14-Coloring.py │ │ ├── 4-Text_File_Data.py │ │ ├── 17-Using_colormaps.py │ │ ├── 23-Shapes.py │ │ ├── 18-Line_pattern_and_thickness.py │ │ ├── 20-Custom_shapes.py │ │ ├── 19-Marker.py │ │ └── 22-Annotation.py │ └── Class Ex │ │ ├── data2.txt │ │ └── Class-Ex-Lecture4_2.py ├── 01-Pyhton-Programming │ ├── 3- Lecture_3(Python Adavnce) │ │ ├── Lecture Code │ │ │ ├── output.txt │ │ │ ├── logic.py │ │ │ ├── 9-Modules.py │ │ │ ├── 1-Integer_Objects.py │ │ │ ├── 13-Filenames_and-paths.py │ │ │ ├── 7-Geometric_Points_Example.py │ │ │ ├── data.dat │ │ │ 
├── 12-Reading_and_Writing.py │ │ │ ├── 6-Tuple_Objects.py │ │ │ ├── 10-Linear_Regression_Example.py │ │ │ ├── 11-Handling_Exceptions_Example.py │ │ │ ├── 2-String_Objects-Example.py │ │ │ ├── 5-Dictionary_Objects.py │ │ │ ├── 3-String_Objects_Example_2.py │ │ │ ├── 4-List_Objects.py │ │ │ └── 8-Employee_Example.py │ │ └── Class Ex │ │ │ ├── Class-Ex-Lecture3.py │ │ │ └── Class_Questions.py │ ├── 1- Lecture_1(Python Basics) │ │ ├── Lecture Code │ │ │ ├── 6-Expressions.py │ │ │ ├── 7-Arithmetic_Examples.py │ │ │ ├── 3-Floating_point_Types.py │ │ │ ├── 18-Iteration_Examples.py │ │ │ ├── 11-Nested_Conditionals.py │ │ │ ├── 10-Boolean_Expressions.py │ │ │ ├── 2-Variables_and_Assignment.py │ │ │ ├── 14-Definite_Loops.py │ │ │ ├── 1-Integer_values.py │ │ │ ├── 15-Times_table.py │ │ │ ├── 4-Control_Codes_within_Strings.py │ │ │ ├── 9-If_else_statement.py │ │ │ ├── 5-User_Input.py │ │ │ ├── 13-Nested_Conditionals_elif.py │ │ │ ├── 17-Infinite_Loops.py │ │ │ ├── 16-Break_and_Continue.py │ │ │ ├── 8-If_statement.py │ │ │ └── 12-Nested_Conditionals-Troubleshoot.py │ │ └── Class Ex │ │ │ └── Class-Ex-Lecture1.py │ └── 2- Lecture_2(Python Intermediate) │ │ ├── Lecture Code │ │ ├── 5- Random.py │ │ ├── 9-Global_Variables.py │ │ ├── 15-Slicing_Example.py │ │ ├── 7-Greatest_Common_Divisor.py │ │ ├── 18-Tuples.py │ │ ├── 8-Parameter_Passing.py │ │ ├── 10-Recursion.py │ │ ├── 1-Squar_Root_Example.py │ │ ├── 14-List_Assignment.py │ │ ├── 6-Simple_Function.py │ │ ├── 11-Documenting_Functions.py │ │ ├── 4-Time_Example_2.py │ │ ├── 2-Satellite_Problem.py │ │ ├── 3-Time_Example_1.py │ │ ├── 19-Dictionary_Example.py │ │ ├── 17-List_Permutation_Example.py │ │ ├── 12-List.py │ │ ├── 13-Using_List_Examples.py │ │ └── 16-Sorting_Example.py │ │ └── Class Ex │ │ └── Class-Ex-Lecture2.py ├── 06-Scipy │ ├── Lecture Code │ │ ├── image.png │ │ ├── 2-Documentation.py │ │ ├── 3-Image.py │ │ ├── 5-Matrix_creation.py │ │ ├── 9-Fit_and-polynomial.py │ │ ├── 14-Clusters_1.py │ │ ├── 7-Image 
compression.py │ │ ├── 7-Scipy_linag_Matrix_Hadam.py │ │ ├── 8-Interpolation.py │ │ ├── 8-Image compression.py │ │ ├── 12-Lorenz_Attractors.py │ │ ├── 4-Combination_of_arrays.py │ │ ├── 1-Scipy_vs_Numpy.py │ │ ├── 6-Operations_on_matrices.py │ │ ├── Clusters.py │ │ ├── 10-Curve_fitting.py │ │ ├── Lecture6_3_Code.py │ │ ├── 11-Advance-optimization.py │ │ ├── 13-Stats_and_tests.py │ │ ├── e.csv │ │ └── data.dat │ └── Class Ex │ │ └── Class-Ex-Lecture6.py ├── 02-Numpy │ └── Lecture Code │ │ ├── 12-Copy_No_Copy.py │ │ ├── 7-Universal_Functions.py │ │ ├── 11-Splitting_One_Array.py │ │ ├── 9-Changing_The_Shape_Array.py │ │ ├── 21-Indexing_Multi_dimensional_arrays.py │ │ ├── 1-Numpy_Types_Example.py │ │ ├── 3-Printing_Arrays-Examples.py │ │ ├── 19-Intrinsic_Numpy_array_creation.py │ │ ├── 17-Automatic_Reshaping.py │ │ ├── 4-Numpy_Basic_Operations.py │ │ ├── 15-Indexing_With_Boolean.py │ │ ├── 6-Numpy_Basic_Operations_2.py │ │ ├── 20-Indexing.py │ │ ├── 22-Structural_indexing_tools.py │ │ ├── 18-Converting_Python_array_like.py │ │ ├── 5-Numpy_Basic_Operations_1.py │ │ ├── 8-Indexing_Slicing_and_Iterating.py │ │ ├── 23-Structural_indexing_tools_2.py │ │ ├── 10-Stacking_Together.py │ │ ├── 16-The_ix_function.py │ │ ├── 13-Fancy_Indexing.py │ │ ├── 14-Fancy Indexing-Tricks.py │ │ └── 2-Array_Creation.py └── 05-Pandas │ ├── Lecture Code │ ├── 8-Pandas_Function_Application.py │ ├── 10-Pandas_Iterator.py │ ├── 15-Statistical_Column_Aggregation.py │ ├── 7-Pandas_Statistics.py │ ├── 11-Pandas_Sorting.py │ ├── 12-Pandas_Text Data.py │ ├── 21-Pandas_Importing Data.py │ ├── 2-Pandas_Series-Example.py │ ├── 20-Pandas_Visualization.py │ ├── 5-Pandas_DataFrame_Example.py │ ├── 9-Reindexing_and_Renaming.py │ ├── 13-Indexing_and_Selecting.py │ ├── 18-Merging_and_Joining.py │ ├── 1-Pandas_Series.py │ ├── 14-Pandas_Statistical_Functions.py │ ├── 4-DataFrame_Options.py │ ├── 19-Pandas_Concatenation.py │ ├── 16-Pandas_Missing_Data.py │ ├── 3-DataFrame_creation.py │ ├── 17-Pandas_GroupBy.py │ 
└── microbiome.csv │ └── Class Ex │ └── Class-Ex-Lecture5.py ├── Extra_Packages ├── Save_Console_Output │ ├── console.txt │ └── Save_Console.py ├── Cryptographic │ └── sample.py ├── Request_Package │ └── Sample_Request.py ├── ENV │ └── keyname.py ├── Cryptography_Package │ └── Sample_Crypto.py └── pyvis │ └── eval_code.py ├── .gitignore ├── NetworkX ├── Tutorial │ └── networkx_tutorial.pdf ├── Sample_Example.py └── Sample_Example_1.py ├── Supervised_Learning ├── Logitic_Regression │ ├── Data │ │ └── bank-additional.zip │ ├── Sample_Logistic_Reg_Exercise.py │ ├── Sample_Logistic_Reg_Example_wine.py │ └── Readme.md ├── Decision_Tree │ ├── Readme.md │ ├── DT │ │ ├── Data │ │ │ └── tennis.csv │ │ ├── Sample_DT_Exercise.py │ │ ├── Readme.md │ │ ├── Sample_DT_Example_Tennis.py │ │ └── Sample_DT_Example_balance.py │ └── DT-Graphing │ │ ├── Decision_tree_export_color.py │ │ ├── Decision_tree_export.py │ │ ├── Simple_Decision_Tree.py │ │ ├── plot_decision_regions.py │ │ ├── Decision_tree_graphviz_web.py │ │ └── Purchase_Simple_Example.py ├── SVM │ ├── Sample_KNN_Exercise.py │ └── Sample_KNN_Example_mushrom.py ├── Naive-Bayes │ ├── Sample_NB_Exercise.py │ ├── Sample_NB_Example.py │ └── Readme.md ├── KNN │ ├── Sample_KNN_Exercise.py │ ├── Sample_KNN_Example_iris.py │ ├── Readme.md │ └── Data │ │ └── iris.data.csv └── Random Forest │ ├── Sample_RF_Exercise.py │ ├── Readme.md │ └── Sample_RF_Example_breast.py └── Unsuperised_Learning ├── Pitfalss └── Pitfalls_Kmean.py ├── Kmean └── Sample_Kmean.py ├── Mean-Shift ├── Sample_Meanshift_iris.py └── Sample_Meanshift.py ├── Affinity_Propagation ├── Affnity_Propagation_iris.py └── Sample_Affinity_Propagation.py └── Agglomerative └── Sample_Agglomerative.py /Data_Structure/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MonteCarlo/.gitkeep: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /EDA/Preprocessing/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LinearProgramming/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Python/07_lambda_filter_map/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Extra_Packages/Save_Console_Output/console.txt: -------------------------------------------------------------------------------- 1 | Amir 2 | -------------------------------------------------------------------------------- /Python/04-Seaborn/Lecture Code/data1.txt: -------------------------------------------------------------------------------- 1 | 0 0 2 | 1 1 3 | 2 4 4 | 4 16 5 | 5 25 6 | 6 36 -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/data1.txt: -------------------------------------------------------------------------------- 1 | 0 0 2 | 1 1 3 | 2 4 4 | 4 16 5 | 5 25 6 | 6 36 -------------------------------------------------------------------------------- /Python/03-Matplotlib/Class Ex/data2.txt: -------------------------------------------------------------------------------- 1 | 0 0 6 2 | 1 1 5 3 | 2 4 4 4 | 4 16 3 5 | 5 25 2 6 | 6 36 1 -------------------------------------------------------------------------------- /Data_Structure/Trie/Readme.md: -------------------------------------------------------------------------------- 1 | # Link 2 | https://wangyy395.medium.com/implement-a-trie-in-python-e8dd5c5fde3a 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /Demo/ 2 | /.idea/ 3 | *_Sol.py 4 | *_sol.py 5 | .Temp 6 | /Temp/ 7 | .env 8 | /Extra_Packages/ENV/.env 9 | -------------------------------------------------------------------------------- /NetworkX/Tutorial/networkx_tutorial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amir-jafari/Data-Mining/HEAD/NetworkX/Tutorial/networkx_tutorial.pdf -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python Adavnce)/Lecture Code/output.txt: -------------------------------------------------------------------------------- 1 | This is a file example, 2 | I am writing on a file. 3 | -------------------------------------------------------------------------------- /Python/06-Scipy/Lecture Code/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amir-jafari/Data-Mining/HEAD/Python/06-Scipy/Lecture Code/image.png -------------------------------------------------------------------------------- /Data_Structure/PDF_Parsing/PDF_Fitz/Sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amir-jafari/Data-Mining/HEAD/Data_Structure/PDF_Parsing/PDF_Fitz/Sample.pdf -------------------------------------------------------------------------------- /Data_Structure/PDF_Parsing/PDF_plumber/Sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amir-jafari/Data-Mining/HEAD/Data_Structure/PDF_Parsing/PDF_plumber/Sample.pdf -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python 
Adavnce)/Lecture Code/logic.py: -------------------------------------------------------------------------------- 1 | def a(x,y): 2 | print(x and y) 3 | 4 | def o(x,y): 5 | print(x or y) -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python Adavnce)/Lecture Code/9-Modules.py: -------------------------------------------------------------------------------- 1 | import logic 2 | logic.a(True, False) 3 | logic.o(True, False) 4 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/07_lambda_filter_map/lambda_map_filter.py: -------------------------------------------------------------------------------- 1 | x = lambda a: a * 2 2 | print(list(map(x, [1, 2, 3]))) 3 | print(list(filter(lambda a: a % 2 == 0, [1, 2, 3, 4]))) 4 | -------------------------------------------------------------------------------- /Supervised_Learning/Logitic_Regression/Data/bank-additional.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amir-jafari/Data-Mining/HEAD/Supervised_Learning/Logitic_Regression/Data/bank-additional.zip -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/12-Copy_No_Copy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | a = np.arange(12) 3 | b = a 4 | print(b is a) 5 | c = a.view() 6 | print(c is a) 7 | d = a.copy() 8 | print(d is a) 9 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/5-Ploting_points.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | data = np.random.rand(1024, 2) 4 | plt.scatter(data[:,0], data[:,1]) 5 | plt.show() 6 | print('#',50*"-") 
-------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/6-Expressions.py: -------------------------------------------------------------------------------- 1 | x, y, z = 3, -4, 0 2 | x = -x 3 | y = -y 4 | z = -z 5 | print(x, y, z) 6 | x = +y 7 | print(x) 8 | print(10/3, 3/10, 10//3, 3//10) 9 | print(10%3, 3%10) -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python Adavnce)/Lecture Code/1-Integer_Objects.py: -------------------------------------------------------------------------------- 1 | x = 1 2 | print(dir(x)) 3 | print(x.__add__(3)) 4 | s = "abs" 5 | print(s.__add__("Car")) 6 | print(str.__add__(s, "Car")) 7 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/2- Lecture_2(Python Intermediate)/Lecture Code/5- Random.py: -------------------------------------------------------------------------------- 1 | from random import randrange, seed 2 | seed(23) 3 | for i in range(0, 100): 4 | print(randrange(1, 1000), end=' ') 5 | print() 6 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/2- Lecture_2(Python Intermediate)/Lecture Code/9-Global_Variables.py: -------------------------------------------------------------------------------- 1 | def get_input(): 2 | global arg1, arg2 3 | arg1 = float(input("Enter argument #1: ")) 4 | arg2 = float(input("Enter argument #2: ")) -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/1-First_Simple_Plot.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | x1 = np.arange(0,2*np.pi,0.002) 4 | y1 = np.sin(x1) 5 | plt.plot(x1, y1) 6 | plt.draw() 7 | 
plt.show() 8 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/7-Universal_Functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | B = np.arange(3) 3 | print(B) 4 | z = np.exp(B); print(z) 5 | z1 = np.sqrt(B); print(z1) 6 | C = np.array([2., -1., 4.]) 7 | z2 = np.add(B, C) 8 | print(z2) 9 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python Adavnce)/Lecture Code/13-Filenames_and-paths.py: -------------------------------------------------------------------------------- 1 | import os 2 | cwd = os.getcwd() 3 | print(cwd) 4 | print(os.path.exists('output.txt')) 5 | print(os.path.isdir('output.txt')) 6 | print(os.listdir(cwd)) 7 | print('#', 50 * "-") 8 | -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python Adavnce)/Lecture Code/7-Geometric_Points_Example.py: -------------------------------------------------------------------------------- 1 | class Point: 2 | def __init__(self, x, y): 3 | self.x = x 4 | self.y = y 5 | pt = Point(2.5, 6) 6 | print("(", pt.x, ",", pt.y, ")", sep="") 7 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/3-Plotting_multiple_curves.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | x4 = np.linspace(0, 2 * np.pi, 100) 4 | ya = np.sin(x4) 5 | yb = np.cos(x4) 6 | plt.plot(x4, ya,'r--') 7 | plt.plot(x4, yb) 8 | plt.show() 9 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/06-Scipy/Lecture Code/2-Documentation.py: -------------------------------------------------------------------------------- 
1 | import numpy 2 | import scipy 3 | import matplotlib.pyplot as plt 4 | 5 | import scipy.stats 6 | help(scipy.stats) 7 | help(scipy.stats.bayes_mvs) 8 | help(scipy.stats.kurtosis) 9 | numpy.info('random') 10 | print('#',50*"-") -------------------------------------------------------------------------------- /Extra_Packages/Cryptographic/sample.py: -------------------------------------------------------------------------------- 1 | from cryptography.fernet import Fernet 2 | # Put this somewhere safe! 3 | key = Fernet.generate_key() 4 | f = Fernet(key) 5 | token = f.encrypt(b"A really secret message. Not for prying eyes.") 6 | print(token) 7 | 8 | 9 | print(f.decrypt(token)) -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/12-Plotting_boxplots.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | data = np.random.randn(100) 4 | plt.boxplot(data) 5 | plt.show() 6 | 7 | data = np.random.randn(100, 5) 8 | plt.boxplot(data) 9 | plt.show() 10 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/7-Arithmetic_Examples.py: -------------------------------------------------------------------------------- 1 | degreesF = eval(input('Enter the temperature in degrees F: ')) 2 | # Perform the conversion 3 | degreesC = 5/9*(degreesF - 32) 4 | # Report the result 5 | print('temperature in degrees C: ', degreesC) -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/11-Pie_and_Histogram.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | data = np.array([5, 25, 50, 20]) 4 | plt.pie(data) 5 | plt.show() 6 | 7 | x12 = np.random.randn(1000) 8 | 
plt.hist(x12, bins = 20) 9 | plt.show() 10 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python Adavnce)/Lecture Code/data.dat: -------------------------------------------------------------------------------- 1 | 'Alex', 324, 10.50 2 | 'Wilma', 371, 12.19 3 | 'Amir', 129, 15.45 4 | 'Jack', 120, 16.00 5 | 'John', 412, 9.34 6 | 'Julie', 420, 9.15 7 | 'Jessica', 1038, 19.86 8 | 'Jane', 966, 19.86 9 | 'Juddy', 1210, 15.61 10 | -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/11-Splitting_One_Array.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | a = np.floor(10*np.random.random((2,12))) 3 | print(a) 4 | z4 = np.hsplit(a,3) # Split a into 3 5 | print(z4) 6 | # Split a after the third and the fourth column 7 | z5 = np.hsplit(a,(3,4)) 8 | print(z5) 9 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/9-Changing_The_Shape_Array.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | a = np.floor(10*np.random.random((3,4))) 3 | print(a) 4 | print(a.shape) 5 | print(a.ravel()) 6 | a.shape = (6, 2) 7 | print(a.T) 8 | print(a.resize((2,6))) 9 | print(a.reshape(3,-1)) 10 | a= print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python Adavnce)/Lecture Code/12-Reading_and_Writing.py: -------------------------------------------------------------------------------- 1 | fout = open('output.txt', 'w') 2 | print(fout) 3 | line1 = "This is a file example,\n" 4 | fout.write(line1) 5 | line2 = "I am writing on a file.\n" 6 | fout.write(line2) 7 | fout.close() 8 | print('#', 50 * "-") 
-------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/13-Plotting_triangulations.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import matplotlib.tri as tri 4 | data = np.random.rand(100, 2) 5 | triangles = tri.Triangulation(data[:,0], data[:,1]) 6 | plt.triplot(triangles) 7 | plt.show() 8 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/8-Plotting_stacked_bar_charts.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | a = np.random.rand(4) 4 | b = np.random.rand(4) 5 | x9 = np.arange(4) 6 | plt.bar(x9, a, color = 'b') 7 | plt.bar(x9, b, color = 'r', bottom = a) 8 | plt.show() 9 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/04-Seaborn/Lecture Code/8-Scatterplot_Matrix.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | sns.set(style="ticks") 5 | 6 | df = sns.load_dataset("iris") 7 | sns.pairplot(df, hue="species") 8 | plt.show() 9 | print('#',50*"-") 10 | # ------------------ -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/10-Simple_Trick.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | BMI = np.array([5., 30., 45., 22.]) 4 | age = np.array( [5., 25., 50., 20.]) 5 | x11 = np.arange(4) 6 | plt.barh(x11, BMI, color = 'r') 7 | plt.barh(x11, -age, color = 'b') 8 | plt.show() 9 | print('#',50*"-") -------------------------------------------------------------------------------- 
/Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/3-Floating_point_Types.py: -------------------------------------------------------------------------------- 1 | x = 5.62 2 | print(type(x)) 3 | pi = 3.14159 4 | print("Pi =", pi) 5 | print("or", 3.14, "short") 6 | avogadros_number = 6.022e23 7 | c = 2.998e8 8 | print("Avogadro's number =", avogadros_number) 9 | print("Speed of light =", c) -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python Adavnce)/Lecture Code/6-Tuple_Objects.py: -------------------------------------------------------------------------------- 1 | def printall(*args): 2 | print (args) 3 | printall(1, 2.0, '3') 4 | t = (7, 3) 5 | # divmod(t) --->Wrong 6 | divmod(*t) 7 | s = 'abc' 8 | t = [0, 1, 2] 9 | print(zip(s, t)) 10 | print(zip('Anne', 'Elk')) 11 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/21-Indexing_Multi_dimensional_arrays.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | y = np.arange(35).reshape(5,7) 3 | print(y[np.array([0,2,4]), np.array([0,1,2])]) 4 | #print(y[np.array([0,2,4]), np.array([0,1])]) 5 | print(y[np.array([0,2,4]), 1]) # broadcasting 6 | print(y[np.array([0,2,4])]) 7 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/2-x_coordinate.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | x2 = np.linspace(0, 2 * np.pi, 100) 4 | y2 = np.sin(x2) 5 | plt.plot(x2, y2) 6 | plt.show() 7 | x3 = np.linspace(1, 3, 100) 8 | y3 = x3 ** 2 - 2 * x3 + 1 9 | plt.plot(x3, y3) 10 | plt.show() 11 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture 
Code/1-Numpy_Types_Example.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | a = np.arange(15).reshape(3, 5) 3 | print(a) 4 | print(a.shape) 5 | print(a.ndim) 6 | print(a.dtype.name) 7 | print(a.itemsize) 8 | print(a.size) 9 | print(type(a)) 10 | b = np.array([6, 7, 8]) 11 | print(b) 12 | print(type(b)) 13 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/3-Printing_Arrays-Examples.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | a = np.arange(6) # 1d array 3 | print(a) 4 | b = np.arange(12).reshape(4,3) # 2d array 5 | print(b) 6 | c = np.arange(24).reshape(2,3,4) # 3d array 7 | print(c) 8 | print(np.arange(10000)) 9 | print(np.arange(10000).reshape(100,100)) 10 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/2- Lecture_2(Python Intermediate)/Lecture Code/15-Slicing_Example.py: -------------------------------------------------------------------------------- 1 | lst = [10, 20, 30, 40, 50, 60, 70, 80] 2 | print(lst) 3 | print(lst[0:3]) 4 | print(lst[4:8]) 5 | print(lst[2:5]) 6 | print(lst[-5:-3]) 7 | print(lst[:3]) 8 | print(lst[4:]) 9 | print(lst[:]) 10 | print(lst[-100:3]) 11 | print(lst[4:100]) 12 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python Adavnce)/Lecture Code/10-Linear_Regression_Example.py: -------------------------------------------------------------------------------- 1 | class simplenet: 2 | def __init__(self,var1,var2): 3 | self.a = var1 4 | self.b = var2 5 | 6 | def sim(self,p): 7 | return self.a*p + self.b 8 | 9 | net = simplenet(4.0,2.0) 10 | print (net.sim(3.0)) 11 | print('#',50*"-") -------------------------------------------------------------------------------- 
/Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/18-Iteration_Examples.py: -------------------------------------------------------------------------------- 1 | val = eval(input('Enter number: ')) 2 | root = 1.0; 3 | diff = root*root - val 4 | while diff > 0.00000001 or diff < -0.00000001: 5 | root = (root + val/root) / 2 6 | print(root, 'squared is', root*root) 7 | diff = root*root - val 8 | print('Square root of', val, "=", root) -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/6-Plotting_Bar_Charts.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | data = [5, 10, 30, 8] 4 | plt.bar(range(len(data)), data) 5 | plt.show() 6 | print('#',50*"-") 7 | 8 | plt.bar(range(len(data)), data, width = 1.) 9 | plt.show() 10 | 11 | plt.barh(range(len(data)), data) 12 | plt.show() 13 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python Adavnce)/Lecture Code/11-Handling_Exceptions_Example.py: -------------------------------------------------------------------------------- 1 | # x = int(input("Please enter a small positive integer: ")) 2 | # print("x =", x) 3 | try: 4 | x = int(input("Please enter a small positive integer: ")) 5 | print("x =", x) 6 | except: 7 | print("Input cannot be parsed as an integer") 8 | print('#', 50 * "-") -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/19-Intrinsic_Numpy_array_creation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | a1 = np.zeros((2, 3)) 3 | a2 = np.arange(10) 4 | print(a1); print(a2) 5 | a3= np.arange(2, 10, dtype=np.float) 6 | a4 = np.arange(2, 3, 0.1) 7 | print(a3); print(a4) 8 | a5 = np.linspace(1., 4., 6) 9 | a6 = 
np.indices((3,3)) 10 | print(a5); print(a6) 11 | print('#',50*"-") 12 | -------------------------------------------------------------------------------- /Python/06-Scipy/Lecture Code/3-Image.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import scipy 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | import scipy.misc 7 | img=scipy.misc.ascent() 8 | plt.imshow(img) 9 | plt.show() 10 | print(img[0:3,0:7]) 11 | print(img) 12 | img=scipy.misc.face() 13 | plt.imshow(img) 14 | plt.show() 15 | print(img[0:3,0:7]) 16 | print(img) 17 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/04-Seaborn/Lecture Code/9-Plotting_model_residuals.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | sns.set(style="whitegrid") 5 | 6 | rs = np.random.RandomState(7) 7 | x = rs.normal(2, 1, 75) 8 | y = 2 + 1.5 * x + rs.normal(0, 2, 75) 9 | 10 | sns.residplot(x, y, lowess=True, color="g") 11 | plt.show() 12 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/17-Automatic_Reshaping.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | a = np.arange(30) 3 | a.shape = 2,-1,5 # -1 means "whatever size" 4 | print(a.shape) 5 | print(a) 6 | a.shape = 6,5 7 | print(a.shape) 8 | print(a) 9 | x = np.arange(0,10,2) 10 | y = np.arange(5) 11 | m = np.vstack([x,y]) 12 | n = np.hstack([x,y]) 13 | print(m) 14 | print(n) 15 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/16-Color_list.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | values = 
np.random.random_integers(99, size = 50) 4 | color_set = ('.00', '.25', '.50', '.75') 5 | color_list = [color_set[(len(color_set) * val) // 100] for val in 6 | values] 7 | plt.bar(np.arange(len(values)), values, color = color_list) 8 | plt.show() 9 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/7-Plotting_multiple_bar_charts.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | data = np.random.rand(3,4) 4 | x8 = np.arange(4) 5 | plt.bar(x8 + 0.00, data[0], color = 'b', width = 0.25) 6 | plt.bar(x8 + 0.25, data[1], color = 'g', width = 0.25) 7 | plt.bar(x8 + 0.50, data[2], color = 'r', width = 0.25) 8 | plt.show() 9 | print('#',50*"-") -------------------------------------------------------------------------------- /Extra_Packages/Request_Package/Sample_Request.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | 3 | url = "https://www.cnn.com" # Replace with your actual URL 4 | try: 5 | with urllib.request.urlopen(url) as response: 6 | data = response.read().decode() 7 | print(data) # This will print the HTML content of the page 8 | except urllib.error.HTTPError: 9 | print('HTTP request failed') -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/4-Numpy_Basic_Operations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | a = np.array( [20,30,40,50] ) 3 | b = np.arange( 4 ) 4 | print(b) 5 | c = a-b 6 | print(c) 7 | print(b**2) 8 | print(10*np.sin(a)) 9 | print(a<35) 10 | A = np.array( [[1,1], [0,1]] ) 11 | B = np.array( [[2,0], [3,4]] ) 12 | print(A*B) 13 | print(A.dot(B)) 14 | print(np.dot(A, B)) 15 | print('#',50*"-") -------------------------------------------------------------------------------- 
/Supervised_Learning/Decision_Tree/Readme.md: -------------------------------------------------------------------------------- 1 | # Install Graphviz 2 | 3 | ## windows 4 | - pip install pydotplus 5 | - Download https://graphviz.org/download/ 6 | - Add path 7 | ``` 8 | import os 9 | os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz/bin/' 10 | ``` 11 | 12 | # Mac 13 | - brew install graphviz 14 | 15 | # Linux 16 | - sudo apt install graphviz 17 | -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/15-Indexing_With_Boolean.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | a = np.arange(12).reshape(3,4) 3 | b = a > 4; print(b) 4 | print(a[b]) 5 | a[b] = 0 ; print(a) 6 | a = np.arange(12).reshape(3,4) 7 | b1 = np.array([False,True,True]) 8 | b2 = np.array([True,False,True,False]) 9 | print(a[b1,:]) 10 | print(a[b1]) 11 | print(a[:,b2]) 12 | print(a[b1,b2]) 13 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/21- Marker_1.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | plt.rc('text', usetex=True) 4 | plt.rc('font', family='serif') 5 | x16 = np.linspace(-4, 4, 1000) 6 | y16 = .25 * (x16 + 4) * (x16 + 1) * (x16 - 2) 7 | plt.title('A polynomial' r'$f(x)=\frac{1}{4}(x+4)(x+1)(x-2)$') 8 | plt.plot(x16, y16, c = 'k') 9 | plt.show() 10 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/11-Nested_Conditionals.py: -------------------------------------------------------------------------------- 1 | value = eval(input("Enter value in the range 0 to10: ")) 2 | if int(value) >= 0: # First check 3 | if int(value) <= 10: 4 | print("In range") 5 | 
print("Done") 6 | value = eval(input("Enter value in the range 0 to10: ")) 7 | if int(value) >= 0 and int(value) <= 10: 8 | print("In range") 9 | print("Done") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/2- Lecture_2(Python Intermediate)/Lecture Code/7-Greatest_Common_Divisor.py: -------------------------------------------------------------------------------- 1 | def gcd(num1, num2): 2 | min = num1 if num1 < num2 else num2 3 | largestFactor = 1 4 | for i in range(1, min + 1): 5 | if num1 % i == 0 and num2 % i == 0: 6 | largestFactor = i 7 | return largestFactor 8 | L = gcd(24,6) 9 | print(L) 10 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/9-Stacking_more_than_two.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | a = np.random.rand(4) 4 | b = np.random.rand(4) 5 | c = np.random.rand(4) 6 | x10 = np.arange(4) 7 | plt.bar(x10, a, color = 'b' ) 8 | plt.bar(x10, b, color = 'g', bottom = a) 9 | plt.bar(x10, c, color = 'r', bottom = a + b) 10 | plt.show() 11 | print('#',50*"-") 12 | 13 | -------------------------------------------------------------------------------- /Extra_Packages/ENV/keyname.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------- 2 | from dotenv import load_dotenv 3 | import os 4 | # ----------------------------------------- 5 | # create a .env file in the directory and add the following line 6 | # keyname = 'Secret Key' 7 | load_dotenv() 8 | # ----------------------------------------- 9 | 10 | keyname = os.getenv('keyname') 11 | print(keyname) 12 | -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/2- Lecture_2(Python Intermediate)/Lecture Code/18-Tuples.py: 
-------------------------------------------------------------------------------- 1 | a = (1, 2, 3, 4) 2 | print(a[1]) 3 | print(a[0:3]) 4 | b = a.index(1) 5 | print(b) 6 | t = ("a", "b", "mpilgrim", "z", "example") 7 | t[0] 8 | t[-1] 9 | # t.append("new") 10 | v = ('a', 'b', 'e') 11 | (x, y, z) = v 12 | print(list(range(2))) 13 | (MONDAY, TUESDAY, WEDNESDAY) = list(range(3)) 14 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/6-Numpy_Basic_Operations_2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | a = np.random.random((2,3)) 3 | print(a) 4 | print(a.sum()) 5 | print(a.min()) 6 | print(a.max()) 7 | b = np.arange(12).reshape(3,4) 8 | print(b) 9 | print(b.sum(axis=0)) # sum of each column 10 | print(b.min(axis=1)) # min of each row 11 | print(b.cumsum(axis=1)) # cumulative sum along each row 12 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/2- Lecture_2(Python Intermediate)/Lecture Code/8-Parameter_Passing.py: -------------------------------------------------------------------------------- 1 | def increment(x): 2 | print("Beginning execution of increment, x =", x) 3 | x += 1 4 | print("Ending execution of increment, x =", x) 5 | def main(): 6 | x = 5 7 | print("Before increment, x =", x) 8 | increment(x) 9 | print("After increment, x =", x) 10 | main() 11 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/20-Indexing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | x = np.arange(10) 3 | print(x[1]) 4 | print(x[-2]) 5 | x.shape = (2, 5) 6 | print(x[1, 3]) 7 | print(x[1,-1]) 8 | print(x[0]) 9 | print(x[0][2]) 10 | print(x[2:5]) 11 | print(x[:-7]) 12 | print(x[1:7:2]) 13 | y = np.arange(35).reshape(5,7) 
14 | print(y[1:5:2,::3]) 15 | v = np.arange(10,1,-1) 16 | print(v[np.array([3, 3, 1, 8])]) 17 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/24-Ticks.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import matplotlib.ticker as ticker 4 | X = np.linspace(-15, 15, 1000) 5 | Y = np.sinc(X) 6 | ax = plt.axes() 7 | ax.xaxis.set_major_locator(ticker.MultipleLocator(5)) 8 | ax.xaxis.set_minor_locator(ticker.MultipleLocator(1)) 9 | plt.grid(True, which='both') 10 | plt.plot(X, Y) 11 | plt.show() 12 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/04-Seaborn/Lecture Code/1-Anscombe_quartet.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | sns.set(style="ticks") 5 | 6 | df = sns.load_dataset("anscombe") 7 | 8 | sns.lmplot(x="x", y="y", col="dataset", hue="dataset", data=df, 9 | col_wrap=2, ci=None, palette="muted", 10 | scatter_kws={"s": 50, "alpha": 1}) 11 | 12 | plt.show() 13 | print('#',50*"-") -------------------------------------------------------------------------------- /NetworkX/Sample_Example.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import matplotlib.pyplot as plt 3 | 4 | # creating an empty graph object 5 | G = nx.Graph() 6 | 7 | # adding nodes to the graph 8 | G.add_node(1) 9 | G.add_nodes_from([2, 3, 4]) 10 | 11 | # adding edges to the graph 12 | G.add_edge(1, 2) 13 | G.add_edges_from([(2, 3), (1, 3), (1,4), (1,1)]) 14 | 15 | # display the graph 16 | nx.draw(G, with_labels = True) 17 | plt.show() -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python 
Adavnce)/Lecture Code/2-String_Objects-Example.py: -------------------------------------------------------------------------------- 1 | word1 = 'Wow' 2 | word2 = 'Wow' 3 | print('Equal:',word1 == word2, 4 | ' Alias:',word1 is word2) 5 | name = input("Please enter your name: ") 6 | print("Hello " + name.upper() + ", how are you?") 7 | word = "ABCD" 8 | print(word.rjust(15, "*")) 9 | print(word.rjust(15, ">")) 10 | print(word.rjust(10)); print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/10-Boolean_Expressions.py: -------------------------------------------------------------------------------- 1 | x = 1 2 | y = 2 3 | b = (x == 1) 4 | b = (x != 1) 5 | b = (x == 1 and y == 2) 6 | b = (x != 1 and y == 2) 7 | b = (x == 1 and y != 2) 8 | b = (x != 1 and y != 2) 9 | b = (x == 1 or y == 2) 10 | b = (x != 1 or y == 2) 11 | b = (x == 1 or y != 2) 12 | b = (x != 1 or y != 2) 13 | if x == 1 or 2 or 3: 14 | print("OK") 15 | x == 1 or 2 or 3 -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/2-Variables_and_Assignment.py: -------------------------------------------------------------------------------- 1 | x = 2 2 | print(x) 3 | print('x') 4 | # 5 = x 5 | print('x = ' + str(x)) 6 | x = 20 7 | print('x = ' + str(x)) 8 | x , y, z = 20, 40, -30 9 | print('x =', x, ' y =', y, ' z =', z) 10 | a = 1 11 | print('First, variable a has value', a, 'and type', type(a)) 12 | a = 'abc' 13 | print('Now, variable a has value', a, 'and type', type(a)) -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/2- Lecture_2(Python Intermediate)/Lecture Code/10-Recursion.py: -------------------------------------------------------------------------------- 1 | def factorial(n): 2 | if n == 0: 3 | return 1 4 | else: 5 | return n * 
factorial(n - 1) 6 | def main(): 7 | # Try out the factorial function 8 | print(" 0! = ", factorial(0)) 9 | print(" 1! = ", factorial(1)) 10 | print(" 6! = ", factorial(6)) 11 | print("10! = ", factorial(10)) 12 | main() 13 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/22-Structural_indexing_tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | x = np.arange(5) 3 | print(x[:,np.newaxis]) 4 | print(x[np.newaxis,:]) 5 | print(x[:,np.newaxis] + x[np.newaxis,:]) 6 | z = np.arange(81).reshape(3,3,3,3) 7 | print(z[1,...,2]) 8 | print(z[1,:,:,2]) 9 | x = np.arange(10) 10 | x[2:7] = 1 11 | x[2:7] = np.arange(5) 12 | x = np.arange(0, 50, 10) 13 | x[np.array([1, 1, 3, 1])] += 1 14 | print(x) 15 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/04-Seaborn/Lecture Code/3-Annotated_heatmaps.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | sns.set() 6 | 7 | flights_long = sns.load_dataset("flights") 8 | 9 | flights = flights_long.pivot("month", "year", "passengers") 10 | 11 | 12 | f, ax = plt.subplots(figsize=(9, 6)) 13 | 14 | sns.heatmap(flights, annot=True, fmt="d", linewidths=.5, ax=ax) 15 | 16 | plt.show() 17 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/14-Definite_Loops.py: -------------------------------------------------------------------------------- 1 | count = 0 2 | while count <= 3: 3 | print(count) 4 | count += 1 5 | entry = 0 6 | sum = 0 7 | print("Enter numbers to sum, negative number ends list:") 8 | while entry >= 0: 9 | entry = int(input()) 10 | if int(entry) >= 0: 11 | sum += int(entry) 12 | print("Sum =", 
sum) 13 | n = 1 14 | stop = int(input()) 15 | while n <= stop: 16 | print(n) 17 | n += 1 -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/18-Converting_Python_array_like.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | x1 = np.float32(1.0) 3 | x2 = np.int_([1,2,4]) 4 | x3 = np.arange(3, dtype=np.uint8) 5 | x4 = np.array([1, 2, 3], dtype='f') 6 | print(x1, x2, x3, x4) 7 | x5 = x3.astype(float) 8 | print(x5) 9 | print(x5.dtype) 10 | x6 = np.dtype(int) 11 | x7 = np.array([2,3,1,0]) 12 | x8 = np.array([2, 3, 1, 0]) 13 | x9 = np.array([[1,2.0],[0,0],(1+1j,3.)]) 14 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python Adavnce)/Lecture Code/5-Dictionary_Objects.py: -------------------------------------------------------------------------------- 1 | def histogram(s): 2 | d = dict() 3 | for c in s: 4 | if c not in d: 5 | d[c] = 1 6 | else: 7 | d[c] += 1 8 | return d 9 | h = histogram('brontosaurus') 10 | print(h) 11 | def print_hist(h): 12 | for c in h: 13 | print( c, h[c]) 14 | h = histogram('parrot') 15 | print_hist(h) 16 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/5-Numpy_Basic_Operations_1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy import pi 3 | 4 | a = np.ones((2,3), dtype=int) 5 | b = np.random.random((2,3)) 6 | a *= 3 7 | print(a) 8 | b += a 9 | print(b) 10 | # print(a += b) 11 | a = np.ones(3, dtype=np.int32) 12 | b = np.linspace(0,pi,3) 13 | print(b.dtype.name) 14 | c = a+b; print(c) 15 | print(c.dtype.name) 16 | d = np.exp(c*1j); print(d) 17 | print(d.dtype.name) 18 | print('#',50*"-") -------------------------------------------------------------------------------- 
/Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/1-Integer_values.py: -------------------------------------------------------------------------------- 1 | 1 + 2 + 4 + 10 + 3 2 | print(1 + 2 + 4 + 10 + 3) 3 | print(10) 4 | print("10") 5 | print('10') 6 | print("Amir") 7 | print('Amir') 8 | # print(Amir)---> Error 9 | print(type(4)) 10 | print(type("4")) 11 | print(str(4)) 12 | print(int('5')) 13 | # int("Hi")---> Error 14 | print('5' + "10") 15 | print('abc' + 'efg') 16 | # print(5 + '10') ---> Error 17 | print(2 + int('11')) -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/2- Lecture_2(Python Intermediate)/Lecture Code/1-Squar_Root_Example.py: -------------------------------------------------------------------------------- 1 | from math import sqrt 2 | num = eval(input("Enter number: ")) 3 | root = sqrt(num); 4 | print("Square root of", num, "=", root) 5 | x = 16 6 | print(sqrt(18)) 7 | print(sqrt(x)) 8 | print(sqrt(2 * x - 5)) 9 | y = sqrt(x) 10 | print(y) 11 | y = 2 * sqrt(x + 10) - 3 12 | print(y) 13 | y = sqrt(sqrt(56.0)) 14 | print(y) 15 | print(sqrt(int('23'))) 16 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/04-Seaborn/Lecture Code/6-Scatterplot.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | sns.set(style="whitegrid", palette="muted") 6 | 7 | iris = sns.load_dataset("iris") 8 | 9 | iris = pd.melt(iris, "species", var_name="measurement") 10 | 11 | sns.swarmplot(x="measurement", y="value", hue="species", 12 | palette=["r", "c", "y"], data=iris) 13 | plt.show() 14 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/15-Times_table.py: 
-------------------------------------------------------------------------------- 1 | print(" 1 2 3 4 5 6 7 8 9 10") 2 | print(" +----------------------------------------") 3 | for row in range(1, 11): 4 | if row < 10: 5 | print(" ", end="") 6 | print(row, "| ", end="") 7 | for column in range(1, 11): 8 | product = row*column 9 | if product < 100: 10 | print(end=" ") 11 | if product < 10: 12 | print(end=" ") 13 | print(product, end=" ") 14 | print() -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/4-Control_Codes_within_Strings.py: -------------------------------------------------------------------------------- 1 | print('A\nB\nC') 2 | print('D\tE\tF') 3 | print('WX\bYZ') #---> Backspace 4 | print('1\a2\a3\a4\a5\a6') #---> Alert 5 | print("Did you know that 'word' is a word?") 6 | print('Did you know that "word" is a word?') 7 | print('Did you know that \'word\' is a word?') 8 | print("Did you know that \"word\" is a word?") 9 | filename = 'C:\\Users\\rick' 10 | print(filename) -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/9-If_else_statement.py: -------------------------------------------------------------------------------- 1 | dividend, divisor = eval(input('Please enter two numbers: ')) 2 | if int(divisor) != 0: 3 | print(str(dividend)+ '/'+ str(divisor)+ "=", int(dividend)/int(divisor)) 4 | else: 5 | print('Division by zero is not allowed') 6 | a1 = 1 - 1 7 | a2 = 2 - 2 8 | print('a1 =', a1, ' a2 =', a2) 9 | if a1 == a2: 10 | print('They are Same.') 11 | else: 12 | print('They are Different.') -------------------------------------------------------------------------------- /Python/05-Pandas/Lecture Code/8-Pandas_Function_Application.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy 
as np 3 | import matplotlib.pyplot as plt 4 | 5 | def adder(ele1,ele2): 6 | return ele1+ele2 7 | 8 | df = pd.DataFrame(np.random.rand(2,2),columns=['col1','col2']) 9 | print(df) 10 | df.pipe(adder, 2) 11 | print(df.apply(np.mean)) 12 | print(df.apply(np.mean, axis=1)) 13 | 14 | df['col1'].map(lambda x:x*2) 15 | print(df.apply(np.mean, axis=1)) 16 | print('#',50*"-") -------------------------------------------------------------------------------- /Extra_Packages/Cryptography_Package/Sample_Crypto.py: -------------------------------------------------------------------------------- 1 | from cryptography.fernet import Fernet 2 | 3 | # Generate a Key and Instantiate a Fernet Instance 4 | key = Fernet.generate_key() 5 | cipher_suite = Fernet(key) 6 | 7 | # Text to be encrypted 8 | text = b"Hello, World" 9 | cipher_text = cipher_suite.encrypt(text) 10 | print("Encrypted Text: ", cipher_text) 11 | 12 | # Decrypt the Text 13 | plaintext = cipher_suite.decrypt(cipher_text) 14 | print("Decrypted Text: ", plaintext) -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/8-Indexing_Slicing_and_Iterating.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | a = np.arange(10)**3; print(a) 3 | print(a[2]) 4 | print(a[2:5]) 5 | a[:6:2] = -1000 6 | print(a[ : :-1]) 7 | a = np.arange(10)**3 8 | for i in a: 9 | print(np.power(i,1/3)) 10 | def f(x,y): 11 | return 10*x+y 12 | b = np.fromfunction(f,(5,4),dtype=int); print(b) 13 | print(b[2,3]) 14 | print(b[0:5, 1]) 15 | print(b[ : ,1]) 16 | print(b[1:3, : ]) 17 | print(b[-1]) 18 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/06-Scipy/Lecture Code/5-Matrix_creation.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import scipy 3 | import matplotlib.pyplot as plt 4 | 5 | A = numpy.array([1,2,3]) 6 | print(A) 7 | 
B = A[::-1].copy() 8 | B[0]=123 9 | print(B) 10 | C = A + B 11 | C = A - B 12 | dotProduct1 = numpy.dot(A, B) 13 | print(dotProduct1) 14 | Product = (A* B) 15 | print(Product) 16 | dotProduct2 = (A* B).sum() 17 | print(dotProduct2) 18 | crossProduct = numpy.cross(A,B) 19 | print(crossProduct) 20 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/23-Structural_indexing_tools_2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | a = np.array([1.0, 2.0, 3.0]) 3 | b = np.array([2.0, 2.0, 2.0]) 4 | print(a * b) 5 | a = np.array([1.0, 2.0, 3.0]) 6 | b = 2.0 7 | print(a * b) 8 | x = np.arange(4) 9 | xx = x.reshape(4,1) 10 | y = np.ones(5) 11 | z = np.ones((3,4)) 12 | print(x.shape) 13 | print(y.shape) 14 | print(xx.shape) 15 | print(y.shape) 16 | print((xx + y).shape) 17 | print(xx + y) 18 | print((x + z).shape) 19 | print(x + z) 20 | -------------------------------------------------------------------------------- /Python/04-Seaborn/Lecture Code/5-Faceted_logistic.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | sns.set(style="darkgrid") 6 | 7 | df = sns.load_dataset("titanic") 8 | 9 | pal = dict(male="#6495ED", female="#F08080") 10 | 11 | g = sns.lmplot(x="age", y="survived", col="sex", hue="sex", data=df, 12 | palette=pal, y_jitter=.02, logistic=True) 13 | 14 | g.set(xlim=(0, 80), ylim=(-.05, 1.05)) 15 | 16 | plt.show() 17 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/5-User_Input.py: -------------------------------------------------------------------------------- 1 | print('Please enter your First Name: ') 2 | x = input() 3 | print('Text entered:', x) 4 | print('Type:', type(x)) 5 
| print('Please enter an integer value:') 6 | x = input() 7 | print('Please enter another integer value:') 8 | y = input() 9 | w, x, y, z = 10, 15, 20, 25 10 | print(w, x, y, z) 11 | print(w, x, y, z, sep=',') 12 | print(w, x, y, z, sep='') 13 | print(w, x, y, z, sep=':') 14 | print(w, x, y, z, sep='-----') -------------------------------------------------------------------------------- /Python/04-Seaborn/Lecture Code/11-Plotting_large_distributions.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | 6 | sns.set(style="whitegrid") 7 | 8 | diamonds = sns.load_dataset("diamonds") 9 | clarity_ranking = ["I1", "SI2", "SI1", "VS2", "VS1", "VVS2", "VVS1", "IF"] 10 | 11 | sns.boxplot(x="clarity", y="carat", 12 | color="b", order=clarity_ranking, 13 | data=diamonds) 14 | plt.show() 15 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/04-Seaborn/Lecture Code/4-Grouped_violinplots.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | sns.set(style="whitegrid", palette="pastel", color_codes=True) 5 | 6 | tips = sns.load_dataset("tips") 7 | 8 | sns.violinplot(x="day", y="total_bill", hue="smoker", 9 | split=True, inner="quart", 10 | palette={"Yes": "y", "No": "b"}, 11 | data=tips) 12 | 13 | sns.despine(left=True) 14 | plt.show() 15 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/2- Lecture_2(Python Intermediate)/Lecture Code/14-List_Assignment.py: -------------------------------------------------------------------------------- 1 | a = [1, 2, 3, 4] 2 | b = [1, 2, 3, 4] 3 | print('Is ', a, ' equal to ', b, '?', sep='', end=' ') 4 | print(a == b) 5 | print('Are ', a, ' and ', b, 
' aliases?', sep='', end=' ') 6 | print(a is b) 7 | c = [10, 20, 30, 40] 8 | d = c 9 | print('Is ', c, ' equal to ', d, '?', sep='', end=' ') 10 | print(c == d) 11 | print('Are ', c, ' and ', d, ' aliases?', sep='', end=' ') 12 | print(c is d) 13 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python Adavnce)/Lecture Code/3-String_Objects_Example_2.py: -------------------------------------------------------------------------------- 1 | s = " ABCDEFGHBCDIJKLMNOPQRSBCDTUVWXYZ " 2 | print("[", s, "]", sep="") 3 | s = s.strip() 4 | print("[", s, "]", sep="") 5 | print(s.count("BCD")) 6 | s = "ABCDEFGHIJK" 7 | print(s) 8 | for i in range(len(s)): 9 | print("[", s[i], "]", sep="", end="") 10 | print() 11 | for ch in s: 12 | print("<", ch, ">", sep="", end="") 13 | print() 14 | s = "ABCDEFGHIJK" 15 | print(len(s) == s.__len__()) 16 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/04-Seaborn/Lecture Code/10-Joint_kernel_density.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | sns.set(style="white") 6 | 7 | rs = np.random.RandomState(5) 8 | mean = [0, 0] 9 | cov = [(1, .5), (.5, 1)] 10 | x1, x2 = rs.multivariate_normal(mean, cov, 500).T 11 | 12 | x1 = pd.Series(x1, name="$X_1$") 13 | x2 = pd.Series(x2, name="$X_2$") 14 | 15 | 16 | g = sns.jointplot(x1, x2, kind="kde", height=7, space=0) 17 | plt.show() 18 | print('#',50*"-") -------------------------------------------------------------------------------- /Supervised_Learning/Decision_Tree/DT/Data/tennis.csv: -------------------------------------------------------------------------------- 1 | outlook,temp,humidity,windy,play 2 | sunny,hot,high,false,no 3 | sunny,hot,high,true,no 4 | overcast,hot,high,false,yes 5 | 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Demo of the three ways to iterate a DataFrame: by column label,
# by (label, column-Series) pairs, and by (index, row-Series) pairs.
df = pd.DataFrame({
    'A': np.linspace(0, stop=20 - 1, num=20),
    'B': np.random.rand(20),
    'D': np.random.normal(100, 10, size=(20)).tolist()
})
print(df)

# Plain iteration over a DataFrame yields its column labels.
for col in df:
    print(col)

# FIX: DataFrame.iteritems() was deprecated in pandas 1.5 and removed
# in pandas 2.0; items() is the drop-in replacement with identical output.
for key, value in df.items():
    print(key, value)

# iterrows() yields (index_label, row) with each row as a Series.
for row_index, row in df.iterrows():
    print(row_index, row)
print('#', 50 * "-")
'''
Contains the definition of the is_prime function
'''
from math import sqrt

def is_prime(n):
    '''
    Returns True if non-negative integer n is prime;
    otherwise, returns False.

    A prime has no divisors other than 1 and itself, so values
    below 2 (including 0 and 1) are not prime.
    '''
    if n < 2:
        return False  # FIX: 0 and 1 are not prime; original returned True
    trial_factor = 2
    root = sqrt(n)
    # Only divisors up to sqrt(n) need checking: a factor above the
    # root implies a matching factor below it.
    while trial_factor <= root:
        if n % trial_factor == 0:
            return False  # found a divisor -> composite
        trial_factor += 1  # FIX: original never advanced -> infinite loop
    return True  # no divisor found up to sqrt(n)
print(help(is_prime))
print('#',50*"-")
import matplotlib.pyplot as plt
import numpy as np

def pdf(X, mu, sigma):
    """Evaluate the normal density with mean mu and std sigma at X."""
    scale = 1. / (sigma * np.sqrt(2. * np.pi))
    rate = -1. / (2. * sigma ** 2)
    return scale * np.exp(rate * (X - mu) ** 2)

# Five grey curves: densities fitted to small random samples, drawn
# together with the exact standard normal density in black.
x13 = np.linspace(-6, 6, 1000)
for _ in range(5):
    samples = np.random.standard_normal(50)
    mu = np.mean(samples)
    sigma = np.std(samples)
    plt.plot(x13, pdf(x13, mu, sigma), color='.75')
plt.plot(x13, pdf(x13, 0., 1.), color='k')
plt.show()
print('#', 50 * "-")
pd.date_range('1/1/2019', periods=15), 7 | columns = ['A', 'B', 'C']) 8 | 9 | R = df.rolling(window=3,min_periods=1) 10 | print(R) 11 | print(R.aggregate(np.sum)) 12 | print(R['A'].aggregate(np.sum)) 13 | print(R[['A','B']].aggregate([np.mean,np.std])) 14 | print(R.aggregate({'A' : np.sum,'B' : np.count_nonzero})) 15 | print('#',50*"-") 16 | # --------------------- -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/2- Lecture_2(Python Intermediate)/Lecture Code/3-Time_Example_1.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | print("Enter your name: ", end="") 3 | start_time = time() 4 | name = input() 5 | elapsed = time() - start_time 6 | print(name, "It took you", elapsed, "seconds to type") 7 | 8 | sum = 0 9 | start = time() 10 | for n in range(1, 10001): 11 | sum += n 12 | elapsed = time() - start 13 | print("sum:", sum, "time:", elapsed) 14 | 15 | from time import sleep 16 | for count in range(10, -1, -1): 17 | print(count) 18 | sleep(1) 19 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/4-Text_File_Data.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | x6 = [] 4 | y6 = [] 5 | for line in open('data1.txt', 'r'): 6 | data = [float(s) for s in line.split()] 7 | x6.append(data[0]) 8 | y6.append(data[1]) 9 | plt.plot(x6, y6) 10 | plt.show() 11 | 12 | with open('data1.txt', 'r') as f: 13 | x7, y7 = zip(*[[float(s) for s in line.split()] for line in f]) 14 | plt.plot(x7, y7) 15 | plt.show() 16 | 17 | data = np.loadtxt('data1.txt') 18 | plt.plot(data[:,0], data[:,1]) 19 | plt.show() 20 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/05-Pandas/Lecture Code/7-Pandas_Statistics.py: 
import numpy
import scipy
import matplotlib.pyplot as plt
import scipy.interpolate

# Lagrange interpolation of sin over [-1, 1] from 10 sample points.
# FIX: scipy.linspace / scipy.sin were deprecated top-level aliases of
# the NumPy functions and have been removed from SciPy; use numpy.
x = numpy.linspace(-1, 1, 10)
xn = numpy.linspace(-1, 1, 1000)
y = numpy.sin(x)
polynomial = scipy.interpolate.lagrange(x, numpy.sin(x))
plt.plot(xn, polynomial(xn), x, y, 'or')
plt.show()

# Krogh (Hermite-capable) interpolation: a repeated abscissa encodes
# a derivative value at that point.
x = numpy.array([0, 0, 1, 1, 2, 2])
y = numpy.array([0, 0, 1, 0, 2, 0])
interp = scipy.interpolate.KroghInterpolator(x, y)
xn = numpy.linspace(0, 2, 20)
plt.plot(x, y, 'o', xn, interp(xn), 'r')
plt.show()
print('#', 50 * "-")
# Build a connection-settings dictionary, mutate it, then discard it.
d = dict(server="one", database="master")
d["database"]            # plain lookup; value unused
d["database"] = "pubs"   # overwrite an existing key
d["uid"] = "sa"
d["retrycount"] = 3
d[42] = "douglas"        # keys may be any hashable type, not just str
del d[42]
d.clear()                # leaves d as {}
empty_dict = {}

# Common read operations on a small mixed-value dictionary.
my_dict = {'a': 1, 'b': 2, 'c': "3"}
print(my_dict['a'])
del my_dict['b']
print(my_dict.get('e'))  # .get returns None for a missing key
print(my_dict.keys())
print(my_dict.values())
print(my_dict.items())
print('c' in my_dict)
print('#', 50 * "-")

s = 's'
s.capitalize()           # strings are immutable; result is discarded
def permute(prefix, suffix):
    """Print every permutation that starts with prefix and rearranges suffix."""
    if not suffix:
        # Nothing left to place: prefix is one complete permutation.
        print(prefix)
        return
    for pos, item in enumerate(suffix):
        # Move the element at pos onto the prefix; recurse on the rest.
        remaining = suffix[:pos] + suffix[pos + 1:]
        permute(prefix + [item], remaining)

def print_permutations(lst):
    """Print all permutations of lst, one list per line."""
    permute([], lst)

def main():
    print_permutations([1, 2, 3, 4])

main()
print('#', 50 * "-")
import numpy as np

# np.ix_ builds open-mesh index arrays whose shapes broadcast together,
# so arithmetic over them yields every combination of elements.
a = np.array([2, 3, 4, 5])
b = np.array([8, 5, 4])
c = np.array([5, 4, 6, 8, 3])
ax, bx, cx = np.ix_(a, b, c)
print(ax)
print(bx)
print(cx)
print(ax.shape, bx.shape, cx.shape)
result = ax + bx * cx
print(result)
# A single cell of the broadcast result equals the scalar expression:
print(result[3, 2, 4])
print(a[3] + b[2] * c[4])

# Automatic Reshaping: -1 lets NumPy infer the middle axis length.
a = np.arange(30).reshape(2, -1, 3)
print(a.shape)
print(a)

# Vector Stacking: two rows vs. one flat concatenated vector.
x = np.arange(0, 10, 2)
y = np.arange(5)
m = np.vstack([x, y])
xy = np.hstack([x, y])
print(xy)
print('#', 50 * "-")
12 | image = np.array( [ [ 0, 1, 2, 0 ], 13 | [ 0, 3, 4, 0 ] ] ) 14 | # each value corresponds to a color in the palette 15 | print(palette[image]) 16 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/2- Lecture_2(Python Intermediate)/Lecture Code/12-List.py: -------------------------------------------------------------------------------- 1 | nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 2 | print(nums[3]) 3 | nums[2] = (nums[0] + nums[9])/2; 4 | nums[1], nums[4] = eval(input("Enter a, b: ")) 5 | 6 | collection = [1, 2, 'Amir', 19, -3, 'end'] 7 | for item in collection: 8 | print(item) 9 | nums = [2, 4, 6, 8] 10 | for i in range(len(nums) - 1, -1, -1): 11 | print(nums[i]) 12 | a = list(range(0, 10)) 13 | print(a) 14 | a = list(range(10, -1, -1)) 15 | print(a) 16 | a = list(range(0, 100, 10)) 17 | print(a) 18 | a = list(range(-5, 6)) 19 | print(a) 20 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/16-Break_and_Continue.py: -------------------------------------------------------------------------------- 1 | entry = 0 2 | sum = 0 3 | print("Enter numbers to sum, negative number ends list:") 4 | while True: 5 | entry = eval(input()) 6 | if entry < 0: 7 | break 8 | sum += entry 9 | print("Sum =", sum) 10 | 11 | sum = 0 12 | done = False 13 | while not done: 14 | val = eval(input("Enter positive integer (999 quits):")) 15 | if val < 0: 16 | print("Negative value", val, "ignored") 17 | continue 18 | if val != 999: 19 | print("Tallying", val) 20 | sum += val 21 | else: 22 | done = (val == 999) 23 | print("sum =", sum) -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/8-If_statement.py: -------------------------------------------------------------------------------- 1 | a = True 
2 | b = False 3 | print('a =', a, ' b =', b) 4 | a = False 5 | x = 10 6 | print('a =', a, ' b =', b) 7 | dividend, divisor = eval(input('Enter two numbers to divide: ')) 8 | if int(divisor) != 0: 9 | print('dividend / divisor = ', int(dividend)/int(divisor)) 10 | if x < 1: 11 | y = x 12 | if x < 1: y = x 13 | dividend, divisor = eval(input('Please enter two numbers: ')) 14 | if int(divisor) != 0: 15 | quotient = int(dividend)/int(divisor) 16 | print(str(dividend) + '/'+ str(divisor) + "=", quotient) 17 | print('Program finished') -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/2- Lecture_2(Python Intermediate)/Lecture Code/13-Using_List_Examples.py: -------------------------------------------------------------------------------- 1 | def main(): 2 | sum = 0.0 3 | NUMBER_OF_ENTRIES = 5 4 | numbers = [] 5 | print("Please enter", NUMBER_OF_ENTRIES, "numbers: ") 6 | for i in range(0, NUMBER_OF_ENTRIES): 7 | num = eval(input("Enter number " + str(i) + ": ")) 8 | numbers += [num] 9 | sum += num 10 | 11 | print(end="Numbers entered: ") 12 | for num in numbers: 13 | print(num, end=" ") 14 | print() 15 | print("Average:", sum/NUMBER_OF_ENTRIES) 16 | print(numbers) 17 | main() 18 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/02-Numpy/Lecture Code/14-Fancy Indexing-Tricks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | a = np.arange(12).reshape(3,4) 3 | print(a) 4 | i = np.array( [ [0,1],[1,2] ] ) 5 | j = np.array( [ [2,1],[3,3] ] ) 6 | print(a[i,j]); print(a[i,2]); print(a[:,j]) 7 | s = np.array( [i,j] ); print(a[tuple(s)]) 8 | time = np.linspace(20, 145, 5) 9 | data = np.sin(np.arange(20)).reshape(5,4) 10 | ind = data.argmax(axis=0) 11 | time_max = time[ ind] 12 | data_max = data[ind, range(data.shape[1])] 13 | print(np.all(data_max == data.max(axis=0))) 14 | print('#',50*"-") 15 | a = 
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.cm as cm

# Spiral scatter coloured by angle through the cyclic 'hsv' colormap.
N = 256
angle = np.linspace(0, 8 * 2 * np.pi, N)
radius = np.linspace(.5, 1., N)
X = radius * np.cos(angle)
Y = radius * np.sin(angle)
plt.scatter(X, Y, c=angle, cmap=cm.hsv)
plt.show()
print('#', 50 * "-")

import matplotlib.colors as col
# FIX: np.random.random_integers was deprecated in NumPy 1.11 and later
# removed; randint(1, 100) draws from the same inclusive range [1, 99].
values = np.random.randint(1, 100, size=50)
# Map each bar's height through the 'binary' colormap normalized to [0, 99].
cmap = cm.ScalarMappable(col.Normalize(0, 99), cm.binary)
plt.bar(np.arange(len(values)), values, color=cmap.to_rgba(values))
plt.show()
print('#', 50 * "-")
3 | import matplotlib.pyplot as plt 4 | 5 | 6 | from scipy.cluster.hierarchy import linkage, dendrogram 7 | 8 | file=open("data.dat","r") 9 | lines=file.readlines() 10 | file.close() 11 | mammals=[] 12 | dataset=numpy.zeros((len(lines),8)) 13 | for index,line in enumerate(lines): 14 | mammals.append( line[0:27].rstrip(" ").capitalize() ) 15 | for tooth in range(8): 16 | dataset[index,tooth]=int(line[27+tooth]) 17 | 18 | plt.rcParams['figure.figsize'] = (10.0, 20.0) 19 | 20 | Z=linkage(dataset) 21 | dendrogram(Z, labels=mammals, orientation="right") 22 | plt.show() -------------------------------------------------------------------------------- /Python/06-Scipy/Lecture Code/7-Image compression.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import scipy 3 | import matplotlib.pyplot as plt 4 | import scipy.linalg 5 | a=numpy.arange(5) 6 | A=numpy.mat(a) 7 | print(a.shape, A.shape, a.transpose().shape, A.transpose().shape) 8 | 9 | A=scipy.linalg.hadamard(8) 10 | zero_sum_rows = (numpy.sum(A,0)==0) 11 | B=A[zero_sum_rows,:] 12 | print(B[0:3,:]) 13 | 14 | mu=1/numpy.sqrt(2) 15 | A=numpy.array([[mu,0,mu],[0,1,0],[mu,0,-mu]]) 16 | B=scipy.linalg.kron(A,A) 17 | 18 | a=numpy.arange(0,2*numpy.pi,1.6) 19 | A = scipy.linalg.toeplitz(a) 20 | print (A) 21 | 22 | print (numpy.exp(A)) 23 | print (scipy.linalg.expm(A)) 24 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/05-Pandas/Lecture Code/2-Pandas_Series-Example.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | counts = pd.Series([10, 20, 30, 40]) 6 | print(counts) 7 | print(counts.values) 8 | print(counts.index) 9 | 10 | stuff = pd.Series([10, 20, 30, 40], 11 | index=['apple', 'temple', 'maple', 'sample']) 12 | 13 | print(stuff) 14 | print(stuff['apple']) 15 | 
print(stuff[[name.endswith('mple') for name in stuff.index]]) 16 | print(stuff[0]) 17 | 18 | stuff.name = 'MyDataFrame' 19 | stuff.index.name = 'itmes' 20 | 21 | print(stuff) 22 | num = np.log10(stuff) 23 | print(num) 24 | print(stuff.isnull()) 25 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/05-Pandas/Lecture Code/20-Pandas_Visualization.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | df = pd.DataFrame(np.random.rand(9,3),index=pd.date_range('1/1/2019', 5 | periods=9), columns=list('ABC')) 6 | print(df) 7 | df.plot() 8 | plt.show() 9 | df = pd.DataFrame(np.random.rand(9,3),columns=['a','b','c']) 10 | df.plot.bar() 11 | plt.show() 12 | df.plot.barh(stacked=True) 13 | plt.show() 14 | df.plot.hist(bins=20) 15 | plt.show() 16 | df.plot.box() 17 | plt.show() 18 | df.plot.area() 19 | plt.show() 20 | df.plot.scatter(x='a', y='b') 21 | plt.show() 22 | df.plot.pie(subplots=True) 23 | plt.show() 24 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/05-Pandas/Lecture Code/5-Pandas_DataFrame_Example.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | data = pd.DataFrame({'value':[10, 20, 30, 40], 6 | 'patient':[1, 1, 1, 2], 7 | 'disease':['Flu', 'Cancer', 'Infection','Aneurysm']}) 8 | print(data) 9 | print(data[['disease', 'value']]) 10 | print(data.columns) 11 | print(data.dtypes) 12 | print(data['disease']==data.disease) 13 | print(data.loc[1]) 14 | print(data.head()) 15 | print(data.tail(3)) 16 | print(data.shape) 17 | data['year'] = 2013 18 | print(data) 19 | # data.value[[0,1,3]]=[1,2,3] 20 | print(data) 21 | print('#',50*"-") -------------------------------------------------------------------------------- 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df = pd.DataFrame(np.random.rand(5, 4),
                  index=['a', 'b', 'c', 'd', 'e'],
                  columns=['A', 'B', 'C', 'D'])

# --- Label-based selection with .loc --------------------------------
print(df.loc[:, 'A'])
print(df.loc[:, ['A', 'C']])
# FIX: .loc with labels absent from the index ('f', 'h') raises
# KeyError since pandas 1.0; reindex() reproduces the old behaviour of
# returning NaN rows for the missing labels.
print(df.reindex(index=['a', 'b', 'f', 'h'], columns=['A', 'C']))
# Label slices include both endpoints; an end label past the index is allowed.
print(df.loc['a':'h'])
print(df.loc['a'] >= 0)

# --- Position-based selection with .iloc ----------------------------
print(df.iloc[:3])
print(df.iloc[:3])
print(df.iloc[1:3, 2:3])
print(df.iloc[[1, 3, 4], [1, 3]])
print(df.iloc[1:3, :])
print(df.iloc[:, 1:3])

# FIX: .ix was removed in pandas 1.0 -- use .iloc for positional and
# .loc for label-based access instead.
print(df.iloc[:4])
print(df.loc[:, 'A'])

# Column access shorthands.
print(df[['A', 'B']])
print(df.A)
print('#', 50 * "-")
b=numpy.mat(numpy.arange(3) + 1).T 22 | print(b) 23 | 24 | xinfo=scipy.linalg.lstsq(A,b) 25 | print (xinfo[0].T) 26 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/05-Pandas/Lecture Code/1-Pandas_Series.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | s1 = pd.Series() 6 | 7 | print(s1) 8 | 9 | data = np.array(['a','b','c','d']) 10 | s2 = pd.Series(data) 11 | print(s2) 12 | 13 | s3 = pd.Series(data, index=[3,2,1,0]) 14 | print(s3) 15 | 16 | data = {'0' : 'a', '1' : 'b', '2' : 'c', '3': 'd'} 17 | s4 = pd.Series(data) 18 | print(s4) 19 | 20 | s5 = pd.Series(data,index=['a','b','c','d']) 21 | print(s5) 22 | 23 | s6 = pd.Series(0, index=[0, 1, 2, 3]) 24 | print(s6) 25 | 26 | s7 = pd.Series([1,2,3,4,5],index = ['a','b','c','d','e']) 27 | print (s7[3]) ;print (s7[3:]);print (s7[:3]) 28 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/06-Scipy/Lecture Code/8-Image compression.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import scipy 3 | import matplotlib.pyplot as plt 4 | import scipy.misc 5 | from scipy.linalg import svd 6 | 7 | plt.rcParams['figure.figsize'] = (12.0, 8.0) 8 | img=scipy.misc.ascent() 9 | U,s,Vh=svd(img) 10 | A = numpy.dot( U[:,0:32], numpy.dot( numpy.diag(s[0:32]), Vh[0:32,:])) 11 | plt.subplot(121,aspect='equal'); 12 | plt.gray() 13 | plt.imshow(img) 14 | plt.subplot(122,aspect='equal'); 15 | plt.imshow(A) 16 | plt.show() 17 | 18 | A=numpy.mat(numpy.eye(3,k=1)) 19 | print(A) 20 | 21 | b=numpy.mat(numpy.arange(3) + 1).T 22 | print(b) 23 | 24 | xinfo=scipy.linalg.lstsq(A,b) 25 | print (xinfo[0].T) 26 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/06-Scipy/Lecture Code/12-Lorenz_Attractors.py: 
import numpy
import scipy
import matplotlib.pyplot as plt

from numpy import linspace
from scipy.integrate import odeint
from mpl_toolkits.mplot3d import Axes3D  # registers the '3d' projection

# Classic Lorenz system parameters (chaotic regime).
sigma = 10.0
b = 8.0 / 3.0
r = 28.0

def f(x, t):
    """Right-hand side of the Lorenz ODEs for state x = [x, y, z] at time t."""
    # FIX (idiom): named lambda replaced by a def, per PEP 8.
    return [sigma * (x[1] - x[0]),
            r * x[0] - x[1] - x[0] * x[2],
            x[0] * x[1] - b * x[2]]

t = linspace(0, 20, 2000)
y0 = [5.0, 5.0, 5.0]   # initial condition
solution = odeint(f, y0, t)
X = solution[:, 0]
Y = solution[:, 1]
Z = solution[:, 2]

# FIX: passing projection kwargs to plt.gca() stopped being supported
# in Matplotlib 3.6; create the 3-D axes explicitly and plot on them.
ax = plt.figure().add_subplot(projection='3d')
ax.plot(X, Y, Z)
plt.show()
print('#', 50 * "-")
import pdfplumber
# %%--------------------------------------------------------------------------------------------------------------------
def extract_with_pdfplumber(file_path):
    """Return the concatenated extracted text of every page of *file_path*.

    Bug fix: pdfplumber's ``Page.extract_text()`` returns ``None`` for
    pages with no extractable text (e.g. scanned/image-only pages); the
    original ``text += page.extract_text()`` raised TypeError on such
    pages.  The ``or ""`` guard skips them instead.
    """
    text = ""
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            text += page.extract_text() or ""
    return text
# 
import pandas as pd

# DataFrame column/row manipulation demo.

df9 = {'one': pd.Series([1, 2, 3], index=['a', 'b', 'c']),
       'two': pd.Series([3, 2, 1], index=['a', 'b', 'c'])}
df9 = pd.DataFrame(df9)
print(df9)
print(df9['one'])

# Column addition, column arithmetic, column deletion.
df9['three'] = pd.Series([-1, -2, -3], index=['a', 'b', 'c'])
df9['four'] = df9['one'] + df9['three']
print(df9)
del df9['three']
print(df9)

# Row selection: by label, by position, by positional slice.
print(df9.loc['b'])
print(df9.iloc[1])
print(df9[2:3])

# Row append/drop.  Bug fix: DataFrame.append() was removed in pandas 2.0;
# pd.concat is the supported replacement and produces identical output.
# (Unused numpy/matplotlib imports from the original were dropped.)
df10 = pd.DataFrame([{'one': 5, 'two': 6, 'four': 7}], index=['e'])
df11 = pd.concat([df9, df10])
print(df11)
df12 = df11.drop('e')
print(df12)
print('#', 50 * "-")
import pandas as pd

# Concatenating two frames with hierarchical keys, then time helpers.
# (Unused numpy/matplotlib imports from the original were dropped.)

dfl = pd.DataFrame({
    'in': [1, 2, 3, 4],
    'Name': ['Amir', 'Brian', 'James', 'Mike'],
    'id': ['id1', 'id2', 'id3', 'id4']})
dfr = pd.DataFrame({
    'in': [1, 2, 3, 4],
    'Name': ['Li', 'Brian', 'Bran', 'Xu'],
    'id': ['id2', 'id4', 'id3', 'id1']})

# keys= builds a hierarchical index; ignore_index renumbers; axis=1 joins side by side.
print(pd.concat([dfl, dfr], keys=['x', 'y']))
print(pd.concat([dfl, dfr], keys=['x', 'y'], ignore_index=True))
print(pd.concat([dfl, dfr], keys=['x', 'y'], axis=1))

# Bug fix: DataFrame.append() was removed in pandas 2.0; pd.concat
# reproduces the same stacked output.
print(pd.concat([dfl, dfr]))
print(pd.concat([dfl, dfl, dfl, dfr]))

# Epoch-seconds timestamp and a half-hourly time range.
print(pd.Timestamp(1283447255, unit='s'))
print(pd.date_range("12:00", "15:30", freq="30min").time)
print('#', 50 * "-")
from random import randrange


def random_list():
    """Return a list of random length (3-19) of values in [-50, 50)."""
    return [randrange(-50, 50) for _ in range(randrange(3, 20))]


def selection_sort(lst):
    """Sort *lst* in place, ascending, using selection sort."""
    size = len(lst)
    for pos in range(size - 1):
        # Index of the smallest element in the unsorted suffix
        # (first occurrence wins on ties, as in a strict < scan).
        smallest = min(range(pos, size), key=lst.__getitem__)
        if smallest != pos:
            lst[pos], lst[smallest] = lst[smallest], lst[pos]


def main():
    """Print ten before/after sorting demonstrations."""
    for _ in range(10):
        values = random_list()
        print(values)
        selection_sort(values)
        print(values)
        print('==============================')


main()
print('#', 50 * "-")
def pdf(X, mu, sigma):
    """Gaussian probability density of X under N(mu, sigma**2).

    Works elementwise when X is a numpy array.
    """
    norm_const = 1. / (sigma * np.sqrt(2. * np.pi))
    exponent_scale = -1. / (2. * sigma ** 2)
    return norm_const * np.exp(exponent_scale * (X - mu) ** 2)
import pandas as pd
import numpy as np

# Missing-data handling demo: reindexing introduces NaN rows which can be
# detected, filled, dropped, or replaced.
# Fixes: removed the unused matplotlib import and an accidental copy-paste
# duplicate of print(df14['one'].sum()).

df = pd.DataFrame(np.random.randn(15, 3),
                  index=pd.date_range('1/1/2019', periods=15),
                  columns=['A', 'B', 'C'])

df13 = pd.DataFrame(np.random.randn(5, 3),
                    index=['a', 'c', 'e', 'f', 'h'],
                    columns=['one', 'two', 'three'])
print(df13)

# Reindexing over b, d, g (absent from df13) yields all-NaN rows.
df14 = df13.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
print(df14)
print(df14['one'].isnull())
print(df14['one'].notnull())
print(df14['one'].sum())        # NaNs are skipped in aggregation
print(df.fillna(0))
df15 = df13.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
print(df15.dropna())            # drop rows containing NaN
df16 = df13.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
print(df16.dropna(axis=1))      # drop columns containing NaN (all of them here)
print(df.replace({1: 0, 2: 0}))
print('#', 50 * "-")
np.random.rand(10, 10) 22 | plt.scatter(data[:,0], data[:, 1], c = 'b', marker = my_marker, s = 120) 23 | plt.show() 24 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/06-Scipy/Lecture Code/10-Curve_fitting.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import scipy 3 | import matplotlib.pyplot as plt 4 | A=18; w=3*numpy.pi; h=0.5 5 | x=numpy.linspace(0,1,100); y=A*numpy.sin(w*x+h) 6 | 7 | yc=None 8 | yc=numpy.copy(y) 9 | yc += 4*((0.5-numpy.random.rand(100))*numpy.exp(2*numpy.random.rand(100)**2)) # contamined data 10 | 11 | p0=None 12 | p0 = [20, 2*scipy.pi, 1] 13 | target_function = lambda x,AA,ww,hh: AA*numpy.sin(ww*x+hh) 14 | import scipy.optimize 15 | pF=None 16 | pVar = None 17 | pF,pVar = scipy.optimize.curve_fit(target_function, x, yc, p0) 18 | print(pF) 19 | yFit=None 20 | yFit=target_function(x,*pF) 21 | plot2, = plt.plot(x, yc, 'r+', label="Contamined Data") 22 | plot3, = plt.plot(x, yFit,'k', label="Fit of contamined Data") 23 | plt.legend() 24 | plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) 
import pandas as pd

# DataFrame construction demo: empty, from a list, from a list of rows,
# from a dict of columns, and from a list of dicts.
# (Unused numpy/matplotlib imports from the original were dropped.)

df1 = pd.DataFrame()
print(df1)

data = [1, 2, 3, 4, 5]
df2 = pd.DataFrame(data)
print(df2)

# Bug fix: dtype=float on mixed string/number rows raises in modern pandas
# ("could not convert string to float"); cast only the numeric column so
# the printed result (float Count, string Name) is unchanged.
data = [['apples', 10], ['orange', 20], ['Bananas', 30]]
df3 = pd.DataFrame(data, columns=['Name', 'Count'])
df3['Count'] = df3['Count'].astype(float)
print(df3)

data = {'Name': ['apples', 'orange', 'Bananas'], 'Count': [10, 20, 30]}
df4 = pd.DataFrame(data)
df5 = pd.DataFrame(data, index=['In1', 'In2', 'In3'])
print(df4)

# From a list of dicts: missing keys become NaN; an explicit `columns`
# argument selects (df7) or even invents (df8's 'bb') columns.
data = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]
df6 = pd.DataFrame(data, index=['first', 'second'])
df7 = pd.DataFrame(data, index=['first', 'second'], columns=['a', 'b'])
df8 = pd.DataFrame(data, index=['first', 'second'], columns=['a', 'bb'])
print(df6); print(df7); print(df8)
print('#', 50 * "-")
21 | b = np.random.standard_normal((100, 2)) 22 | b += np.array((1, 1)) 23 | plt.scatter(b[:,0], b[:,1], c = 'r', s = 100) 24 | plt.scatter(a[:,0], a[:,1], c = 'b', s = 25) 25 | plt.show() 26 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/06-Scipy/Lecture Code/Lecture6_3_Code.py: -------------------------------------------------------------------------------- 1 | # conda install -c conda-forge geopandas 2 | import geopandas 3 | import matplotlib.pyplot as plt 4 | 5 | world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres')) 6 | print(world.head()) 7 | 8 | world.plot() 9 | plt.show() 10 | 11 | print(world.geometry.name) 12 | 13 | world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres')) 14 | world['centroid_column'] = world.centroid 15 | world = world.set_geometry('centroid_column') 16 | world.plot() 17 | plt.show() 18 | print('#',50*"-") 19 | # ----------------------- 20 | import fiona 21 | help(fiona.open) 22 | url = "http://d2ad6b4ur7yvpq.cloudfront.net/naturalearth-3.3.0/ne_110m_land.geojson" 23 | df = geopandas.read_file(url) 24 | print(df.head()) 25 | print('#',50*"-") 26 | # ----------------------- 27 | world = world[(world.pop_est>0) & (world.name!="Antarctica")] 28 | world['gdp_per_cap'] = world.gdp_md_est / world.pop_est 29 | world.plot(column='gdp_per_cap') 30 | plt.show() -------------------------------------------------------------------------------- /Supervised_Learning/SVM/Sample_KNN_Exercise.py: -------------------------------------------------------------------------------- 1 | # Specify what are your features and targets. Why this is a classification 2 | # 1- Use the voice dataset. 3 | # 2- Specify what are your features and targets. 4 | # 3- Why this is a classification problem. 5 | # 4- Run the Support Vector Machine algorithm. 6 | # 5- Explain your findings and write down a paragraph to explain all the results. 
7 | # 6- Explain the differences between Support Vector Machine and Logistic Regression. 8 | #----------------------------------------------------------------------- 9 | # 1- 10 | 11 | #----------------------------------------------------------------------- 12 | # 2- 13 | 14 | 15 | #----------------------------------------------------------------------- 16 | # 3- 17 | 18 | 19 | #----------------------------------------------------------------------- 20 | # 4- 21 | 22 | #----------------------------------------------------------------------- 23 | # 5- 24 | 25 | #----------------------------------------------------------------------- 26 | # 6- -------------------------------------------------------------------------------- /Python/05-Pandas/Lecture Code/17-Pandas_GroupBy.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | ipl_data = {'Team': ['Riders', 'Riders', 'Devils', 'Devils', 'Kings', 6 | 'kings', 'Kings', 'Kings', 'Riders', 'Royals', 'Royals', 'Riders'], 7 | 'Rank': [1, 2, 2, 3, 3,4 ,1 ,1,2 , 4,1,2], 8 | 'Year': [2014,2015,2014,2015,2014,2015,2016,2017,2016,2014,2015,2017], 9 | 'Points':[876,789,863,673,741,812,756,788,694,701,804,690]} 10 | df = pd.DataFrame(ipl_data) 11 | print(df) 12 | print(df.groupby('Team').groups) 13 | print(df.groupby(['Team','Year']).groups) 14 | grouped = df.groupby('Year') 15 | for name,group in grouped: 16 | print(name) 17 | print(group) 18 | print(grouped.get_group(2014)) 19 | print(grouped['Points'].agg(np.mean)) 20 | grouped = df.groupby('Team') 21 | print(grouped.agg(np.size)) 22 | score = lambda x: (x - x.mean()) / x.std()*10 23 | print(grouped.transform(score)) 24 | print(df.groupby('Team').filter(lambda x: len(x) >= 3)) 25 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/04-Seaborn/Lecture Code/7-Overlapping_densities.py: 
-------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | 6 | sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)}) 7 | rs = np.random.RandomState(1979) 8 | x = rs.randn(500) 9 | g = np.tile(list("ABCDEFGHIJ"), 50) 10 | df = pd.DataFrame(dict(x=x, g=g)) 11 | m = df.g.map(ord) 12 | df["x"] += m 13 | pal = sns.cubehelix_palette(10, rot=-.25, light=.7) 14 | g = sns.FacetGrid(df, row="g", hue="g", aspect=15, palette=pal) 15 | g.map(sns.kdeplot, "x", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2) 16 | g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2) 17 | g.map(plt.axhline, y=0, lw=2, clip_on=False) 18 | def label(x, color, label): 19 | ax = plt.gca() 20 | ax.text(0, .2, label, fontweight="bold", color=color, 21 | ha="left", va="center", transform=ax.transAxes) 22 | g.map(label, "x") 23 | g.fig.subplots_adjust(hspace=-.25) 24 | g.set_titles("") 25 | g.set(yticks=[]) 26 | g.despine(bottom=True, left=True) 27 | plt.show() 28 | print('#',50*"-") -------------------------------------------------------------------------------- /Supervised_Learning/Naive-Bayes/Sample_NB_Exercise.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------- 2 | # Specify what are your features and targets. Why this is a classification 3 | # 1- Use the adult dataset. 4 | # 2- Specify what are your features and targets. 5 | # 3- Why this is a classification problem. 6 | # 4- Run the Naive Bayes algorithm. 7 | # 5- Explain your findings and write down a paragraph to explain all the results. 8 | # 6- Explain the differences between Naive Bayes and Decision tree. 
9 | #----------------------------------------------------------------------- 10 | # 1- 11 | 12 | 13 | 14 | #----------------------------------------------------------------------- 15 | # 2- 16 | 17 | 18 | 19 | #----------------------------------------------------------------------- 20 | # 3- 21 | 22 | 23 | 24 | #----------------------------------------------------------------------- 25 | # 4- 26 | 27 | 28 | 29 | #----------------------------------------------------------------------- 30 | # 5- 31 | 32 | 33 | #----------------------------------------------------------------------- 34 | # 6- -------------------------------------------------------------------------------- /Supervised_Learning/KNN/Sample_KNN_Exercise.py: -------------------------------------------------------------------------------- 1 | # Specify what are your features and targets. Why this is a classification 2 | # 1- Use the chronic_kidney disease dataset. 3 | # 2- Specify what are your features and targets. 4 | # 3- Why this is a classification problem. 5 | # 4- Run the K-Nearest Neighbor algorithm. 6 | # 5- Explain your findings and write down a paragraph to explain all the results. 7 | # 6- Explain the differences between Logistic Regression and K-Nearest Neighbor. 
import pydotplus
from sklearn.datasets import load_iris
from sklearn import tree
import collections

import os
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'

# Train a small decision tree on iris, then export a Graphviz rendering
# whose branches are recolored by side (left vs right child).
iris = load_iris()
clf = tree.DecisionTreeClassifier(random_state=42).fit(iris.data, iris.target)

dot_data = tree.export_graphviz(clf,
                                feature_names=iris.feature_names,
                                out_file=None,
                                filled=True,
                                rounded=True)
graph = pydotplus.graph_from_dot_data(dot_data)

# Lower-numbered child (left / True branch) gets brown, the other green.
colors = ('brown', 'forestgreen')
edges = collections.defaultdict(list)
for edge in graph.get_edge_list():
    edges[edge.get_source()].append(int(edge.get_destination()))

for children in edges.values():
    children.sort()
    for i in range(2):
        graph.get_node(str(children[i]))[0].set_fillcolor(colors[i])

graph.write_png('tree1.png')
graph.write_svg('tree1.svg')
/Supervised_Learning/Logitic_Regression/Sample_Logistic_Reg_Exercise.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------- 2 | # Specify what are your features and targets. Why this is a classification 3 | # 1- Use the bank additional dataset. 4 | # 2- Specify what are your features and targets. 5 | # 3- Why this is a classification problem. 6 | # 4- Run the Logistic Regression algorithm. 7 | # 5- Explain your findings and write down a paragraph to explain all the results. 8 | # 6- Explain the differences between Logistic Regression and Decision tree. 9 | #----------------------------------------------------------------------- 10 | # 1- 11 | 12 | 13 | 14 | #----------------------------------------------------------------------- 15 | # 2- 16 | 17 | 18 | 19 | #----------------------------------------------------------------------- 20 | # 3- 21 | 22 | 23 | 24 | #----------------------------------------------------------------------- 25 | # 4- 26 | 27 | 28 | 29 | #----------------------------------------------------------------------- 30 | # 5- 31 | 32 | 33 | 34 | 35 | #----------------------------------------------------------------------- 36 | # 6- 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /Extra_Packages/pyvis/eval_code.py: -------------------------------------------------------------------------------- 1 | 2 | import networkx as nx 3 | from pyvis.network import Network 4 | 5 | # Create a social network graph 6 | G = nx.Graph() 7 | 8 | 9 | # Sample data: Social network connections (friendships) 10 | people = [ 11 | "Alice", "Bob", "Charlie", "David", "Emma", 12 | "Fiona", "George", "Hannah", "Ian", "Julia" 13 | ] 14 | 15 | # Adding nodes (people) 16 | for person in people: 17 | G.add_node(person, title=f"Person: {person}") 18 | 19 | # Adding edges (friendships) 20 | friendships = [ 21 | ("Alice", 
"Bob"), ("Alice", "Charlie"), ("Alice", "David"), 22 | ("Bob", "Emma"), ("Charlie", "Fiona"), ("David", "George"), 23 | ("Emma", "Hannah"), ("Fiona", "Ian"), ("George", "Julia"), 24 | ("Hannah", "Alice"), ("Ian", "Charlie"), ("Julia", "Emma") 25 | ] 26 | 27 | G.add_edges_from(friendships) 28 | 29 | # Create a Pyvis Network object 30 | net = Network(notebook=True, height="600px", width="100%", bgcolor="#222222", font_color="white") 31 | 32 | # Convert the NetworkX graph to Pyvis 33 | net.from_nx(G) 34 | 35 | # Show the interactive graph 36 | net.show("social_network.html") 37 | 38 | print("Graph saved as social_network.html. Open it in a browser to view.") 39 | -------------------------------------------------------------------------------- /Python/06-Scipy/Lecture Code/11-Advance-optimization.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import scipy 3 | import matplotlib.pyplot as plt 4 | 5 | A=18; w=3*numpy.pi; h=0.5 6 | 7 | x=None; y=None 8 | x=numpy.linspace(0,1,100); y=A*numpy.sin(w*x+h) 9 | y += 4*((0.5-numpy.random.rand(100))*numpy.exp(2*numpy.random.rand(100)**2)) 10 | 11 | import scipy.optimize 12 | p0 = [20, 2*numpy.pi, 1] 13 | target_function = lambda x,AA,ww,hh: AA*numpy.sin(ww*x+hh) 14 | 15 | #pF,pVar = scipy.optimize.curve_fit(target_function, x, y, p0) 16 | #print (pF) 17 | 18 | error_function = lambda p,x,y: target_function(x,p[0],p[1],p[2])-y 19 | lpF,lpVar = scipy.optimize.leastsq(error_function,p0,args=(x,y)) 20 | print (lpF) 21 | 22 | import scipy.optimize 23 | scipy.optimize.fmin(scipy.optimize.rosen,[0,0]) 24 | help(scipy.optimize.minimize) 25 | print('#',50*"-") 26 | 27 | 28 | f=lambda x: [x[0]**2 - 2*x[0] - x[1] + 0.5, x[0]**2 + 4*x[1]**2 - 4] 29 | x,y=numpy.mgrid[-0.5:2.5:24j,-0.5:2.5:24j] 30 | U,V=f([x,y]) 31 | plt.quiver(x,y,U,V,color='r', \ 32 | linewidths=(0.2,), edgecolors=('k'), \ 33 | headaxislength=5) 34 | plt.show() 35 | 36 | 37 | import scipy.optimize 38 | f=lambda x: 
[x[0]**2 - 2*x[0] - x[1] + 0.5, x[0]**2 + 4*x[1]**2 - 4] 39 | 40 | scipy.optimize.root(f,[0,1]) 41 | 42 | scipy.optimize.root(f,[2,0]) 43 | print('#',50*"-") -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Lecture Code/12-Nested_Conditionals-Troubleshoot.py: -------------------------------------------------------------------------------- 1 | print("Help! My computer doesn't work!") 2 | print("Does the computer make any sounds (fans, etc.)") 3 | choice = input("or show any lights? (y/n):") 4 | if choice == 'n': 5 | choice = input("Is it plugged in? (y/n):") 6 | if choice == 'n': 7 | print("Plug it in. If the problem persists, ") 8 | print("please run this program again.") 9 | else: 10 | choice = input("Is switch in \"on\" position?(y/n):") 11 | if choice == 'n': 12 | print("Turn it on. If the problem persists, ") 13 | print("please run this program again.") 14 | else: 15 | choice = input("Does it have a fuse?(y/n):") 16 | if choice == 'n': 17 | choice = input("Is the outlet OK? (y/n):") 18 | if choice == 'n': 19 | print("Check the outlet's circuit ") 20 | print("breaker or fuse. Move to a") 21 | print("new outlet, if necessary. ") 22 | print("If the problem persists, ") 23 | print("please run this program again.") 24 | else: 25 | print("Consult a service technician.") 26 | else: 27 | print("Check the fuse. Replace if ") 28 | print("necessary. 
If the problem ") 29 | print("persists, then ") 30 | print("please run this program again.") 31 | else: 32 | print("Please consult a service technician.") -------------------------------------------------------------------------------- /Python/06-Scipy/Lecture Code/13-Stats_and_tests.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import scipy 3 | import matplotlib.pyplot as plt 4 | 5 | from scipy.stats import norm 6 | from scipy.stats import ttest_1samp 7 | 8 | data = numpy.array([[113,105,130,101,138,118,87,116,75,96, 9 | 122,103,116,107,118,103,111,104,111,89,78,100,89,85,88], 10 | [137,105,133,108,115,170,103,145,78,107, 11 | 84,148,147,87,166,146,123,135,112,93,76,116,78,101,123]]) 12 | 13 | dataDiff = data[1,:]-data[0,:] 14 | dataDiff.mean(), dataDiff.std() 15 | 16 | plt.rcParams['figure.figsize'] = (15.0, 5.0) 17 | plt.hist(dataDiff) 18 | plt.show() 19 | 20 | t_stat,p_value=ttest_1samp(dataDiff,0.0) 21 | print (p_value/2.0) 22 | 23 | mean,std=norm.fit(dataDiff) 24 | print(mean,std) 25 | print('#',50*"-") 26 | # ----------------------- 27 | from scipy.stats import gaussian_kde 28 | 29 | plt.hist(dataDiff, density=1) 30 | x=numpy.linspace(dataDiff.min(),dataDiff.max(),1000) 31 | pdf=norm.pdf(x,mean,std) 32 | plt.plot(x,pdf) 33 | 34 | 35 | pdf = gaussian_kde(dataDiff) 36 | pdf = pdf.evaluate(x) 37 | plt.hist(dataDiff, density=1) 38 | plt.plot(x,pdf,'k') 39 | plt.show() 40 | 41 | plt.hist(dataDiff, density=1) 42 | plt.plot(x,pdf,'k.-',label='Kernel fit') 43 | plt.plot(x,norm.pdf(x,mean,std),'r',label='Normal fit') 44 | plt.legend() 45 | plt.show() 46 | print('#',50*"-") 47 | -------------------------------------------------------------------------------- /Python/03-Matplotlib/Lecture Code/22-Annotation.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | x16 = np.linspace(-4, 4, 1000) 4 | y16 = .25 * (x16 + 4.) 
* (x16 + 1) * (x16 - 2) 5 | plt.title('Speed Plot vs. Average') 6 | plt.xlabel('Speed') 7 | plt.ylabel('Average') 8 | plt.plot(x16, y16, c = 'k') 9 | plt.show() 10 | print('#',50*"-") 11 | 12 | # ----------------------- 13 | x16 = np.linspace(-4, 4, 1000) 14 | y16 = .25 * (x16 + 4.) * (x16 + 1) * (x16 - 2) 15 | box = { 16 | 'facecolor' : '.75', 17 | 'edgecolor' : 'k', 18 | 'boxstyle' : 'round' 19 | } 20 | plt.text(-0.5, -0.20, 'Mark Here', bbox = box) 21 | plt.plot(x16, y16, c = 'k') 22 | plt.show() 23 | print('#',50*"-") 24 | # ----------------------- 25 | x16 = np.linspace(-4, 4, 1000) 26 | y16 = .25 * (x16 + 4.) * (x16 + 1) * (x16 - 2) 27 | 28 | plt.annotate('Mark Here', ha = 'center', va = 'bottom', 29 | xytext = (-1.5, 3.), xy = (0.75, -2.7), 30 | arrowprops = { 'facecolor' : 'black', 'shrink' : 0.05 }) 31 | 32 | plt.plot(x16, y16, c = 'k') 33 | plt.show() 34 | print('#',50*"-") 35 | # ----------------------- 36 | x17 = np.linspace(0, 6, 1024) 37 | y17 = np.sin(x17) 38 | y18 = np.cos(x17) 39 | plt.xlabel('X') 40 | plt.ylabel('Y') 41 | plt.plot(x17, y17, c = 'k', lw = 3., label = 'sin(X)') 42 | plt.plot(x17, y18, c = '.5', lw = 3., ls = '--', label = 'cos(X)') 43 | plt.legend() 44 | plt.grid(True) 45 | plt.show() 46 | print('#',50*"-") -------------------------------------------------------------------------------- /Supervised_Learning/Decision_Tree/DT-Graphing/Decision_tree_export.py: -------------------------------------------------------------------------------- 1 | import pydotplus # pip install pydotplus 2 | from sklearn.datasets import load_iris 3 | from sklearn import tree 4 | import collections 5 | 6 | import os 7 | os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/' 8 | 9 | # Data Collection 10 | X = [[180, 15, 0], 11 | [177, 42, 0], 12 | [136, 35, 1], 13 | [174, 65, 0], 14 | [141, 28, 1]] 15 | 16 | Y = ['man', 'woman', 'woman', 'man', 'woman'] 17 | 18 | data_feature_names = ['height', 'hair length', 'voice pitch'] 19 | 20 | 
clf = tree.DecisionTreeClassifier() 21 | clf = clf.fit(X,Y) 22 | 23 | # Visualize data 24 | dot_data = tree.export_graphviz(clf, 25 | feature_names=data_feature_names, 26 | out_file=None, 27 | filled=True, 28 | rounded=True) 29 | graph = pydotplus.graph_from_dot_data(dot_data) 30 | 31 | colors = ('turquoise', 'orange') 32 | edges = collections.defaultdict(list) 33 | 34 | for edge in graph.get_edge_list(): 35 | edges[edge.get_source()].append(int(edge.get_destination())) 36 | 37 | for edge in edges: 38 | edges[edge].sort() 39 | for i in range(2): 40 | dest = graph.get_node(str(edges[edge][i]))[0] 41 | dest.set_fillcolor(colors[i]) 42 | 43 | graph.write_png('tree.png') 44 | graph.write_svg('tree.svg') 45 | 46 | -------------------------------------------------------------------------------- /Supervised_Learning/Decision_Tree/DT-Graphing/Simple_Decision_Tree.py: -------------------------------------------------------------------------------- 1 | # import data 2 | from sklearn import datasets 3 | import numpy as np 4 | iris = datasets.load_iris() 5 | X = iris.data[:, [2,3]] 6 | y = iris.target 7 | #----------------------------------------------------------------------------- 8 | # data pre processing 9 | from sklearn.model_selection import train_test_split 10 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) 11 | from sklearn.preprocessing import StandardScaler 12 | sc = StandardScaler() 13 | sc.fit(X_train) 14 | X_train_std = sc.transform(X_train) 15 | X_test_std = sc.transform(X_test) 16 | X_combined_std = np.vstack((X_train_std, X_test_std)) 17 | y_combined = np.hstack((y_train, y_test)) 18 | #----------------------------------------------------------------------------- 19 | 20 | from sklearn.linear_model import LogisticRegression 21 | import plot_decision_regions as pp 22 | import matplotlib.pyplot as plt 23 | 24 | from sklearn.tree import DecisionTreeClassifier 25 | tree = 
DecisionTreeClassifier(criterion='entropy',max_depth=3, random_state=0) 26 | tree.fit(X_train, y_train) 27 | X_combined = np.vstack((X_train, X_test)) 28 | y_combined = np.hstack((y_train, y_test)) 29 | pp.plot_decision_regions(X_combined, y_combined,classifier=tree, test_idx=range(105,150)) 30 | plt.xlabel('petal length [cm]') 31 | plt.ylabel('petal width [cm]') 32 | plt.legend(loc='upper left') 33 | plt.show() 34 | -------------------------------------------------------------------------------- /Supervised_Learning/Decision_Tree/DT/Sample_DT_Exercise.py: -------------------------------------------------------------------------------- 1 | # %%%%%%%%%%%%% Machine Learning %%%%%%%%%%%%%%%%%%%%%%%% 2 | # %%%%%%%%%%%%% Authors %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | # Dr. Amir Jafari------>Email: amir.h.jafari@okstate.edu 4 | # Deepak Agarwal------>Email:deepakagarwal@gwmail.gwu.edu 5 | # %%%%%%%%%%%%% Date: 6 | # V1 June - 05 - 2018 7 | # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 8 | # %%%%%%%%%%%%% Decision Tree %%%%%%%%%%%%%%%%%%%%%%%%%% 9 | 10 | 11 | #%%----------------------------------------------------------------------- 12 | # Exercise 13 | #%%----------------------------------------------------------------------- 14 | # Specify what are your features and targets. Why this is a classification 15 | # 1- Use the bank banknote dataset. 16 | # 2- Specify what are your features and targets. 17 | # 3- Why this is a classification problem. 18 | # 4- Run the decision tree algorithm. 19 | # 5- Explain your findings and write down a paragraph to explain all the results. 
20 | #%%----------------------------------------------------------------------- 21 | # 1- 22 | 23 | #%%----------------------------------------------------------------------- 24 | # 2- 25 | 26 | 27 | #%%----------------------------------------------------------------------- 28 | # 3- 29 | 30 | 31 | #%%----------------------------------------------------------------------- 32 | # 4- 33 | 34 | #%%----------------------------------------------------------------------- 35 | # 5- 36 | 37 | 38 | -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python Adavnce)/Class Ex/Class-Ex-Lecture3.py: -------------------------------------------------------------------------------- 1 | # ================================================================= 2 | # Class_Ex1: 3 | # Write a python script (use a class) to simulate a Stopwatch. 4 | # Push a button to start the clock (call the start method), push a button 5 | # to stop the clock (call the stop method), and then read the elapsed time 6 | # (use the result of the elapsed method). 7 | # ---------------------------------------------------------------- 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | # ================================================================= 16 | # Class_Ex2: 17 | # Write a python script (use a class) to implement pow(x, n). 18 | # ---------------------------------------------------------------- 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | # ================================================================= 28 | # Class_Ex3: 29 | # Write a python class to calculate the area of a rectangle by length 30 | # and width and a method which will compute the area of a rectangle.
31 | # ---------------------------------------------------------------- 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | # ================================================================= 42 | # Class_Ex4: 43 | # Write a python class and name it Circle to calculate the area of circle 44 | # by a radius and two methods which will compute the area and the perimeter 45 | # of a circle. 46 | # ---------------------------------------------------------------- 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /Supervised_Learning/Decision_Tree/DT-Graphing/plot_decision_regions.py: -------------------------------------------------------------------------------- 1 | from matplotlib.colors import ListedColormap 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02): 5 | # setup marker generator and color map 6 | markers = ('s', 'x', 'o', '^', 'v') 7 | colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan') 8 | cmap = ListedColormap(colors[:len(np.unique(y))]) 9 | # plot the decision surface 10 | x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 11 | x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 12 | xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), 13 | np.arange(x2_min, x2_max, resolution)) 14 | Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) 15 | Z = Z.reshape(xx1.shape) 16 | plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap) 17 | plt.xlim(xx1.min(), xx1.max()) 18 | plt.ylim(xx2.min(), xx2.max()) 19 | # plot all samples 20 | X_test, y_test = X[test_idx, :], y[test_idx] 21 | for idx, cl in enumerate(np.unique(y)): 22 | plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], 23 | alpha=0.8, c=cmap(idx), 24 | marker=markers[idx], label=cl) 25 | # highlight test samples 26 | if test_idx: 27 | X_test, y_test = X[test_idx, :], y[test_idx] 28 | plt.scatter(X_test[:, 0], X_test[:, 1], c='', 29 | alpha=1.0, 
linewidth=1, marker='o', 30 | s=55, label='test set') -------------------------------------------------------------------------------- /Unsuperised_Learning/Pitfalss/Pitfalls_Kmean.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn.cluster import KMeans 4 | from sklearn.datasets import make_blobs 5 | import warnings 6 | 7 | warnings.filterwarnings("ignore") 8 | 9 | 10 | def visualize_kmeans(n_clusters, X, ax, title): 11 | y_pred = KMeans(n_clusters=n_clusters, random_state=random_state).fit_predict(X) 12 | ax.scatter(X[:, 0], X[:, 1], c=y_pred) 13 | ax.set_title(title) 14 | 15 | 16 | plt.figure(figsize=(12, 12)) 17 | n_samples = 1500 18 | random_state = 170 19 | X, y = make_blobs(n_samples=n_samples, random_state=random_state) 20 | 21 | axes = list(plt.subplots(nrows=2, ncols=2, figsize=(10, 10))[1].flatten()) 22 | titles = ["Incorrect number of blobs", "Anisotropicly distributed blobs", 23 | "Unequal variance", "Unevenly sized blobs"] 24 | 25 | # Incorrect number of clusters 26 | visualize_kmeans(2, X, axes[0], titles[0]) 27 | 28 | # Anisotropicly distributed data 29 | transformation = [[0.60834549, -0.63667341], [-0.40887718, 0.85253229]] 30 | X_aniso = np.dot(X, transformation) 31 | visualize_kmeans(3, X_aniso, axes[1], titles[1]) 32 | 33 | # Different variance 34 | X_varied, y_varied = make_blobs(n_samples=n_samples, random_state=random_state, 35 | cluster_std=[1.0, 2.5, 0.5]) 36 | visualize_kmeans(3, X_varied, axes[2], titles[2]) 37 | 38 | # Unevenly sized blobs 39 | X_filtered = np.vstack((X[y == 0][:500], X[y == 1][:100], X[y == 2][:10])) 40 | visualize_kmeans(3, X_filtered, axes[3], titles[3]) 41 | 42 | plt.show() -------------------------------------------------------------------------------- /Supervised_Learning/Random Forest/Sample_RF_Exercise.py: -------------------------------------------------------------------------------- 1 | # %%%%%%%%%%%%% 
Machine Learning %%%%%%%%%%%%%%%%%%%%%%%% 2 | # %%%%%%%%%%%%% Authors %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | # Dr. Amir Jafari------>Email: amir.h.jafari@okstate.edu 4 | # Deepak Agarwal------>Email: deepakagarwal@gwmail.gwu.edu 5 | # %%%%%%%%%%%%% Date: 6 | # V1 June - 05 - 2018 7 | # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 8 | # %%%%%%%%%%%%% Random Forest %%%%%%%%%%%%%%%%%%%%%%%%%% 9 | 10 | #%%----------------------------------------------------------------------- 11 | # Exercise 12 | #%%----------------------------------------------------------------------- 13 | # Complete the following steps on the banknote authentication dataset. 14 | # 1- Use the banknote authentication dataset. 15 | # 2- Specify what your features and targets are. 16 | # 3- Explain why this is a classification problem. 17 | # 4- Run the Random Forest algorithm. 18 | # 5- Explain your findings and write down a paragraph to explain all the results. 19 | # 6- Explain the differences between Random Forest and Decision Tree.
20 | #%%----------------------------------------------------------------------- 21 | # 1- 22 | 23 | #%%----------------------------------------------------------------------- 24 | # 2- 25 | 26 | 27 | #%%----------------------------------------------------------------------- 28 | # 3- 29 | 30 | 31 | #%%----------------------------------------------------------------------- 32 | # 4- 33 | 34 | #%%----------------------------------------------------------------------- 35 | # 5- 36 | 37 | #%%----------------------------------------------------------------------- 38 | # 6- -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/1- Lecture_1(Python Basics)/Class Ex/Class-Ex-Lecture1.py: -------------------------------------------------------------------------------- 1 | # ================================================================= 2 | # Class_Ex1: 3 | # Write python program that converts seconds to 4 | # (x Hour, x min, x seconds) 5 | # ---------------------------------------------------------------- 6 | 7 | 8 | 9 | 10 | 11 | # ================================================================= 12 | # Class_Ex2: 13 | # Write a python program to print all the different arrangements of the 14 | # letters A, B, and C. Each string printed is a permutation of ABC. 15 | # ---------------------------------------------------------------- 16 | 17 | 18 | 19 | 20 | 21 | # ================================================================= 22 | # Class_Ex3: 23 | # Write a python program to print all the different arrangements of the 24 | # letters A, B, C and D. Each string printed is a permutation of ABCD. 25 | # ---------------------------------------------------------------- 26 | 27 | 28 | 29 | 30 | 31 | # ================================================================= 32 | # Class_Ex4: 33 | # Suppose we wish to draw a triangular tree, and its height is provided 34 | # by the user. 
35 | # ---------------------------------------------------------------- 36 | 37 | 38 | 39 | 40 | 41 | # ================================================================= 42 | # Class_Ex5: 43 | # Write python program to print prime numbers up to a specified values. 44 | # ---------------------------------------------------------------- 45 | 46 | 47 | 48 | 49 | 50 | 51 | # ================================================================= -------------------------------------------------------------------------------- /Python/06-Scipy/Lecture Code/e.csv: -------------------------------------------------------------------------------- 1 | ,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 2 | 0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 3 | 1,0.0,0.0,0.0,0.0,0.0,0.23921569,0.6431373,0.8745098,0.96862745,0.9490196,0.8,0.4745098,0.043137256,0.0,0.0,0.0 4 | 2,0.0,0.0,0.0,0.019607844,0.60784316,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.85490197,0.12941177,0.0,0.0 5 | 3,0.0,0.0,0.0,0.5921569,1.0,0.9490196,0.4745098,0.15294118,0.023529412,0.05490196,0.28627452,0.8039216,1.0,0.8156863,0.023529412,0.0 6 | 4,0.0,0.0,0.21176471,1.0,0.94509804,0.17254902,0.0,0.0,0.0,0.0,0.0,0.03137255,0.8,1.0,0.38039216,0.0 7 | 5,0.0,0.0,0.6117647,1.0,0.47843137,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.28235295,1.0,0.7137255,0.0 8 | 6,0.0,0.0,0.85490197,1.0,0.16862746,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.050980393,1.0,0.90588236,0.0 9 | 7,0.0,0.0,0.9607843,1.0,0.9882353,0.9882353,0.9882353,0.99215686,0.99215686,0.99607843,0.99607843,1.0,1.0,1.0,0.98039216,0.0 10 | 8,0.0,0.0,0.972549,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0 11 | 9,0.0,0.0,0.88235295,1.0,0.078431375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 12 | 10,0.0,0.0,0.6509804,1.0,0.36078432,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 13 | 11,0.0,0.0,0.25882354,1.0,0.8784314,0.09411765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 14 | 
12,0.0,0.0,0.0,0.63529414,1.0,0.90588236,0.42745098,0.14117648,0.023529412,0.023529412,0.09803922,0.25490198,0.49411765,0.8235294,0.0,0.0 15 | 13,0.0,0.0,0.0,0.02745098,0.627451,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0 16 | 14,0.0,0.0,0.0,0.0,0.0,0.23921569,0.63529414,0.87058824,0.96862745,0.98039216,0.9019608,0.7411765,0.4627451,0.15294118,0.0,0.0 17 | 15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 18 | -------------------------------------------------------------------------------- /Unsuperised_Learning/Kmean/Sample_Kmean.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from sklearn import datasets 3 | import numpy as np 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.preprocessing import StandardScaler 6 | import matplotlib.pyplot as plt 7 | from sklearn.cluster import KMeans 8 | 9 | warnings.filterwarnings('ignore') 10 | 11 | 12 | def loss_function(data, kmeans_model): 13 | predict1 = kmeans_model.fit_predict(data) 14 | sum_distance = 0 15 | for i in range(len(kmeans_model.cluster_centers_)): 16 | sum_distance += sum(sum((data[predict1 == i] - kmeans_model.cluster_centers_[i]) ** 2)) 17 | return sum_distance 18 | 19 | 20 | def calculate_kmeans_loss(n_clusters, data): 21 | kmeans_model = KMeans(n_clusters=n_clusters, random_state=0) 22 | kmeans_model.fit(X_train_std) 23 | return loss_function(data, kmeans_model) 24 | 25 | 26 | iris = datasets.load_iris() 27 | X = iris.data[:, [2, 3]] 28 | y = iris.target 29 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) 30 | scaler = StandardScaler() 31 | scaler.fit(X_train) 32 | X_train_std = scaler.transform(X_train) 33 | X_test_std = scaler.transform(X_test) 34 | X_combined_std = np.vstack((X_train_std, X_test_std)) 35 | y_combined = np.hstack((y_train, y_test)) 36 | 37 | loss_array = [calculate_kmeans_loss(i, X_test_std) for i in range(1, 6)] 38 | 39 | plt.figure(1) 40 | 
plt.scatter([1, 2, 3, 4, 5], loss_array) 41 | plt.show() 42 | 43 | kmeans_model = KMeans(n_clusters=5, random_state=0) 44 | kmeans_model.fit(X_train_std) 45 | loss = loss_function(X_test_std, kmeans_model) 46 | print(kmeans_model.cluster_centers_) 47 | print(kmeans_model.fit_predict(X_test_std)) 48 | print('loss is : {} when k = 5'.format(loss)) -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python Adavnce)/Lecture Code/8-Employee_Example.py: -------------------------------------------------------------------------------- 1 | class EmployeeRecord: 2 | def __init__(self, n, i, r): 3 | self.name = n 4 | self.id = i 5 | self.pay_rate = r 6 | 7 | def open_database(filename, db): 8 | lines = open(filename) 9 | for line in lines: 10 | name, id, rate = eval(line) 11 | db.append(EmployeeRecord(name, id, rate)) 12 | lines.close() 13 | return True 14 | 15 | def print_database(db): 16 | for rec in db: 17 | print(str.format("{:>5}: {:<10} {:>6.2f}", 18 | rec.id, rec.name, rec.pay_rate)) 19 | 20 | def less_than_by_name(e1, e2): 21 | return e1.name < e2.name 22 | 23 | def less_than_by_id(e1, e2): 24 | return e1.id < e2.id 25 | 26 | def less_than_by_pay(e1, e2): 27 | return e1.pay_rate < e2.pay_rate 28 | 29 | def sort(db, comp): 30 | n = len(db) 31 | for i in range(n - 1): 32 | smallest = i 33 | 34 | for j in range(i + 1, n): 35 | if comp(db[j], db[smallest]): 36 | smallest = j 37 | if smallest != i: 38 | db[i], db[smallest] = db[smallest], db[i] 39 | def main(): 40 | database = [] 41 | if open_database("data.dat", database): 42 | print("---- Unsorted:") 43 | print_database(database) 44 | sort(database, less_than_by_name) 45 | print("---- Name order:") 46 | print_database(database) 47 | sort(database, less_than_by_id) 48 | print("---- ID order:") 49 | print_database(database) 50 | sort(database, less_than_by_pay) 51 | print("---- Pay order:") 52 | print_database(database) 53 | else: # Error, could not 
open file 54 | print("Could not open database file") 55 | main() 56 | print('#',50*"-") -------------------------------------------------------------------------------- /Unsuperised_Learning/Mean-Shift/Sample_Meanshift_iris.py: -------------------------------------------------------------------------------- 1 | from sklearn import datasets 2 | import matplotlib.pyplot as plt 3 | from itertools import cycle 4 | import numpy as np 5 | from sklearn.cluster import MeanShift 6 | from sklearn.preprocessing import StandardScaler 7 | 8 | 9 | def load_and_transform_data(): 10 | iris = datasets.load_iris() 11 | input_data = iris.data[:, [2, 3]] 12 | scaler = StandardScaler() 13 | scaler.fit(input_data) 14 | return scaler.transform(input_data) 15 | 16 | 17 | def apply_meanshift_clustering(input_data): 18 | ms_clustering_model = MeanShift(bandwidth=None, seeds=None, n_jobs=1) 19 | ms_clustering_model.fit(input_data) 20 | labels = ms_clustering_model.labels_ 21 | cluster_centers = ms_clustering_model.cluster_centers_ 22 | unique_labels = np.unique(labels) 23 | clusters_count = len(unique_labels) 24 | return labels, cluster_centers, clusters_count 25 | 26 | 27 | def plot_clusters(input_data, labels, cluster_centers, clusters_count): 28 | plt.figure(1) 29 | plt.clf() 30 | colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk') 31 | for cluster_idx, color in zip(range(clusters_count), colors): 32 | cluster_members = labels == cluster_idx 33 | cluster_center = cluster_centers[cluster_idx] 34 | plt.plot(input_data[cluster_members, 0], input_data[cluster_members, 1], color + '.') 35 | plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=color, 36 | markeredgecolor='k', markersize=14) 37 | plt.title('Estimated number of clusters: %d' % clusters_count) 38 | plt.show() 39 | 40 | 41 | input_data = load_and_transform_data() 42 | labels, cluster_centers, clusters_count = apply_meanshift_clustering(input_data) 43 | print(f"number of estimated clusters : {clusters_count}") 44 | 
plot_clusters(input_data, labels, cluster_centers, clusters_count) -------------------------------------------------------------------------------- /Data_Structure/Trie/Sample_code.py: -------------------------------------------------------------------------------- 1 | class TrieNode: 2 | def __init__(self): 3 | self.children = {} 4 | self.is_end_of_word = False 5 | 6 | 7 | class Trie: 8 | def __init__(self): 9 | self.root = TrieNode() 10 | 11 | def insert(self, word): 12 | current = self.root 13 | for char in word: 14 | if char not in current.children: 15 | current.children[char] = TrieNode() 16 | current = current.children[char] 17 | current.is_end_of_word = True 18 | 19 | def search(self, word): 20 | current = self.root 21 | for char in word: 22 | if char not in current.children: 23 | return False 24 | current = current.children[char] 25 | return current.is_end_of_word 26 | 27 | def starts_with(self, prefix): 28 | current = self.root 29 | for char in prefix: 30 | if char not in current.children: 31 | return False 32 | current = current.children[char] 33 | return True 34 | 35 | def autocomplete(self, prefix): 36 | current = self.root 37 | for char in prefix: 38 | if char not in current.children: 39 | return [] 40 | current = current.children[char] 41 | 42 | suggestions = [] 43 | 44 | def dfs(node, path): 45 | if node.is_end_of_word: 46 | suggestions.append(''.join(path)) 47 | for char, child_node in node.children.items(): 48 | dfs(child_node, path + [char]) 49 | 50 | dfs(current, list(prefix)) 51 | return suggestions 52 | 53 | 54 | # Example Usage 55 | trie = Trie() 56 | words = ["cat", "car", "cart", "carbon", "dog", "dove", "door"] 57 | for word in words: 58 | trie.insert(word) 59 | 60 | prefix = "car" 61 | print(f"Words starting with '{prefix}': {trie.autocomplete(prefix)}") -------------------------------------------------------------------------------- /Unsuperised_Learning/Mean-Shift/Sample_Meanshift.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from itertools import cycle 4 | from sklearn.datasets import make_blobs 5 | from sklearn.cluster import MeanShift, estimate_bandwidth 6 | 7 | CYCLE_COLORS = 'bgrcmykbgrcmykbgrcmykbgrcmyk' 8 | BLOB_CENTERS = [[1, 1], [-1, -1], [1, -1]] 9 | BLOB_SAMPLES = 10000 10 | BLOB_STD_DEVIATION = 0.6 11 | BANDWIDTH_QUANTILE = 0.2 12 | SAMPLE_BANDWIDTH = 500 13 | PLOT_MARKER_SIZE = 14 14 | 15 | 16 | def generate_data() -> np.array: 17 | data, _ = make_blobs(n_samples=BLOB_SAMPLES, centers=BLOB_CENTERS, cluster_std=BLOB_STD_DEVIATION) 18 | return data 19 | 20 | 21 | def mean_shift_clustering(data: np.array) -> tuple: 22 | bandwidth = estimate_bandwidth(data, quantile=BANDWIDTH_QUANTILE, n_samples=SAMPLE_BANDWIDTH) 23 | ms = MeanShift(bandwidth=bandwidth, bin_seeding=True) 24 | ms.fit(data) 25 | cluster_labels = ms.labels_ 26 | cluster_centers = ms.cluster_centers_ 27 | unique_labels = np.unique(cluster_labels) 28 | return cluster_labels, cluster_centers, unique_labels 29 | 30 | 31 | def plot_data(data: np.array, labels: np.array, centers: np.array, unique_labels: np.array): 32 | plt.figure(1) 33 | plt.clf() 34 | colors = cycle(CYCLE_COLORS) 35 | num_clusters = len(unique_labels) 36 | for cluster_id, color in zip(range(num_clusters), colors): 37 | members_of_cluster = labels == cluster_id 38 | cluster_center = centers[cluster_id] 39 | plt.plot(data[members_of_cluster, 0], data[members_of_cluster, 1], color + '.') 40 | plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=color, 41 | markeredgecolor='k', markersize=PLOT_MARKER_SIZE) 42 | plt.title(f'Estimated number of clusters: {num_clusters}') 43 | plt.show() 44 | 45 | 46 | sample_data = generate_data() 47 | labels, centers, unique_labels = mean_shift_clustering(sample_data) 48 | plot_data(sample_data, labels, centers, unique_labels) 
-------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/2- Lecture_2(Python Intermediate)/Class Ex/Class-Ex-Lecture2.py: -------------------------------------------------------------------------------- 1 | # ================================================================= 2 | # Class_Ex1: 3 | # Write a program that simulates the rolling of a die. 4 | # ---------------------------------------------------------------- 5 | 6 | 7 | 8 | 9 | # ================================================================= 10 | # Class_Ex2: 11 | # Answer Ex1 by using functions. 12 | # ---------------------------------------------------------------- 13 | 14 | 15 | 16 | 17 | 18 | # ================================================================= 19 | # Class_Ex3: 20 | # Randomly Permuting a List 21 | # ---------------------------------------------------------------- 22 | 23 | 24 | 25 | 26 | # ================================================================= 27 | # Class_Ex4: 28 | # Write a program to convert a tuple to a string. 29 | # ---------------------------------------------------------------- 30 | 31 | 32 | 33 | 34 | # ================================================================= 35 | # Class_Ex5: 36 | # Write a program to get the 3th element and 3th element from last of a tuple. 37 | # ---------------------------------------------------------------- 38 | 39 | 40 | 41 | 42 | # ================================================================= 43 | # Class_Ex6: 44 | # Write a program to check if an element exists in a tuple or not. 45 | # ---------------------------------------------------------------- 46 | 47 | 48 | 49 | 50 | # ================================================================= 51 | # Class_Ex7: 52 | # Write a program to check a list is empty or not. 
53 | # ---------------------------------------------------------------- 54 | 55 | 56 | 57 | 58 | # ================================================================= 59 | # Class_Ex8: 60 | # Write a program to generate a 4*5*3 3D array that each element is O. 61 | # ---------------------------------------------------------------- 62 | 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /Unsuperised_Learning/Affinity_Propagation/Affnity_Propagation_iris.py: -------------------------------------------------------------------------------- 1 | from sklearn.cluster import AffinityPropagation 2 | from sklearn import datasets 3 | from sklearn.preprocessing import StandardScaler 4 | import matplotlib.pyplot as plt 5 | from itertools import cycle 6 | 7 | AFFINITY_PROPAGATION_PARAMS = { 8 | "damping": 0.9, 9 | "max_iter": 500, 10 | "convergence_iter": 150, 11 | "copy": True, 12 | "preference": -30, 13 | "affinity": 'euclidean' 14 | } 15 | 16 | 17 | def preprocess_data(data): 18 | sc = StandardScaler() 19 | sc.fit(data) 20 | return sc.transform(data) 21 | 22 | 23 | def perform_clustering(data, params): 24 | af = AffinityPropagation(**params) 25 | af.fit(data) 26 | return af.cluster_centers_indices_, af.labels_ 27 | 28 | 29 | def plot_clusters(data, centers_indices, data_labels): 30 | plt.figure(1) 31 | plt.clf() 32 | colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk') 33 | n_clusters = len(centers_indices) 34 | for k, col in zip(range(n_clusters), colors): 35 | class_members = data_labels == k 36 | cluster_center = data[centers_indices[k]] 37 | plt.plot(data[class_members, 0], data[class_members, 1], col + '.') 38 | plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col, 39 | markeredgecolor='k', markersize=14) 40 | for point in data[class_members]: 41 | plt.plot([cluster_center[0], point[0]], [cluster_center[1], point[1]], col) 42 | plt.title('Estimated number of clusters: %d' % n_clusters) 43 | plt.show() 44 | 
45 | 46 | if __name__ == "__main__": 47 | iris = datasets.load_iris() 48 | iris_features = iris.data[:, [2, 3]] 49 | processed_data = preprocess_data(iris_features) 50 | 51 | cluster_centers_indices, labels = perform_clustering(processed_data, AFFINITY_PROPAGATION_PARAMS) 52 | print('Estimated number of clusters: %d' % len(cluster_centers_indices)) 53 | 54 | plot_clusters(processed_data, cluster_centers_indices, labels) -------------------------------------------------------------------------------- /Supervised_Learning/Decision_Tree/DT-Graphing/Decision_tree_graphviz_web.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.model_selection import train_test_split 4 | from sklearn.tree import DecisionTreeClassifier 5 | from sklearn.metrics import accuracy_score 6 | from sklearn import tree 7 | import os 8 | os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/' 9 | 10 | 11 | 12 | balance_data = pd.read_csv( 13 | 'https://archive.ics.uci.edu/ml/machine-learning-databases/balance-scale/balance-scale.data', 14 | sep= ',', header= None) 15 | 16 | 17 | 18 | print("Dataset Lenght:: ", len(balance_data)) 19 | print ("Dataset Shape:: ", balance_data.shape) 20 | 21 | 22 | 23 | X = balance_data.values[:, 1:5] 24 | Y = balance_data.values[:,0] 25 | 26 | 27 | 28 | X_train, X_test, y_train, y_test = train_test_split( X, Y, test_size = 0.3, random_state = 100) 29 | 30 | 31 | 32 | clf_gini = DecisionTreeClassifier(criterion = "gini", random_state = 100, 33 | max_depth=3, min_samples_leaf=5) 34 | clf_gini.fit(X_train, y_train) 35 | 36 | 37 | 38 | DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=3, 39 | max_features=None, max_leaf_nodes=None, min_samples_leaf=5, 40 | min_samples_split=2, min_weight_fraction_leaf=0.0, 41 | presort=False, random_state=100, splitter='best') 42 | 43 | clf_entropy = DecisionTreeClassifier(criterion = "entropy", 
random_state = 100, 44 | max_depth=3, min_samples_leaf=5) 45 | clf_entropy.fit(X_train, y_train) 46 | 47 | 48 | print (clf_gini.predict([[4, 4, 3, 3]])) 49 | 50 | y_pred = clf_gini.predict(X_test) 51 | print( y_pred) 52 | 53 | 54 | 55 | 56 | y_pred_en = clf_entropy.predict(X_test) 57 | print( y_pred_en) 58 | 59 | print( "Accuracy is ", accuracy_score(y_test, y_pred) * 100) 60 | 61 | 62 | print ("Accuracy is ", accuracy_score(y_test,y_pred_en)*100) 63 | 64 | tree.export_graphviz(clf_gini, out_file='tree.dot') 65 | 66 | # http://webgraphviz.com/ 67 | 68 | -------------------------------------------------------------------------------- /Python/05-Pandas/Lecture Code/microbiome.csv: -------------------------------------------------------------------------------- 1 | Taxon,Patient,Group,Tissue,Stool 2 | Firmicutes,1,0,136,4182 3 | Firmicutes,2,1,1174,703 4 | Firmicutes,3,0,408,3946 5 | Firmicutes,4,1,831,8605 6 | Firmicutes,5,0,693,50 7 | Firmicutes,6,1,718,717 8 | Firmicutes,7,0,173,33 9 | Firmicutes,8,1,228,80 10 | Firmicutes,9,0,162,3196 11 | Firmicutes,10,1,372,32 12 | Firmicutes,11,0,4255,4361 13 | Firmicutes,12,1,107,1667 14 | Firmicutes,13,0,96,223 15 | Firmicutes,14,1,281,2377 16 | Proteobacteria,1,0,2469,1821 17 | Proteobacteria,2,1,839,661 18 | Proteobacteria,3,0,4414,18 19 | Proteobacteria,4,1,12044,83 20 | Proteobacteria,5,0,2310,12 21 | Proteobacteria,6,1,3053,547 22 | Proteobacteria,7,0,395,2174 23 | Proteobacteria,8,1,2651,767 24 | Proteobacteria,9,0,1195,76 25 | Proteobacteria,10,1,6857,795 26 | Proteobacteria,11,0,483,666 27 | Proteobacteria,12,1,2950,3994 28 | Proteobacteria,13,0,1541,816 29 | Proteobacteria,14,1,1307,53 30 | Actinobacteria,1,0,1590,4 31 | Actinobacteria,2,1,25,2 32 | Actinobacteria,3,0,259,300 33 | Actinobacteria,4,1,568,7 34 | Actinobacteria,5,0,1102,9 35 | Actinobacteria,6,1,678,377 36 | Actinobacteria,7,0,260,58 37 | Actinobacteria,8,1,424,233 38 | Actinobacteria,9,0,548,21 39 | Actinobacteria,10,1,201,83 40 | 
Actinobacteria,11,0,42,75 41 | Actinobacteria,12,1,109,59 42 | Actinobacteria,13,0,51,183 43 | Actinobacteria,14,1,310,204 44 | Bacteroidetes,1,0,67,0 45 | Bacteroidetes,2,1,0,0 46 | Bacteroidetes,3,0,85,5 47 | Bacteroidetes,4,1,143,7 48 | Bacteroidetes,5,0,678,2 49 | Bacteroidetes,6,1,4829,209 50 | Bacteroidetes,7,0,74,651 51 | Bacteroidetes,8,1,169,254 52 | Bacteroidetes,9,0,106,10 53 | Bacteroidetes,10,1,73,381 54 | Bacteroidetes,11,0,30,359 55 | Bacteroidetes,12,1,51,51 56 | Bacteroidetes,13,0,2473,2314 57 | Bacteroidetes,14,1,102,33 58 | Other,1,0,195,18 59 | Other,2,1,42,2 60 | Other,3,0,316,43 61 | Other,4,1,202,40 62 | Other,5,0,116,0 63 | Other,6,1,527,12 64 | Other,7,0,357,11 65 | Other,8,1,106,11 66 | Other,9,0,67,14 67 | Other,10,1,203,6 68 | Other,11,0,392,6 69 | Other,12,1,28,25 70 | Other,13,0,12,22 71 | Other,14,1,305,32 -------------------------------------------------------------------------------- /Python/01-Pyhton-Programming/3- Lecture_3(Python Adavnce)/Class Ex/Class_Questions.py: -------------------------------------------------------------------------------- 1 | # --------------------------Q1-------------------------------------------------------- 2 | # Write a Python program to crate two empty classes, Student and TA. 3 | # Now create some instances and check whether they are instances of the said classes or not. 4 | # Also, check whether the said classes are subclasses of the built-in object class or not. 5 | # ------------------------------------------------------------------ 6 | # Use isinstance 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | # --------------------------Q2-------------------------------------------------------- 19 | # Write a Python class named Student with two attributes student_name, grade. 20 | # Modify the attribute values of the said class and print the original and modified values of the said attributes. 
21 | # ------------------------------------------------------------------ 22 | # use getattr and setattr 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | # --------------------------Q3-------------------------------------------------------- 35 | # Write a Python class named Student with two instances student1, student2 and 36 | # assign given values to the said instances attributes. 37 | # Print all the attributes of student1, student2 instances with their values in the given format. 38 | # 39 | # Input values of the instances: 40 | # student_1: 41 | # student_id = "12" 42 | # student_name = "Amir Jafari" 43 | # student_2: 44 | # student_id = "12" 45 | # grade_language = 85 46 | # grade_science = 93 47 | # grade_math = 95 48 | # Expected Output: 49 | # student_id -> V12 50 | # student_name -> Amir Jafari 51 | # student_id -> 12 52 | # grade_language -> 85 53 | # grade_science -> 93 54 | # grade_math -> 95 55 | # ------------------------------------------------------------------ 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | # --------------------------Q4-------------------------------------------------------- 71 | # Write a Python class which has two methods get_String and print_String. 72 | # get_String accept a string from the user and print_String print the string in upper case. 
73 | # ------------------------------------------------------------------ 74 | 75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /Supervised_Learning/KNN/Sample_KNN_Example_iris.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import LabelEncoder, StandardScaler 3 | from sklearn.metrics import confusion_matrix, accuracy_score, classification_report 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.neighbors import KNeighborsClassifier 6 | import seaborn as sns 7 | import matplotlib.pyplot as plt 8 | import warnings 9 | 10 | warnings.filterwarnings("ignore") 11 | DATA_PATH = "Data/iris.data.csv" 12 | 13 | 14 | def load_dataset(): 15 | col_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'] 16 | data = pd.read_csv(DATA_PATH, header=None, names=col_names) 17 | X = data.values[:, :-1] 18 | Y = data.values[:, -1] 19 | return X, Y, data['class'].unique() 20 | 21 | 22 | def train_model(X, Y): 23 | label_encoder = LabelEncoder() 24 | y = label_encoder.fit_transform(Y) 25 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100, stratify=y) 26 | 27 | scaler = StandardScaler() 28 | scaler.fit(X_train) 29 | 30 | X_train_std = scaler.transform(X_train) 31 | X_test_std = scaler.transform(X_test) 32 | 33 | knn_classifier = KNeighborsClassifier(n_neighbors=3) 34 | knn_classifier.fit(X_train_std, y_train) 35 | 36 | y_pred = knn_classifier.predict(X_test_std) 37 | print("Classification Report: ") 38 | print(classification_report(y_test, y_pred)) 39 | print("Accuracy : ", accuracy_score(y_test, y_pred) * 100) 40 | 41 | return y_test, y_pred 42 | 43 | 44 | def show_matrix(y_test, y_pred, class_names): 45 | conf_matrix = confusion_matrix(y_test, y_pred) 46 | df_cm = pd.DataFrame(conf_matrix, index=class_names, columns=class_names) 47 | 48 | 
"""Compare ward / average / complete agglomerative linkage on a spectral
embedding of the (noise-augmented) scikit-learn digits dataset."""
from time import time
import numpy as np
from scipy import ndimage
from matplotlib import pyplot as plt
from sklearn import manifold, datasets
from sklearn.cluster import AgglomerativeClustering

N_COMPONENTS = 2
N_CLUSTERS = 10


def plot_clustering(X_red, X, labels, title=None):
    """Plot each embedded point as its cluster label, coloured by label.

    `X` (the raw images) is not used here; the parameter is kept so the
    existing positional call sites remain valid.
    """
    x_min, x_max = np.min(X_red, axis=0), np.max(X_red, axis=0)
    X_red = (X_red - x_min) / (x_max - x_min)  # normalise embedding to [0, 1]
    plt.figure(figsize=(6, 4))
    for i in range(X_red.shape[0]):
        plt.text(X_red[i, 0], X_red[i, 1], str(labels[i]),
                 # N_CLUSTERS replaces the magic constant 10. — same value,
                 # but now tied to the cluster-count setting above.
                 color=plt.cm.Spectral(labels[i] / float(N_CLUSTERS)),
                 fontdict={'weight': 'bold', 'size': 9})
    plt.xticks([])
    plt.yticks([])
    if title is not None:
        plt.title(title, size=17)
    plt.axis('off')
    plt.tight_layout()


digits = datasets.load_digits(n_class=10)
images = digits.data
labels = digits.target
n_samples, n_features = images.shape
np.random.seed(0)  # make the augmentation shifts reproducible


def augment_dataset(images, labels):
    """Double the dataset by appending a randomly shifted copy of every
    8x8 digit image (small random translations via ndimage.shift)."""
    def shift(x):
        # Named def instead of an assigned lambda (PEP 8 E731); same behaviour.
        return ndimage.shift(x.reshape((8, 8)),
                             .3 * np.random.normal(size=2),
                             mode='constant',
                             ).ravel()

    images = np.concatenate([images, np.apply_along_axis(shift, 1, images)])
    labels = np.concatenate([labels, labels], axis=0)
    return images, labels


images, labels = augment_dataset(images, labels)


def embed_data(images):
    """Project the images to N_COMPONENTS dimensions with spectral embedding."""
    print("Computing embedding")
    images_transformed = manifold.SpectralEmbedding(n_components=N_COMPONENTS).fit_transform(images)
    print("Done.")
    return images_transformed


def create_plot_clusters(linkage, images_transformed, images, labels):
    """Cluster the embedding with the given linkage, report fit time, plot.

    First parameter renamed from `type`, which shadowed the builtin.
    `images`/`labels` are unused but kept for call-site compatibility.
    """
    print(f"{linkage} linkage")
    clustering = AgglomerativeClustering(linkage=linkage, n_clusters=N_CLUSTERS)
    t0 = time()
    clustering.fit(images_transformed)
    print(f"{linkage} : {time() - t0:.2f}s")
    plot_clustering(images_transformed, images, clustering.labels_, f"{linkage} linkage")


images_transformed = embed_data(images)
# Loop variable renamed from `type` (builtin shadowing) as well.
for linkage in ('ward', 'average', 'complete'):
    create_plot_clusters(linkage, images_transformed, images, labels)

plt.show()
Number of Instances: 625** 19 | 20 | **4. Number of Attributes: 5** 21 | 22 | **5. Attribute Information:** 23 | 24 | Target 25 | - Class Name (Left, Balanced, Right) - Categorical 26 | 27 | Predictors 28 | - Left-Weight - Numeric 29 | - Left-Distance - Numeric 30 | - Right-Weight - Numeric 31 | - Right-Distance - Numeric 32 | 33 | https://archive.ics.uci.edu/ml/machine-learning-databases/balance-scale/balance-scale.names 34 | 35 | 36 | ### Dataset - 2 : 37 | 38 | **1. Banknote Authentication Data** 39 | 40 | http://archive.ics.uci.edu/ml/datasets/banknote+authentication 41 | 42 | **2. Data Set Information:** 43 | 44 | Data were extracted from images that were taken from genuine and forged banknote-like specimens. 45 | For digitization, an industrial camera usually used for print inspection was used. The final images 46 | have 400x 400 pixels. Due to the object lens and distance to the investigated object gray-scale 47 | pictures with a resolution of about 660 dpi were gained. Wavelet Transform tool were used to extract 48 | features from images. 49 | 50 | 51 | **3. Number of Instances: 1372** 52 | 53 | **4. Number of Attributes: 5** 54 | 55 | **5. 
"""Gaussian Naive Bayes on the Pima diabetes data: load, split, train,
evaluate, and show the confusion matrix as an annotated heatmap."""
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
import seaborn as sns
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings("ignore")


def load_data(file_path):
    """Read the CSV at `file_path` and print basic exploratory summaries."""
    data = pd.read_csv(file_path, sep=',', header=0)
    print("Dataset No. of Rows: ", data.shape[0])
    print("Dataset No. of Columns: ", data.shape[1])
    print(data.head(2))
    # info() prints its report itself and returns None; wrapping it in
    # print() (as before) emitted a stray "None" line.
    data.info()
    print(data.describe(include='all'))
    return data


def split_data(data):
    """Split into features (all but the last column) and target (last
    column), then into 70/30 train/test sets."""
    X = data.values[:, :-1]
    Y = data.values[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=0)
    return X_train, X_test, y_train, y_test


def train_model(X, y):
    """Fit and return a Gaussian Naive Bayes classifier."""
    clf = GaussianNB()
    clf.fit(X, y)
    return clf


def evaluate_model(clf, X, y):
    """Print classification metrics for `clf` on (X, y) and return the
    confusion matrix (rows/columns in sorted label order)."""
    y_pred = clf.predict(X)
    y_pred_score = clf.predict_proba(X)
    print("Classification Report: ")
    print(classification_report(y, y_pred))
    print("Accuracy : ", accuracy_score(y, y_pred) * 100)
    print("\n")
    print("ROC_AUC : ", roc_auc_score(y, y_pred_score[:, 1]) * 100)
    print("\n")
    return confusion_matrix(y, y_pred)


def confusion_matrix_heatmap(data, cf_matrix):
    """Render `cf_matrix` as an annotated heatmap.

    BUGFIX: sklearn's confusion_matrix orders rows/columns by *sorted*
    label value, while Series.unique() returns labels in order of first
    appearance — if the first data row had Outcome=1 the axis labels
    would be swapped.  Sorting the class names keeps the tick labels
    aligned with the matrix axes.
    """
    class_names = sorted(data['Outcome'].unique())
    df_cm = pd.DataFrame(cf_matrix, index=class_names, columns=class_names)

    plt.figure(figsize=(5, 5))
    hm = sns.heatmap(df_cm, cbar=False, annot=True, square=True, fmt='d',
                     annot_kws={'size': 20}, yticklabels=df_cm.columns, xticklabels=df_cm.columns)
    hm.yaxis.set_ticklabels(hm.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=20)
    hm.xaxis.set_ticklabels(hm.xaxis.get_ticklabels(), rotation=0, ha='right', fontsize=20)
    plt.ylabel('True label', fontsize=20)
    plt.xlabel('Predicted label', fontsize=20)
    plt.tight_layout()
    plt.show()


data = load_data('Data/pima_diabetes.csv')
X_train, X_test, y_train, y_test = split_data(data)
model = train_model(X_train, y_train)
cf_matrix = evaluate_model(model, X_test, y_test)
confusion_matrix_heatmap(data, cf_matrix)
-------------------------------------------------------------------------------- 1 | # ================================================================= 2 | # Class_Ex1: 3 | # We will be working with a famous titanic data set for these exercises. 4 | # Later on in the Data mining section of the course, we will work this data, 5 | # and use it to predict survival rates of passengers. 6 | # For now, we'll just focus on the visualization of the data with seaborn: 7 | 8 | # use seaboran to load dataset 9 | # ---------------------------------------------------------------- 10 | import seaborn as sns 11 | import matplotlib.pyplot as plt 12 | sns.set_style('whitegrid') 13 | # titanic = sns.load_dataset('') load titanic 14 | # ================================================================= 15 | # Class_Ex2: 16 | # Join plot on fare and age 17 | # ---------------------------------------------------------------- 18 | 19 | 20 | 21 | 22 | 23 | # ================================================================= 24 | # Class_Ex3: 25 | # Distribution plot on fare with red color and 35 bin 26 | # ---------------------------------------------------------------- 27 | 28 | 29 | 30 | 31 | # ================================================================= 32 | # Class_Ex4: 33 | # box plot on class and age 34 | # ---------------------------------------------------------------- 35 | 36 | 37 | 38 | 39 | 40 | # ================================================================= 41 | # Class_Ex5: 42 | # swarmplot on class and age 43 | # ---------------------------------------------------------------- 44 | 45 | 46 | 47 | 48 | 49 | # ================================================================= 50 | # Class_Ex6: 51 | # Count plot on sex 52 | # ---------------------------------------------------------------- 53 | 54 | 55 | 56 | 57 | 58 | # ================================================================= 59 | # Class_Ex7: 60 | # plot heatmap 61 | # 
---------------------------------------------------------------- 62 | 63 | 64 | 65 | 66 | 67 | 68 | # ================================================================= 69 | # Class_Ex8: 70 | # Distribution of male and female ages in same grapgh (Facet) 71 | # ---------------------------------------------------------------- 72 | 73 | 74 | 75 | 76 | 77 | # ================================================================= 78 | # Class_Ex9: 79 | # Explain each graph and describe the results in words 80 | # ---------------------------------------------------------------- 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /Supervised_Learning/Logitic_Regression/Sample_Logistic_Reg_Example_wine.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, roc_auc_score 3 | from sklearn.model_selection import train_test_split 4 | from sklearn.linear_model import LogisticRegression 5 | import seaborn as sns 6 | import matplotlib.pyplot as plt 7 | import warnings 8 | 9 | warnings.filterwarnings("ignore") 10 | 11 | DATA_PATH = "Data/winequality-red.csv" 12 | CSV_DELIMITER = ';' 13 | RANDOM_STATE = 100 14 | TEST_SIZE = 0.3 15 | 16 | 17 | def load_and_prepare_data(): 18 | wine_data = pd.read_csv(DATA_PATH, sep=CSV_DELIMITER) 19 | wine_data['quality'] = wine_data['quality'].apply(lambda x: 0 if x <= 5 else 1) 20 | 21 | X = wine_data.values[:, :-1] 22 | Y = wine_data.values[:, -1] 23 | 24 | return train_test_split(X, Y, test_size=TEST_SIZE, random_state=RANDOM_STATE), wine_data 25 | 26 | 27 | def perform_training(X_train, y_train): 28 | clf = LogisticRegression() 29 | clf.fit(X_train, y_train) 30 | 31 | return clf 32 | 33 | 34 | def evaluate_model(clf, X_test, y_test, wine_data): 35 | y_pred = clf.predict(X_test) 36 | y_pred_score = clf.predict_proba(X_test) 37 | 38 | 
"""Affinity-propagation clustering demo on synthetic blobs: fit the model,
print standard cluster-quality metrics, and plot the resulting clusters."""
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn.cluster import AffinityPropagation
from sklearn import metrics
from sklearn.datasets import make_blobs


def generate_sample_data():
    """Create 300 two-dimensional points around three fixed centres."""
    centers = [[1, 1], [-1, -1], [1, -1]]
    X, labels_true = make_blobs(n_samples=300, centers=centers, cluster_std=0.5,
                                random_state=0)
    return X, labels_true


def compute_affinity_propagation(X):
    """Fit AffinityPropagation (preference=-50 keeps the cluster count low)
    and return (n_clusters, exemplar indices, per-point labels)."""
    clustering_model = AffinityPropagation(preference=-50).fit(X)
    cluster_centers_indices = clustering_model.cluster_centers_indices_
    labels = clustering_model.labels_
    num_clusters = len(cluster_centers_indices)
    return num_clusters, cluster_centers_indices, labels


def print_cluster_evaluation_scores(labels_true, labels, data=None):
    """Print external (vs. ground truth) and internal cluster metrics.

    BUGFIX: the silhouette score previously read the module-level global
    `X` — a hidden dependency that broke this function anywhere outside
    this script.  `data` now carries the feature matrix explicitly; it
    defaults to the module-level X so existing two-argument callers keep
    working unchanged.
    """
    features = X if data is None else data
    print('Estimated number of clusters: %d' % len(set(labels)))
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
    print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
    print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
    print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels))
    print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(labels_true, labels))
    print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(features, labels, metric='sqeuclidean'))


def plot_clusters(X, cluster_centers_indices, labels, num_clusters):
    """Plot each cluster in its own colour, marking the exemplar and
    joining every member point to it."""
    plt.close('all')
    plt.figure(1)
    plt.clf()

    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(num_clusters), colors):
        class_members = labels == k
        cluster_center = X[cluster_centers_indices[k]]
        plt.plot(X[class_members, 0], X[class_members, 1], col + '.')
        plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=14)
        for x in X[class_members]:
            plt.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col)

    plt.title('Estimated number of clusters: %d' % num_clusters)
    plt.show()


X, labels_true = generate_sample_data()
num_clusters, cluster_centers_indices, labels = compute_affinity_propagation(X)
# Pass X explicitly now that the function no longer relies on the global.
print_cluster_evaluation_scores(labels_true, labels, X)
plot_clusters(X, cluster_centers_indices, labels, num_clusters)
-------------------------------------------------------------------------------- /Python/06-Scipy/Lecture Code/data.dat: -------------------------------------------------------------------------------- 1 | OPOSSUM 54113344 2 | HAIRY TAIL MOLE 33114433 3 | COMMON MOLE 32103333 4 | STAR NOSE MOLE 33114433 5 | BROWN BAT 23113333 6 | SILVER HAIR BAT 23112333 7 | PIGMY BAT 23112233 8 | HOUSE BAT 23111233 9 | RED BAT 13112233 10 | HOARY BAT 13112233 11 | LUMP NOSE BAT 23112333 12 | ARMADILLO 00000088 13 | PIKA 21002233 14 | SNOWSHOE RABBIT 21003233 15 | BEAVER 11002133 16 | MARMOT 11002133 17 | GROUNDHOG 11002133 18 | PRAIRIE DOG 11002133 19 | GROUND SQUIRREL 11002133 20 | CHIPMUNK 11002133 21 | GRAY SQUIRREL 11001133 22 | FOX SQUIRREL 11001133 23 | POCKET GOPHER 11001133 24 | KANGAROO RAT 11001133 25 | PACK RAT 11000033 26 | FIELD MOUSE 11000033 27 | MUSKRAT 11000033 28 | BLACK RAT 11000033 29 | HOUSE MOUSE 11000033 30 | PORCUPINE 11001133 31 | GUINEA PIG 11001133 32 | COYOTE 13114433 33 | WOLF 33114423 34 | FOX 33114423 35 | BEAR 33114423 36 | CIVET CAT 33114422 37 | RACCOON 33114432 38 | MARTEN 33114412 39 | FISHER 33114412 40 | WEASEL 33113312 41 | MINK 33113312 42 | FERRER 33113312 43 | WOLVERINE 33114412 44 | BADGER 33113312 45 | SKUNK 33113312 46 | RIVER OTTER 33114312 47 | SEA OTTER 32113312 48 | JAGUAR 33113211 49 | OCELOT 33113211 50 | COUGAR 33113211 51 | LYNX 33113211 52 | FUR SEAL 32114411 53 | SEA LION 32114411 54 | WALRUS 10113300 55 | GREY SEAL 32113322 56 | ELEPHANT SEAL 21224411 57 | PECCARY 23113333 58 | ELK 04103333 59 | DEER 04003333 60 | MOOSE 04003333 61 | REINDEER 04103333 62 | ANTELOPE 04003333 63 | BISON 04003333 64 | MOUNTAIN GOAT 04003333 65 | MUSKOX 04003333 66 | MOUNTAIN SHEEP 04003333 67 | -------------------------------------------------------------------------------- /Supervised_Learning/Decision_Tree/DT/Sample_DT_Example_Tennis.py: -------------------------------------------------------------------------------- 1 | # %%%%%%%%%%%%% 
Machine Learning %%%%%%%%%%%%%%%%%%%%%%%% 2 | # %%%%%%%%%%%%% Authors %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | # Dr. Amir Jafari------>Email: amir.h.jafari@okstate.edu 4 | # Deepak Agarwal------>Email:deepakagarwal@gwmail.gwu.edu 5 | # %%%%%%%%%%%%% Date: 6 | # V1 June - 05 - 2018 7 | # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 8 | # %%%%%%%%%%%%% Decision Tree %%%%%%%%%%%%%%%%%%%%%%%%%% 9 | #%%----------------------------------------------------------------------- 10 | #%%----------------------------------------------------------------------- 11 | # Exercise 12 | #%%----------------------------------------------------------------------- 13 | 14 | # 1: 15 | # Build the simple tennis table we just reviewed, in python as a dataframe. Label the columns. 16 | # We are going to calculate entropy manually, but in python. 17 | # Make sure to enter all variables as binary vs. the actual categorical names 18 | # Name the dataframe tennis_ex. 19 | #%%----------------------------------------------------------------------- 20 | 21 | 22 | 23 | 24 | #%%----------------------------------------------------------------------- 25 | # 2: 26 | # Build a function that will calculate entropy. Calculate entropy for the table we just went over 27 | # in the example, but in python 28 | # This is for the first split. 29 | #%%----------------------------------------------------------------------- 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | #%%----------------------------------------------------------------------- 39 | # 3: 40 | # Run the decision tree algorithm and find out the best feature and graph it. 
#%%==================================================================
# Fit a small decision tree that predicts repeat customers from basic
# purchase-history features, then render the tree to PDF via Graphviz
# and open it in the default browser.
import pandas as pd
from sklearn import tree

#%%-----------------------------------------------------------------
import os
# Make a local Graphviz installation visible (Windows-specific path;
# harmless no-op elsewhere).
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
#%%-----------------------------------------------------------------

# Libraries to display the decision tree
from pydotplus import graph_from_dot_data
from sklearn.tree import export_graphviz
import webbrowser
#%%==================================================================
#%%
# Training data: one row per customer; the target frame marks whether
# each customer made a repeat purchase (1) or not (0).
customers = pd.DataFrame()
customers['purchases_amount'] = [105, 65, 89, 99, 149, 102, 34, 120, 129, 39,
                                 20, 30, 109, 40, 55, 100, 23, 20, 70, 10]

customers['purchases_items'] = [1, 4, 5, 4, 7, 1, 2, 10, 6, 5,
                                1, 3, 2, 1, 5, 10, 3, 3, 1, 1]

customers['promo'] = [1, 1, 0, 1, 0, 0, 0, 0, 0, 1,
                      1, 1, 1, 0, 1, 1, 1, 0, 1, 1]

customers['email_list'] = [1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
                           0, 1, 1, 0, 1, 0, 1, 1, 0, 0]

customers['checkouts'] = [1, 5, 3, 3, 1, 2, 4, 4, 1, 1,
                          1, 1, 2, 4, 1, 1, 2, 1, 1, 1]

repeat_customers = pd.DataFrame()

repeat_customers['repeat'] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                              0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# The bare .head() calls below display output only in interactive #%% cells
# (Spyder / VS Code); .info() prints in any context.
customers.head()
repeat_customers.head()
customers.info()
repeat_customers.info()
#%%==================================================================
# Need to install graphviz and pydotplus
# To install pydotplus use !pip install pydotplus in the console, let's you use the console vice a command window
# Alternatively you can also use the anaconda command prompt
# from IPython.display import Image
# The package above makes things easier to visualize
# Below we are calling the packages we just installed
#%%==================================================================

# Initialize and train our tree.
clf_1 = tree.DecisionTreeClassifier(criterion='entropy', max_features=1,
                                    max_depth=2, random_state=1000)

clf_1.fit(customers, repeat_customers)
# Renamed from the ambiguous single-letter `l` (PEP 8 E741).
feature_names = customers.columns
#%%==================================================================

dot_data = export_graphviz(clf_1, filled=True, rounded=True,
                           feature_names=list(feature_names), out_file=None)

graph = graph_from_dot_data(dot_data)
# NOTE(review): the output name "Tennis.pdf" looks copy-pasted from the
# tennis example; consider renaming to Purchase.pdf.  Kept as-is here to
# preserve existing behaviour.
graph.write_pdf("Tennis.pdf")
webbrowser.open_new(r'Tennis.pdf')
32 | # ---------------------------------------------------------------- 33 | data = np.random.rand(5,3) 34 | color_list = ['b', 'g', 'r', 'k', 'y'] 35 | 36 | 37 | 38 | 39 | # ================================================================= 40 | # Class_Ex4: 41 | # Write a Python code to plot couple of lines 42 | # on same plot with suitable legends of each line. 43 | # ---------------------------------------------------------------- 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | # ================================================================= 52 | # Class_Ex5: 53 | # Write a Python code to plot two or more lines with legends, 54 | # different widths and colors. 55 | # ---------------------------------------------------------------- 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | # ================================================================= 65 | # Class_Ex6: 66 | # Write a Python code to plot two or more lines and set the line markers. 67 | # ---------------------------------------------------------------- 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | # ================================================================= 76 | # Class_Ex7: 77 | # Write a Python code to show grid and draw line graph of 78 | # revenue of certain compan between November 4, 2017 to November 4, 2018. 79 | # Customized the grid lines with linestyle -, width .6. and color blue. 
80 | # ---------------------------------------------------------------- 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | # ================================================================= 89 | # Class_Ex8: 90 | # Write a Python code to create multiple empty plots in one plot 91 | # (facets) 92 | # ---------------------------------------------------------------- 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /Supervised_Learning/Random Forest/Readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Random Forest Algorithm 4 | 5 | ### Dataset - 1 : 6 | 7 | **1. Breast Cancer Data** 8 | 9 | https://www.kaggle.com/uciml/breast-cancer-wisconsin-data/data 10 | 11 | **2. Data Set Information:** 12 | 13 | Features are computed from a digitized image of a fine needle aspirate (FNA) of a breast 14 | mass. They describe characteristics of the cell nuclei present in the image. n the 3-dimensional 15 | space is that described in: [K. P. Bennett and O. L. Mangasarian: "Robust Linear Programming 16 | Discrimination of Two Linearly Inseparable Sets", Optimization Methods and Software 1, 1992, 17 | 23-34]. 18 | 19 | **3. Number of Instances: 569** 20 | 21 | **4. Number of Attributes: 33** 22 | 23 | **5. 
Attribute Information:** 24 | 25 | Target 26 | - Diagnosis (Benign, Malignant) - Categorical 27 | 28 | Predictors - Numeric 29 | - Radius (mean of distances from center to points on the perimeter) 30 | - Texture (standard deviation of gray-scale values) 31 | - Perimeter 32 | - Area 33 | - Smoothness (local variation in radius lengths) 34 | - Compactness (perimeter^2 / area - 1.0) 35 | - Concavity (severity of concave portions of the contour) 36 | - Concave points (number of concave portions of the contour) 37 | - Symmetry 38 | - Fractal dimension ("coastline approximation" - 1) 39 | 40 | https://www.kaggle.com/uciml/breast-cancer-wisconsin-data/data 41 | 42 | 43 | ### Dataset - 2 : 44 | 45 | **1. Titanic Data** 46 | 47 | https://www.kaggle.com/c/titanic/data 48 | 49 | **2. Data Set Information:** 50 | 51 | The sinking of the RMS Titanic is one of the most infamous shipwrecks in history. 52 | On April 15, 1912, during her maiden voyage, the Titanic sank after colliding with 53 | an iceberg, killing 1502 out of 2224 passengers and crew. This sensational tragedy 54 | shocked the international community and led to better safety regulations for ships. 55 | 56 | One of the reasons that the shipwreck led to such loss of life was that there were not 57 | enough lifeboats for the passengers and crew. Although there was some element of luck 58 | involved in surviving the sinking, some groups of people were more likely to survive than 59 | others, such as women, children, and the upper-class. 60 | 61 | We want to study what are characteristics of those who survived and who didn't. 62 | 63 | 64 | **3. Number of Instances: 891** 65 | 66 | **4. Number of Attributes: 12** 67 | 68 | **5. 
Attribute Information:** 69 | 70 | Target 71 | - Survived (Survived, Deceased) - Categorical 72 | 73 | Predictors 74 | - PassengerId PassengerId 75 | - Pclass Ticket class 76 | - Name Name 77 | - Sex Sex 78 | - Age Age 79 | - SibSp # of siblings / spouses aboard the Titanic 80 | - Parch # of parents / children aboard the Titanic 81 | - Ticket Ticket number 82 | - Fare Passenger fare 83 | - Cabin Cabin number 84 | - Embarked Port of Embarkation 85 | 86 | 87 | https://www.kaggle.com/c/titanic/data 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /Supervised_Learning/Naive-Bayes/Readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Naive Bayes Algorithm 4 | 5 | ### Dataset - 1 : 6 | 7 | **1. Pima Indian Diabetes Data** 8 | 9 | https://www.kaggle.com/uciml/pima-indians-diabetes-database/data 10 | 11 | **2. Data Set Information:** 12 | 13 | This dataset is originally from the National Institute of Diabetes and Digestive and Kidney 14 | Diseases. The objective of the dataset is to diagnostically predict whether or not a patient 15 | has diabetes, based on certain diagnostic measurements included in the dataset. Several constraints 16 | were placed on the selection of these instances from a larger database. In particular, all patients 17 | here are females at least 21 years old of Pima Indian heritage. 18 | The datasets consists of several medical predictor variables and one target variable, Outcome. 19 | Predictor variables includes the number of pregnancies the patient has had, their BMI, insulin level, 20 | age, and so on. 21 | 22 | **3. Number of Instances: 768** 23 | 24 | **4. Number of Attributes: 9** 25 | 26 | **5. 
Attribute Information:** 27 | 28 | 29 | 30 | Target 31 | - Outcome Class variable (0 or 1) - Categorical 32 | 33 | Predictors - Numeric 34 | - Pregnancies - Number of times pregnant 35 | - Glucose - Plasma glucose concentration a 2 hours in an oral glucose tolerance test 36 | - BloodPressure - Diastolic blood pressure (mm Hg) 37 | - SkinThickness - Triceps skin fold thickness (mm) 38 | - Insulin - 2-Hour serum insulin (mu U/ml) Numeric 39 | - BMI - Body mass index (weight in kg/(height in m)^2) 40 | - DiabetesPedigreeFunction - Diabetes pedigree function 41 | - Age - Age (years) 42 | 43 | https://www.kaggle.com/uciml/pima-indians-diabetes-database/data 44 | 45 | 46 | ### Dataset - 2 : 47 | 48 | **1. Census Data** 49 | 50 | https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data 51 | 52 | **2. Data Set Information:** 53 | 54 | Extraction of this data set was done by Mr. Barry Becker from the 1994 Census database. The 55 | data contains various attributes of a person like age, workclass, education, marital-status, 56 | occupation,race, sex, native-country to name a few. 57 | 58 | Prediction task is to determine whether a person makes over 50K a year. 59 | 60 | 61 | **3. Number of Instances: 32561** 62 | 63 | **4. Number of Attributes: 15** 64 | 65 | **5. 
Attribute Information:** 66 | 67 | Target 68 | - Income (>50K, <=50K) - Categorical 69 | 70 | Predictors 71 | - age - Age (years) - Numeric 72 | - workclass - Working class - Categorical 73 | - fnlwgt - final weight - Numeric 74 | - education - education - Categorical 75 | - education-num - education number - Numeric 76 | - marital-status - marital status - Categorical 77 | - occupation - occupation - Categorical 78 | - relationship - relationship - Categorical 79 | - race - race - Categorical 80 | - sex - sex - Categorical 81 | - capital-gain - capital gain - Numeric 82 | - capital-loss - capital loss - Numeric 83 | - hours-per-week - hours work per week - Numeric 84 | - native-country - native country - Categorical 85 | 86 | 87 | https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /Supervised_Learning/KNN/Readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## K Nearest Neighbours Algorithm 4 | 5 | ### Dataset - 1 : 6 | 7 | **1. Iris Data** 8 | 9 | https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data 10 | 11 | **2. Data Set Information:** 12 | 13 | This is perhaps the best known database to be found in the pattern recognition literature. 14 | The data set contains 3 classes of 50 instances each, where each class refers to a type of 15 | iris plant. One class is linearly separable from the other 2; the latter are NOT linearly 16 | separable from each other. 17 | 18 | The goal is to find the class of the iris plant given its sepal and petal length & width. 19 | 20 | **3. Number of Instances: 150** 21 | 22 | **4. Number of Attributes: 5** 23 | 24 | **5. 
Attribute Information:** 25 | 26 | 27 | 28 | Target 29 | - class (Setosa, Versicolour, Virginica) - categorical 30 | 31 | Predictors - numeric 32 | - sepal length in cm 33 | - sepal width in cm 34 | - petal length in cm 35 | - petal width in cm 36 | 37 | https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.names 38 | 39 | 40 | ### Dataset - 2 : 41 | 42 | **1. Chronic Kidney Disease Data** 43 | 44 | https://archive.ics.uci.edu/ml/datasets/Chronic_Kidney_Disease 45 | 46 | **2. Data Set Information:** 47 | 48 | The data is provided by Apollo Hospitals, Managiri, Madurai Main Road, Karaikudi, Tamilnadu. Various 49 | details of a patient like age in years, blood pressure, hypertension, diabetes condition data is 50 | available for analysis. 51 | 52 | The classification goal is to predict if the patient will have kidney disease (yes/no). 53 | 54 | 55 | **3. Number of Instances: 400** 56 | 57 | **4. Number of Attributes: 25** 58 | 59 | **5. Attribute Information:** 60 | 61 | Target 62 | - class - does patient has cronic kidney disease or not (binary: 'ckd' 'notckd') - categorical 63 | 64 | Predictors 65 | - age - age - numeric 66 | - bp - blood pressure - numeric 67 | - sg - specific gravity - categorical 68 | - al - albumin - categorical 69 | - su - sugar - categorical 70 | - rbc - red blood cells - categorical 71 | - pc - pus cell - categorical 72 | - pcc - pus cell clumps - categorical 73 | - ba - bacteria - categorical 74 | - bgr - blood glucose random - numeric 75 | - bu - blood urea - numeric 76 | - sc - serum creatinine - numeric 77 | - sod - sodium - numeric 78 | - pot - potassium - numeric 79 | - hemo- hemoglobin - numeric 80 | - pcv - packed cell volume - numeric 81 | - wc - white blood cell count - numeric 82 | - rc - red blood cell count - numeric 83 | - htn - hypertension - categorical 84 | - dm - diabetes mellitus - categorical 85 | - cad - coronary artery disease - categorical 86 | - appet - appetite - categorical 87 | - pe - pedal edema - 
categorical 88 | - ane - anemia - categorical 89 | 90 | https://archive.ics.uci.edu/ml/datasets/Chronic_Kidney_Disease 91 | -------------------------------------------------------------------------------- /Supervised_Learning/Random Forest/Sample_RF_Example_breast.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.ensemble import RandomForestClassifier 6 | from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, roc_auc_score 7 | 8 | # Constants 9 | TEST_SIZE = 0.3 10 | ESTIMATORS = 100 11 | FEATURES_COUNT = 15 12 | 13 | 14 | def load_data(): 15 | return pd.read_csv('Data/breast_cancer_data.csv', sep=',', header=0) 16 | 17 | 18 | def explore_data(data): 19 | print("Dataset No. of Rows: ", data.shape[0]) 20 | print("Dataset No. of Columns: ", data.shape[1]) 21 | print("Dataset first few rows:\n ") 22 | print(data.head(2)) 23 | print("Dataset info:\n ") 24 | print(data.info()) 25 | print(data.describe(include='all')) 26 | print("Sum of NULL values in each column. 
") 27 | print(data.isnull().sum()) 28 | 29 | 30 | def preprocess_data(data): 31 | data.drop(["id", "Unnamed: 32"], axis=1, inplace=True) 32 | data['diagnosis'] = data['diagnosis'].map({'M': 1, 'B': 0}) 33 | return data 34 | 35 | 36 | def train_model(X_train, y_train, estimator_count): 37 | model = RandomForestClassifier(n_estimators=estimator_count) 38 | model.fit(X_train, y_train) 39 | return model 40 | 41 | 42 | def evaluate_model(model, X_test, y_test): 43 | y_pred = model.predict(X_test) 44 | y_pred_score = model.predict_proba(X_test) 45 | print("Classification Report: ") 46 | print(classification_report(y_test, y_pred)) 47 | print("\n") 48 | print("Accuracy : ", accuracy_score(y_test, y_pred) * 100) 49 | print("\n") 50 | print("ROC_AUC : ", roc_auc_score(y_test, y_pred_score[:, 1]) * 100) 51 | 52 | 53 | def heatmap(conf_matrix, class_names): 54 | df_cm = pd.DataFrame(conf_matrix, index=class_names, columns=class_names) 55 | plt.figure(figsize=(5, 5)) 56 | hm = sns.heatmap(df_cm, cbar=False, annot=True, square=True, fmt='d', annot_kws={'size': 20}, 57 | yticklabels=df_cm.columns, xticklabels=df_cm.columns) 58 | hm.yaxis.set_ticklabels(hm.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=20) 59 | hm.xaxis.set_ticklabels(hm.xaxis.get_ticklabels(), rotation=0, ha='right', fontsize=20) 60 | plt.ylabel('True label', fontsize=20) 61 | plt.xlabel('Predicted label', fontsize=20) 62 | plt.tight_layout() 63 | plt.show() 64 | 65 | 66 | # Loading the data 67 | data = load_data() 68 | explore_data(data) 69 | 70 | # Preprocessing the data 71 | data = preprocess_data(data) 72 | X = data.values[:, 1:] 73 | Y = data.values[:, 0] 74 | X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=TEST_SIZE, random_state=100) 75 | 76 | # Training models and evaluating 77 | random_forest_all = train_model(X_train, y_train, ESTIMATORS) 78 | evaluate_model(random_forest_all, X_test, y_test) 79 | 80 | newX_train = X_train[:, 
random_forest_all.feature_importances_.argsort()[::-1][:FEATURES_COUNT]] 81 | newX_test = X_test[:, random_forest_all.feature_importances_.argsort()[::-1][:FEATURES_COUNT]] 82 | random_forest_k = train_model(newX_train, y_train, ESTIMATORS) 83 | evaluate_model(random_forest_k, newX_test, y_test) 84 | 85 | heatmap(confusion_matrix(y_test, random_forest_all.predict(X_test)), data['diagnosis'].unique()) 86 | heatmap(confusion_matrix(y_test, random_forest_k.predict(newX_test)), data['diagnosis'].unique()) -------------------------------------------------------------------------------- /Python/05-Pandas/Class Ex/Class-Ex-Lecture5.py: -------------------------------------------------------------------------------- 1 | # ================================================================= 2 | # Class_Ex1: 3 | # From the data table above, create an index to return all rows for 4 | # which the phylum name ends in "bacteria" and the value is greater than 1000. 5 | # ---------------------------------------------------------------- 6 | import pandas as pd 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | 10 | data = pd.DataFrame({'value':[632, 1638, 569, 115, 433, 1130, 754, 555], 11 | 'patient':[1, 1, 1, 1, 2, 2, 2, 2], 12 | 'phylum':['Firmicutes', 'Proteobacteria', 'Actinobacteria', 13 | 'Bacteroidetes', 'Firmicutes', 'Proteobacteria', 'Actinobacteria', 'Bacteroidetes']}) 14 | 15 | 16 | 17 | 18 | print('#',50*"-") 19 | # ================================================================= 20 | # Class_Ex2: 21 | # Create a treatment column and add it to DataFrame that has 6 entries 22 | # which the first 4 are zero and the 5 and 6 element are 1 the rest are NAN 23 | # ---------------------------------------------------------------- 24 | 25 | 26 | 27 | print('#',50*"-") 28 | # ================================================================= 29 | # Class_Ex3: 30 | # Create a month column and add it to DataFrame. Just for month Jan. 
31 | # ---------------------------------------------------------------- 32 | 33 | 34 | 35 | print('#',50*"-") 36 | # ================================================================= 37 | # Class_Ex4: 38 | # Drop the month column. 39 | # ---------------------------------------------------------------- 40 | 41 | 42 | print('#',50*"-") 43 | # ================================================================= 44 | # Class_Ex5: 45 | # Create a numpy array that has all the values of DataFrame. 46 | # ---------------------------------------------------------------- 47 | 48 | print('#',50*"-") 49 | 50 | # ================================================================= 51 | # Class_Ex6: 52 | # Read baseball data into a DataFrame and check the first and last 53 | # 10 rows 54 | # ---------------------------------------------------------------- 55 | 56 | 57 | print('#',50*"-") 58 | # ================================================================= 59 | # Class_Ex7: 60 | # Create a unique index by specifying the id column as the index 61 | # Check the new df and verify it is unique 62 | # ---------------------------------------------------------------- 63 | 64 | 65 | print('#',50*"-") 66 | 67 | # ================================================================= 68 | # Class_Ex8: 69 | #Notice that the id index is not sequential. Say we wanted to populate 70 | # the table with every id value. 
71 | # Hint: We could specify and index that is a sequence from the first 72 | # to the last id numbers in the database, and Pandas would fill in the 73 | # missing data with NaN values: 74 | # ---------------------------------------------------------------- 75 | 76 | print('#',50*"-") 77 | 78 | # ================================================================= 79 | # Class_Ex9: 80 | # Fill the missing values 81 | # ---------------------------------------------------------------- 82 | 83 | 84 | print('#',50*"-") 85 | 86 | # ================================================================= 87 | # Class_Ex10: 88 | # Find the shape of the new df 89 | # ---------------------------------------------------------------- 90 | 91 | 92 | print('#',50*"-") 93 | 94 | # ================================================================= 95 | # Class_Ex11: 96 | # Drop row 89525 and 89526 97 | # ---------------------------------------------------------------- 98 | 99 | print('#',50*"-") 100 | 101 | 102 | # ================================================================= 103 | # Class_Ex12: 104 | # Sor the df ascending and not descending 105 | # ---------------------------------------------------------------- 106 | 107 | print('#',50*"-") 108 | 109 | -------------------------------------------------------------------------------- /Supervised_Learning/Logitic_Regression/Readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Logistic Regression Algorithm 4 | 5 | ### Dataset - 1 : 6 | 7 | **1. Wine Quality Data** 8 | 9 | http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv 10 | 11 | **2. Data Set Information:** 12 | 13 | The datasets is related to red variant of the Portuguese "Vinho Verde" wine. 14 | For more details, consult: http://www.vinhoverde.pt/en/ or the reference [Cortez et al., 2009]. 
15 | Due to privacy and logistic issues, only physicochemical (inputs) and sensory (the output) variables 16 | are available (e.g. there is no data about grape types, wine brand, wine selling price, etc.). 17 | 18 | The inputs include objective tests (e.g. PH values) and the output is based on sensory data 19 | (median of at least 3 evaluations made by wine experts). The wine quality is graded between 20 | 0 (very bad) and 10 (very excellent) 21 | 22 | **3. Number of Instances: 1599** 23 | 24 | **4. Number of Attributes: 12** 25 | 26 | **5. Attribute Information:** 27 | 28 | 29 | 30 | Target 31 | - quality (score between 0 and 10) - categorical 32 | 33 | Predictors - numeric 34 | - fixed acidity 35 | - volatile acidity 36 | - citric acid 37 | - residual sugar 38 | - chlorides 39 | - free sulfur dioxide 40 | - total sulfur dioxide 41 | - density 42 | - pH 43 | - sulphates 44 | - alcohol 45 | 46 | http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality.names 47 | 48 | 49 | ### Dataset - 2 : 50 | 51 | **1. Bank Marketing Data** 52 | 53 | https://archive.ics.uci.edu/ml/datasets/bank+marketing 54 | 55 | **2. Data Set Information:** 56 | 57 | The data is related with direct marketing campaigns of a Portuguese banking institution. The 58 | marketing campaigns were based on phone calls. Often, more than one contact to the same client 59 | was required, in order to access if the product (bank term deposit) would be ('yes') or not ('no') 60 | subscribed. 61 | 62 | The classification goal is to predict if the client will subscribe (yes/no) a term deposit (variable y). 63 | 64 | 65 | **3. Number of Instances: 41188** 66 | 67 | **4. Number of Attributes: 21** 68 | 69 | **5. Attribute Information:** 70 | 71 | Target 72 | - y - has the client subscribed a term deposit? 
(binary: 'yes','no') - categorical 73 | 74 | Predictors 75 | - age - age in years (numeric) 76 | - job : type of job (categorical) 77 | - marital : marital status (categorical) 78 | - education (categorical) 79 | - default: has credit in default? (categorical) 80 | - housing: has housing loan? (categorical) 81 | - loan: has personal loan? (categorical) 82 | - contact: contact communication type (categorical) 83 | - month: last contact month of year (categorical) 84 | - day_of_week: last contact day of the week (categorical') 85 | 86 | - duration: last contact duration, in seconds (numeric). 87 | Important note: this attribute highly effects the output target g., if duration=0 88 | then y='no'). Yet, the duration is not known before a call is performed. Also, after 89 | the end of the call is obviously known. 90 | Thus, this input should only be included for benchmark purposes and should be 91 | discarded if the intention s to have a realistic predictive model 92 | 93 | - campaign: number of contacts performed during this campaign and for this client (numeric) 94 | 95 | - pdays: number of days that passed by after the client was last contacted from a previous 96 | campaign (numeric) 97 | 98 | - previous: number of contacts performed before this campaign and for this client (numeric) 99 | - poutcome: outcome of the previous marketing campaign (categorical) 100 | - emp.var.rate: employment variation rate - quarterly indicator (numeric) 101 | - cons.price.idx: consumer price index - monthly indicator (numeric) 102 | - cons.conf.idx: consumer confidence index - monthly indicator (numeric) 103 | - euribor3m: euribor 3 month rate - daily indicator (numeric) 104 | - nr.employed: number of employees - quarterly indicator (numeric) 105 | 106 | https://archive.ics.uci.edu/ml/datasets/bank+marketing 107 | -------------------------------------------------------------------------------- /Supervised_Learning/SVM/Sample_KNN_Example_mushrom.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.preprocessing import LabelEncoder 4 | from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve, roc_auc_score, classification_report 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.svm import SVC 7 | import seaborn as sns 8 | import matplotlib.pyplot as plt 9 | import warnings 10 | 11 | warnings.filterwarnings("ignore") 12 | 13 | 14 | # Function to load and prepare the data 15 | def load_and_prepare_data(filename): 16 | mushroom_data = pd.read_csv(filename, sep=',', header=None) 17 | mushroom_data.columns = ['class', 'cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor', 'gill-attachment', 18 | 'gill-spacing', 'gill-size', 'gill-color', 'stalk-shape', 'stalk-root', 19 | 'stalk-surf-abv-ring', 'stalk-surf-bel-ring', 'stalk-color-abv-ring', 20 | 'stalk-color-bel-ring', 21 | 'veil-type', 'veil-color', 'ring-number', 'ring-type', 22 | 'spore-print-color', 'population', 'habitat'] 23 | mushroom_data.replace('?', np.NaN, inplace=True) 24 | mushroom_data = mushroom_data.apply(lambda x: x.fillna(x.value_counts().index[0])) 25 | return mushroom_data 26 | 27 | 28 | # Function to convert data 29 | def convert_data(mushroom_data): 30 | X_data = pd.get_dummies(mushroom_data.iloc[:, 1:]) 31 | X = X_data.values 32 | Y_data = mushroom_data.values[:, 0] 33 | class_le = LabelEncoder() 34 | y = class_le.fit_transform(Y_data) 35 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=10) 36 | return X_data, X_train, X_test, y_train, y_test 37 | 38 | 39 | # Function for SVM classification and prediction 40 | def svm_classification(X_train, y_train): 41 | clf = SVC(kernel="linear") 42 | clf.fit(X_train, y_train) 43 | return clf 44 | 45 | 46 | # Function to visualize important features 47 | def coef_values(coef, names): 48 | imp = coef 49 | imp, names = 
zip(*sorted(zip(imp.ravel(), names))) 50 | imp_pos_10 = imp[-10:] 51 | names_pos_10 = names[-10:] 52 | imp_neg_10 = imp[:10] 53 | names_neg_10 = names[:10] 54 | imp_top_20 = imp_neg_10 + imp_pos_10 55 | names_top_20 = names_neg_10 + names_pos_10 56 | plt.barh(range(len(names_top_20)), imp_top_20, align='center') 57 | plt.yticks(range(len(names_top_20)), names_top_20) 58 | plt.show() 59 | 60 | 61 | # Function to visualize confusion matrix 62 | def visualize_confusion_matrix(y_test, y_pred, df_cm): 63 | plt.figure(figsize=(5, 5)) 64 | sns.heatmap(df_cm, cbar=False, annot=True, square=True, fmt='d', 65 | yticklabels=df_cm.columns, xticklabels=df_cm.columns) 66 | plt.ylabel('True label', fontsize=20) 67 | plt.xlabel('Predicted label', fontsize=20) 68 | plt.tight_layout() 69 | plt.show() 70 | # Function for visualising ROC curve 71 | def roc_chart(y_test, y_pred_proba): 72 | fpr, tpr, _ = roc_curve(y_test, y_pred_proba) 73 | auc = roc_auc_score(y_test, y_pred_proba) 74 | plt.figure() 75 | lw = 2 76 | plt.plot(fpr, tpr, color='darkorange', 77 | lw=lw, label='ROC curve (area = %0.2f)' % auc) 78 | plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--') 79 | plt.xlim([0.0, 1.0]) 80 | plt.ylim([0.0, 1.05]) 81 | plt.xlabel('False Positive Rate') 82 | plt.ylabel('True Positive Rate') 83 | plt.title('Receiver operating characteristic example') 84 | plt.legend(loc="lower right") 85 | plt.show() 86 | 87 | 88 | # Main function 89 | def main(): 90 | mushroom_data = load_and_prepare_data("Data/agaricus-lepiota.data.csv") 91 | X_data, X_train, X_test, y_train, y_test = convert_data(mushroom_data) 92 | clf = svm_classification(X_train, y_train) 93 | y_pred = clf.predict(X_test) 94 | 95 | print("Classification Report: ") 96 | print(classification_report(y_test, y_pred)) 97 | 98 | print("Accuracy : ", accuracy_score(y_test, y_pred) * 100) 99 | 100 | coef_values(clf.coef_, X_data.columns) 101 | 102 | conf_matrix = confusion_matrix(y_test, y_pred) 103 | class_names = 
mushroom_data['class'].unique() 104 | df_cm = pd.DataFrame(conf_matrix, index=class_names, columns=class_names) 105 | visualize_confusion_matrix(y_test, y_pred, df_cm) 106 | 107 | y_pred_proba = clf.decision_function(X_test) 108 | roc_chart(y_test, y_pred_proba) 109 | 110 | 111 | if __name__ == "__main__": 112 | main() -------------------------------------------------------------------------------- /Python/04-Seaborn/Lecture Code/Lecture4_3_Code.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | sns.set(style="ticks") 5 | 6 | df = sns.load_dataset("anscombe") 7 | 8 | sns.lmplot(x="x", y="y", col="dataset", hue="dataset", data=df, 9 | col_wrap=2, ci=None, palette="muted", 10 | scatter_kws={"s": 50, "alpha": 1}) 11 | 12 | plt.show() 13 | print('#',50*"-") 14 | # ----------------------- 15 | sns.set(style="white", palette="muted", color_codes=True) 16 | rs = np.random.RandomState(10) 17 | 18 | f, axes = plt.subplots(2, 2, figsize=(7, 7), sharex=True) 19 | sns.despine(left=True) 20 | 21 | d = rs.normal(size=100) 22 | 23 | sns.distplot(d, kde=False, color="b", ax=axes[0, 0]) 24 | 25 | sns.distplot(d, hist=False, rug=True, color="r", ax=axes[0, 1]) 26 | 27 | sns.distplot(d, hist=False, color="g", kde_kws={"shade": True}, ax=axes[1, 0]) 28 | 29 | sns.distplot(d, color="m", ax=axes[1, 1]) 30 | plt.setp(axes, yticks=[]) 31 | 32 | plt.tight_layout() 33 | plt.show() 34 | 35 | print('#',50*"-") 36 | # ----------------------- 37 | sns.set() 38 | 39 | flights_long = sns.load_dataset("flights") 40 | 41 | flights = flights_long.pivot("month", "year", "passengers") 42 | 43 | 44 | f, ax = plt.subplots(figsize=(9, 6)) 45 | 46 | sns.heatmap(flights, annot=True, fmt="d", linewidths=.5, ax=ax) 47 | 48 | plt.show() 49 | print('#',50*"-") 50 | # ----------------------- 51 | sns.set(style="whitegrid", palette="pastel", color_codes=True) 52 | 53 | tips = 
sns.load_dataset("tips") 54 | 55 | sns.violinplot(x="day", y="total_bill", hue="smoker", 56 | split=True, inner="quart", 57 | palette={"Yes": "y", "No": "b"}, 58 | data=tips) 59 | 60 | sns.despine(left=True) 61 | plt.show() 62 | print('#',50*"-") 63 | # ----------------------- 64 | sns.set(style="darkgrid") 65 | 66 | df = sns.load_dataset("titanic") 67 | 68 | pal = dict(male="#6495ED", female="#F08080") 69 | 70 | g = sns.lmplot(x="age", y="survived", col="sex", hue="sex", data=df, 71 | palette=pal, y_jitter=.02, logistic=True) 72 | 73 | g.set(xlim=(0, 80), ylim=(-.05, 1.05)) 74 | 75 | plt.show() 76 | print('#',50*"-") 77 | # ----------------------- 78 | import pandas as pd 79 | sns.set(style="whitegrid", palette="muted") 80 | 81 | iris = sns.load_dataset("iris") 82 | 83 | iris = pd.melt(iris, "species", var_name="measurement") 84 | 85 | sns.swarmplot(x="measurement", y="value", hue="species", 86 | palette=["r", "c", "y"], data=iris) 87 | plt.show() 88 | print('#',50*"-") 89 | # ----------------------- 90 | sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)}) 91 | rs = np.random.RandomState(1979) 92 | x = rs.randn(500) 93 | g = np.tile(list("ABCDEFGHIJ"), 50) 94 | df = pd.DataFrame(dict(x=x, g=g)) 95 | m = df.g.map(ord) 96 | df["x"] += m 97 | pal = sns.cubehelix_palette(10, rot=-.25, light=.7) 98 | g = sns.FacetGrid(df, row="g", hue="g", aspect=15, palette=pal) 99 | g.map(sns.kdeplot, "x", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2) 100 | g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2) 101 | g.map(plt.axhline, y=0, lw=2, clip_on=False) 102 | def label(x, color, label): 103 | ax = plt.gca() 104 | ax.text(0, .2, label, fontweight="bold", color=color, 105 | ha="left", va="center", transform=ax.transAxes) 106 | g.map(label, "x") 107 | g.fig.subplots_adjust(hspace=-.25) 108 | g.set_titles("") 109 | g.set(yticks=[]) 110 | g.despine(bottom=True, left=True) 111 | plt.show() 112 | print('#',50*"-") 113 | # ----------------------- 114 | 
sns.set(style="ticks") 115 | 116 | df = sns.load_dataset("iris") 117 | sns.pairplot(df, hue="species") 118 | plt.show() 119 | print('#',50*"-") 120 | # ----------------------- 121 | sns.set(style="whitegrid") 122 | 123 | rs = np.random.RandomState(7) 124 | x = rs.normal(2, 1, 75) 125 | y = 2 + 1.5 * x + rs.normal(0, 2, 75) 126 | 127 | sns.residplot(x, y, lowess=True, color="g") 128 | plt.show() 129 | print('#',50*"-") 130 | # ----------------------- 131 | sns.set(style="white") 132 | 133 | rs = np.random.RandomState(5) 134 | mean = [0, 0] 135 | cov = [(1, .5), (.5, 1)] 136 | x1, x2 = rs.multivariate_normal(mean, cov, 500).T 137 | 138 | x1 = pd.Series(x1, name="$X_1$") 139 | x2 = pd.Series(x2, name="$X_2$") 140 | 141 | 142 | g = sns.jointplot(x1, x2, kind="kde", height=7, space=0) 143 | plt.show() 144 | print('#',50*"-") 145 | # ----------------------- 146 | sns.set(style="whitegrid") 147 | 148 | diamonds = sns.load_dataset("diamonds") 149 | clarity_ranking = ["I1", "SI2", "SI1", "VS2", "VS1", "VVS2", "VVS1", "IF"] 150 | 151 | sns.boxplot(x="clarity", y="carat", 152 | color="b", order=clarity_ranking, 153 | data=diamonds) 154 | plt.show() 155 | print('#',50*"-") 156 | # ----------------------- -------------------------------------------------------------------------------- /Python/06-Scipy/Class Ex/Class-Ex-Lecture6.py: -------------------------------------------------------------------------------- 1 | # ================================================================= 2 | # Class_Ex1: 3 | # We will do some manipulations on numpy arrays by importing some 4 | # images of a racoon. 
5 | # scipy provides a 2D array of this image 6 | # Plot the grey scale image of the racoon by using matplotlib 7 | # ---------------------------------------------------------------- 8 | from scipy import misc 9 | face = misc.face() ## Modify the face function 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | print('#',50*"-") 19 | # ================================================================= 20 | # Class_Ex2: 21 | # If still the face is gray choose the color map function and make it 22 | # gray 23 | # ---------------------------------------------------------------- 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | print('#',50*"-") 35 | # ================================================================= 36 | # Class_Ex3: 37 | # Crop the image (an array of the image) with a narrower centering 38 | # Plot the crop image again. 39 | # ---------------------------------------------------------------- 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | print('#',50*"-") 49 | # ================================================================= 50 | # Class_Ex4: 51 | # Take the racoon face out and mask everything with black color. 52 | # ---------------------------------------------------------------- 53 | 54 | 55 | 56 | 57 | 58 | 59 | print('#',50*"-") 60 | # ================================================================= 61 | # Class_Ex5: 62 | # For linear equation systems on the matrix form Ax=b where A is 63 | # a matrix and x,b are vectors use scipy to solve the for x. 64 | # Create any matrix A and B (Size matters) 65 | # ---------------------------------------------------------------- 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | print('#',50*"-") 76 | # ================================================================= 77 | # Class_Ex6: 78 | # Calculate eigenvalue of matrix A. (create any matrix and check your 79 | # results.) 
80 | # ---------------------------------------------------------------- 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | print('#',50*"-") 91 | # ================================================================= 92 | # Class_Ex7: 93 | # Sparse matrices are often useful in numerical simulations dealing 94 | # with large datasets 95 | # Convert sparse matrix to dense and vice versa 96 | # ---------------------------------------------------------------- 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | print('#',50*"-") 107 | 108 | # ================================================================= 109 | # Class_Ex8: 110 | # Create any polynomial to order of 3 and write python function for it 111 | # then use scipy to minimize the function (use Scipy) 112 | # ---------------------------------------------------------------- 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | print('#',50*"-") 125 | # ================================================================= 126 | # Class_Ex9: 127 | # use the brent or fminbound functions for optimization and try again. 128 | # (use Scipy) 129 | # ---------------------------------------------------------------- 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | print('#',50*"-") 140 | # ================================================================= 141 | # Class_Ex10: 142 | # Find a solution to a function. f(x)=0 use the fsolve (use Scipy) 143 | # ---------------------------------------------------------------- 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | print('#',50*"-") 156 | # ================================================================= 157 | # Class_Ex11: 158 | # Create a sine or cosine function with a big step size. Use scipy to 159 | # interpolate between each data points. Use different interpolations. 
160 | # plot the results (use Scipy) 161 | # ---------------------------------------------------------------- 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | print('#',50*"-") 173 | # ================================================================= 174 | # Class_Ex12: 175 | # Use scipy statistics methods on randomly created array (use Scipy) 176 | # PDF, CDF (CUMsum), Mean, Std, Histogram 177 | # ---------------------------------------------------------------- 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | print('#',50*"-") 187 | # ================================================================= 188 | # Class_Ex13: 189 | # USe hypothesise testing if two datasets of (independent) random varibales 190 | # comes from the same distribution (use Scipy) 191 | # Calculate p values. 192 | # ---------------------------------------------------------------- 193 | 194 | 195 | 196 | 197 | 198 | 199 | print('#',50*"-") 200 | # ---------------------------------------------------------------- -------------------------------------------------------------------------------- /Supervised_Learning/KNN/Data/iris.data.csv: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 
5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 
| 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 
| 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica 151 | 152 | -------------------------------------------------------------------------------- /Supervised_Learning/Decision_Tree/DT/Sample_DT_Example_balance.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import LabelEncoder 3 | from sklearn.model_selection import train_test_split 4 | from sklearn.tree import DecisionTreeClassifier 5 | from sklearn.metrics import accuracy_score 6 | from sklearn.metrics import classification_report 7 | from sklearn.metrics import confusion_matrix 8 | import seaborn as sns 9 | import matplotlib.pyplot as plt 10 | from pydotplus import graph_from_dot_data 11 | from sklearn.tree import export_graphviz 12 | import webbrowser 13 | import warnings 14 | warnings.filterwarnings("ignore") 15 | # ------------------------------------------------------------------------------------------------------------------- 16 | import os 17 | os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz/bin/' 18 | # ------------------------------------------------------------------------------------------------------------------- 19 | data = pd.read_csv('Data/balance-scale.data.csv', sep=',', header=None) 20 | data.columns = ['Class_Name', 'Left-Weight', 'Left-Distance', 'Right-Weight', 'Right-Distance'] 21 | print("Dataset No. of Rows: ", data.shape[0]) 22 | print("Dataset No. 
of Columns: ", data.shape[1]) 23 | print("Dataset first few rows:\n ") 24 | print(data.head()) 25 | print("Dataset info:\n ") 26 | print(data.info()) 27 | print(data.describe(include='all')) 28 | 29 | 30 | X = data.values[:, 1:5] 31 | y = data.values[:, 0] 32 | # ------------------------------------------------------------------------------------------------------------------ 33 | class_le = LabelEncoder() 34 | y = class_le.fit_transform(y) 35 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100) 36 | # ------------------------------------------------------------------------------------------------------------------- 37 | clf_gini = DecisionTreeClassifier(criterion="gini", random_state=100, max_depth=3, min_samples_leaf=5) 38 | clf_gini.fit(X_train, y_train) 39 | # ------------------------------------------------------------------------------------------------------------------- 40 | clf_entropy = DecisionTreeClassifier(criterion="entropy", random_state=100, max_depth=3, min_samples_leaf=5) 41 | clf_entropy.fit(X_train, y_train) 42 | # ------------------------------------------------------------------------------------------------------------------- 43 | y_pred_gini = clf_gini.predict(X_test) 44 | y_pred_entropy = clf_entropy.predict(X_test) 45 | # ------------------------------------------------------------------------------------------------------------------- 46 | print("Results Using Gini Index: \n") 47 | print("Classification Report: ") 48 | print(classification_report(y_test,y_pred_gini)) 49 | print("Accuracy : ", accuracy_score(y_test, y_pred_gini) * 100) 50 | print("Results Using Entropy: \n") 51 | print("Classification Report: ") 52 | print(classification_report(y_test,y_pred_entropy)) 53 | print("Accuracy : ", accuracy_score(y_test, y_pred_entropy) * 100) 54 | # ------------------------------------------------------------------------------------------------------------------- 55 | conf_matrix = 
confusion_matrix(y_test, y_pred_gini) 56 | class_names = data.Class_Name.unique() 57 | df_cm = pd.DataFrame(conf_matrix, index=class_names, columns=class_names ) 58 | # ------------------------------------------------------------------------------------------------------------------- 59 | plt.figure(figsize=(5,5)) 60 | hm = sns.heatmap(df_cm, cbar=False, annot=True, square=True, fmt='d', annot_kws={'size': 20}, yticklabels=df_cm.columns, xticklabels=df_cm.columns) 61 | hm.yaxis.set_ticklabels(hm.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=20) 62 | hm.xaxis.set_ticklabels(hm.xaxis.get_ticklabels(), rotation=0, ha='right', fontsize=20) 63 | plt.ylabel('True label',fontsize=20) 64 | plt.xlabel('Predicted label',fontsize=20) 65 | plt.tight_layout() 66 | plt.show() 67 | # ------------------------------------------------------------------------------------------------------------------- 68 | conf_matrix = confusion_matrix(y_test, y_pred_entropy) 69 | class_names = data.Class_Name.unique() 70 | df_cm = pd.DataFrame(conf_matrix, index=class_names, columns=class_names ) 71 | 72 | plt.figure(figsize=(5,5)) 73 | hm = sns.heatmap(df_cm, cbar=False, annot=True, square=True, fmt='d', annot_kws={'size': 20}, yticklabels=df_cm.columns, xticklabels=df_cm.columns) 74 | hm.yaxis.set_ticklabels(hm.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=20) 75 | hm.xaxis.set_ticklabels(hm.xaxis.get_ticklabels(), rotation=0, ha='right', fontsize=20) 76 | plt.ylabel('True label',fontsize=20) 77 | plt.xlabel('Predicted label',fontsize=20) 78 | plt.tight_layout() 79 | plt.show() 80 | 81 | # ------------------------------------------------------------------------------------------------------------------- 82 | dot_data = export_graphviz(clf_gini, filled=True, rounded=True, class_names=class_names, feature_names=data.iloc[:, 1:5].columns, out_file=None) 83 | graph = graph_from_dot_data(dot_data) 84 | graph.write_pdf("decision_tree_gini.pdf") 85 | 
webbrowser.open_new(r'decision_tree_gini.pdf') 86 | # ------------------------------------------------------------------------------------------------------------------- 87 | dot_data = export_graphviz(clf_entropy, filled=True, rounded=True, class_names=class_names, feature_names=data.iloc[:, 1:5].columns, out_file=None) 88 | graph = graph_from_dot_data(dot_data) 89 | graph.write_pdf("decision_tree_entropy.pdf") 90 | webbrowser.open_new(r'decision_tree_entropy.pdf') 91 | --------------------------------------------------------------------------------