├── Aurora.py ├── License.txt ├── README.md ├── __pycache__ ├── Aurora.cpython-38.pyc └── Aurora.cpython-39.pyc ├── icon.png ├── images ├── logo.png ├── ss1.png └── ss2.png ├── plugins ├── IsolationForest.py ├── Mann_Whit.py ├── PCA.py ├── __pycache__ │ ├── IsolationForest.cpython-38.pyc │ ├── Mann_Whit.cpython-38.pyc │ ├── PCA.cpython-38.pyc │ ├── anova.cpython-38.pyc │ ├── cca.cpython-38.pyc │ ├── esm.cpython-38.pyc │ ├── example_plugin_a.cpython-38.pyc │ ├── plugin_a.cpython-38.pyc │ ├── plugin_b.cpython-38.pyc │ ├── poisson_probabilities.cpython-38.pyc │ └── vine_copula.cpython-38.pyc ├── anova.py ├── autoencoder.py ├── cca.py ├── esm.py ├── example_plugin_a.py ├── histogram.py ├── kmeans.py ├── knn.py ├── pearson.py ├── poisson_probabilities.py ├── svm.py ├── text_classifier.py └── xgboost.py └── requirements.txt /Aurora.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | ##################################################### 5 | #### Package: Aurora 6 | #### Version: 0.2 7 | #### Author: Marius Neagoe 8 | #### Copyright: © 2024 Marius Neagoe 9 | #### Website: https://mariusneagoe.com 10 | #### Github: https://github.com/MariusNea/Aurora 11 | ##################################################### 12 | 13 | import tkinter as tk 14 | from tkinter import ttk, StringVar 15 | from tkinter import messagebox 16 | from tkinter import filedialog 17 | import pandas as pd 18 | from tkinter import simpledialog 19 | import os 20 | import importlib.util 21 | from sklearn.model_selection import train_test_split 22 | from sklearn.linear_model import LinearRegression, LogisticRegression 23 | from sklearn.tree import DecisionTreeClassifier 24 | from statsmodels.tsa.seasonal import STL 25 | import matplotlib.pyplot as plt 26 | from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg 27 | from tkinter import Label, Entry, Button 28 | from PIL import Image, ImageTk 29 | from io import 
BytesIO 30 | from matplotlib.widgets import RectangleSelector 31 | from sklearn.preprocessing import LabelEncoder 32 | 33 | 34 | class DataFrameEditor: 35 | def __init__(self, root, dataframe): 36 | self.root = root 37 | self.root.title("Aurora") 38 | self.dataframe = dataframe 39 | self.plugins = {} 40 | self.selected_columns = [] 41 | self.menu_bar = tk.Menu(self.root) 42 | self.root.config(menu=self.menu_bar) 43 | self.create_menu() 44 | self.current_page = 0 45 | self.rows_per_page = 100 # Set the number of rows per page 46 | self.total_pages = (len(self.dataframe) - 1) // self.rows_per_page + 1 47 | self.tree = ttk.Treeview(root) 48 | self.tree.pack(expand=True, fill='both') 49 | 50 | self.setup_tree_view() 51 | self.add_controls() 52 | self.add_pagination_controls() 53 | self.target_col = None 54 | self.model = None 55 | self.input_data = None 56 | highlighted1 = [] 57 | highlighted2 = [] 58 | self.sel_list = [] 59 | 60 | def register_plugin(self, category, name, menu_text): 61 | def decorator(func): 62 | if name not in self.plugins: 63 | self.plugins[name] = func 64 | if category == 'statistics': 65 | self.add_plugin_menu_item(self.stats_menu, menu_text, func) 66 | elif category == 'machine_learning': 67 | self.add_plugin_menu_item(self.ml_menu, menu_text, func) 68 | else: 69 | print(f"Plugin '{name}' is already registered.") 70 | return func 71 | return decorator 72 | 73 | def create_menu(self): 74 | # Create a menu bar 75 | 76 | 77 | # File menu 78 | file_menu = tk.Menu(self.menu_bar, tearoff=0) 79 | self.menu_bar.add_cascade(label="File", menu=file_menu) 80 | # Add menu items to the File menu 81 | file_menu.add_separator() 82 | file_menu.add_command(label="Exit", command=self.root.destroy) 83 | 84 | # Edit menu 85 | #edit_menu = tk.Menu(menu_bar, tearoff=0) 86 | #menu_bar.add_cascade(label="Edit", menu=edit_menu) 87 | #edit_menu.add_command(label="Cut", command=self.dummy_function) 88 | #edit_menu.add_command(label="Copy", command=self.dummy_function) 89 
| #edit_menu.add_command(label="Paste", command=self.dummy_function) 90 | 91 | # Statistics menu 92 | self.stats_menu = tk.Menu(self.menu_bar, tearoff=0) 93 | self.menu_bar.add_cascade(label="Statistics", menu=self.stats_menu) 94 | self.stats_menu.add_command(label="Generate Statistics", command=self.dummy_function) 95 | self.stats_menu.add_command(label="Statistical Models", command=self.regressions) 96 | self.stats_menu.add_command(label="Time Series Decomposition", command=self.decompose_and_plot) 97 | #Machine Learning menu 98 | self.ml_menu = tk.Menu(self.menu_bar, tearoff=0) 99 | self.menu_bar.add_cascade(label="Machine Learning", menu=self.ml_menu) 100 | 101 | 102 | # Help menu 103 | help_menu = tk.Menu(self.menu_bar, tearoff=0) 104 | self.menu_bar.add_cascade(label="Help", menu=help_menu) 105 | help_menu.add_command(label="About", command=self.show_about) 106 | help_menu.add_command(label="License", command=self.show_license) 107 | 108 | def add_plugin_menu_item(self, menu, text, command): 109 | menu.add_command(label=text, command=command) 110 | 111 | def setup_tree_view(self): 112 | # Clear existing columns and rows in the Treeview 113 | for i in self.tree.get_children(): 114 | self.tree.delete(i) 115 | self.tree["columns"] = list(self.dataframe.columns) 116 | self.tree["show"] = "headings" 117 | for column in self.dataframe.columns: 118 | self.tree.heading(column, text=column) 119 | self.tree.column(column, anchor='center') 120 | 121 | # Inserting rows from the current page 122 | start = self.current_page * self.rows_per_page 123 | end = start + self.rows_per_page 124 | display_df = self.dataframe.iloc[start:end] 125 | for _, row in display_df.iterrows(): 126 | self.tree.insert('', 'end', values=list(row)) 127 | 128 | def add_pagination_controls(self): 129 | button_style = {"background": "#4CAF50", # Green background color 130 | "foreground": "white", # White text color 131 | "font": ("Arial", 12), # Font and size 132 | "borderwidth": 2, # Border width 
133 | "relief": "groove"} 134 | pagination_frame = tk.Frame(self.root) 135 | pagination_frame.pack(fill='x', padx=5, pady=5) 136 | prev_button = tk.Button(pagination_frame, text="Previous", command=self.prev_page, **button_style) 137 | prev_button.pack(side='left') 138 | next_button = tk.Button(pagination_frame, text="Next", command=self.next_page, **button_style) 139 | next_button.pack(side='left') 140 | 141 | def prev_page(self): 142 | if self.current_page > 0: 143 | self.current_page -= 1 144 | self.setup_tree_view() 145 | 146 | def next_page(self): 147 | if self.current_page < self.total_pages - 1: 148 | self.current_page += 1 149 | self.setup_tree_view() 150 | 151 | 152 | def add_controls(self): 153 | add_row_button = tk.Button(self.root, text="Add Row", command=self.add_row) 154 | add_row_button.pack(side='left') 155 | 156 | delete_row_button = tk.Button(self.root, text="Delete Row", command=self.delete_row) 157 | delete_row_button.pack(side='left') 158 | 159 | add_column_button = tk.Button(self.root, text="Add Column", command=self.add_column) 160 | add_column_button.pack(side='left') 161 | 162 | delete_column_button = tk.Button(self.root, text="Delete Column", command=self.delete_column) 163 | delete_column_button.pack(side='left') 164 | 165 | clear_button = tk.Button(self.root, text="Clear Selection", command=self.clear_list) 166 | clear_button.pack(side='right') 167 | 168 | # Button to plot selected columns 169 | plot_button = tk.Button(self.root, text="Plot or Brush", command=self.int_hig_wrap) 170 | plot_button.pack(side='right') 171 | 172 | # Button to select columns for plotting 173 | select_button = tk.Button(self.root, text="Select Columns to Plot or Brush", command=self.select_columns) 174 | select_button.pack(side='right') 175 | 176 | refresh_button = tk.Button(self.root, text="Refresh Dataframe", command=self.update_frame) 177 | refresh_button.pack(side='right') 178 | 179 | save_button = tk.Button(self.root, text="Save Dataframe", 
command=self.save_df) 180 | save_button.pack(side='right') 181 | 182 | encode_button = tk.Button(self.root, text="Label Encode", command=self.encode_labels) 183 | encode_button.pack(side='right') 184 | 185 | 186 | self.tree.bind('', self.on_item_double_click) 187 | 188 | def clear_list(self): 189 | self.sel_list.clear() 190 | 191 | def dummy_function(self): 192 | summary = self.dataframe.describe() 193 | result = "Summary Statistics" 194 | 195 | # Create a new window to display the result 196 | result_window = tk.Toplevel(self.root) 197 | result_window.title("summary Statistics") 198 | 199 | # Create a label to display the result 200 | result_label = tk.Label(result_window, text=summary, padx=10, pady=10) 201 | result_label.pack() 202 | 203 | 204 | def decompose_and_plot(self): 205 | 206 | # Function to handle plotting with the entered period 207 | def plot_with_period(): 208 | try: 209 | period = int(period_entry.get()) 210 | except ValueError: 211 | tk.messagebox.showerror("Error", "Please enter a valid integer for the period. 
First column hast to be Date and second Series") 212 | return 213 | 214 | # Assuming the first column is datetime and the second column is values 215 | time_series = self.dataframe.iloc[:, 1] 216 | 217 | # Perform STL decomposition 218 | decomposition = STL(time_series, period=period).fit() 219 | 220 | # Extract components 221 | original = time_series 222 | trend = decomposition.trend 223 | seasonal = decomposition.seasonal 224 | residual = decomposition.resid 225 | 226 | # Plot the components 227 | root = tk.Toplevel(self.root) 228 | 229 | root.title("Time Series Decomposition") 230 | 231 | fig, axs = plt.subplots(4, 1, figsize=(8, 10), sharex=True) 232 | 233 | axs[0].plot(original, label='Original') 234 | axs[0].set_ylabel('Original') 235 | 236 | axs[1].plot(trend, label='Trend', color='orange') 237 | axs[1].set_ylabel('Trend') 238 | 239 | axs[2].plot(seasonal, label='Seasonal', color='green') 240 | axs[2].set_ylabel('Seasonal') 241 | 242 | axs[3].plot(residual, label='Residual', color='red') 243 | axs[3].set_ylabel('Residual') 244 | 245 | for ax in axs: 246 | ax.legend() 247 | 248 | # Embed the matplotlib plot into the Tkinter window 249 | canvas = FigureCanvasTkAgg(fig, master=root) 250 | canvas_widget = canvas.get_tk_widget() 251 | canvas_widget.pack(side=tk.TOP, fill=tk.BOTH, expand=1) 252 | 253 | tk.mainloop() 254 | 255 | # Create a new window for period input 256 | period_window = tk.Toplevel(self.root) 257 | period_window.title("Enter Seasonality Period") 258 | 259 | # Label and Entry for period input 260 | label = Label(period_window, text="Enter Seasonality Period:") 261 | label.pack(pady=10) 262 | period_entry = Entry(period_window) 263 | period_entry.pack(pady=10) 264 | 265 | # Button to trigger the plot with the entered period 266 | plot_button = Button(period_window, text="Plot", command=plot_with_period) 267 | plot_button.pack(pady=10) 268 | 269 | def train_linear_regression(self, target_col): 270 | if len(self.dataframe) == 1: 271 | return None # 
Return None if there's only one sample 272 | X_train, X_test, y_train, y_test = train_test_split(self.dataframe.drop(columns=[target_col]), self.dataframe[target_col], test_size=0.2, random_state=42) 273 | model = LinearRegression() 274 | model.fit(X_train, y_train) 275 | return model 276 | 277 | def train_logistic_regression(self, target_col): 278 | if len(self.dataframe) == 1: 279 | return None # Return None if there's only one sample 280 | X_train, X_test, y_train, y_test = train_test_split(self.dataframe.drop(columns=[target_col]), self.dataframe[target_col], test_size=0.2, random_state=42) 281 | model = LogisticRegression() 282 | model.fit(X_train, y_train) 283 | return model 284 | 285 | def train_decision_tree(self, target_col): 286 | if len(self.dataframe) == 1: 287 | return None # Return None if there's only one sample 288 | X_train, X_test, y_train, y_test = train_test_split(self.dataframe.drop(columns=[target_col]), self.dataframe[target_col], test_size=0.2, random_state=42) 289 | model = DecisionTreeClassifier() 290 | model.fit(X_train, y_train) 291 | return model 292 | 293 | def make_predictions(self, model, input_data): 294 | if model is None: 295 | return None # Return None if the model is not trained 296 | predictions = model.predict(input_data) 297 | return predictions 298 | 299 | def on_predict_button_click(self, selected_model, entry_features, label_predictions): 300 | # Get values from entry widgets 301 | feature_values = [float(entry.get()) for entry in entry_features] 302 | 303 | # Create a DataFrame for prediction 304 | new_data = pd.DataFrame([feature_values], columns=self.dataframe.columns[:-1]) 305 | 306 | # Train the selected model 307 | if selected_model == "linear": 308 | model = self.train_linear_regression(target_col='target') 309 | elif selected_model == "logistic": 310 | model = self.train_logistic_regression(target_col='target') 311 | elif selected_model == "tree": 312 | model = self.train_decision_tree(target_col='target') 313 | 
else: 314 | model = None 315 | 316 | # Make predictions 317 | if model is not None: 318 | predictions = self.make_predictions(model, new_data) 319 | # Display predictions in labels or handle as needed 320 | label_predictions.config(text=f"Prediction: {predictions}") 321 | else: 322 | label_predictions.config(text="Please select a valid model before predicting.") 323 | 324 | def regressions(self): 325 | # Create a Tkinter window 326 | window = tk.Toplevel(self.root) 327 | window.title("Machine Learning Predictions") 328 | 329 | # Ask the user for the number of features 330 | num_features = simpledialog.askinteger("Number of Features", "Enter the number of features(number of columns from 1 to n-1). Last column is the predicted column:") 331 | 332 | # Create entry widgets for user input features 333 | entry_features = [] 334 | for i in range(num_features): 335 | entry = tk.Entry(window, width=10) 336 | entry.grid(row=i, column=1, padx=10, pady=10) 337 | entry_features.append(entry) 338 | label = tk.Label(window, text=f"Feature {i + 1}:") 339 | label.grid(row=i, column=0, padx=10, pady=10, sticky=tk.E) 340 | 341 | # Create radio buttons for selecting the model 342 | # Create radio buttons for selecting the model 343 | selected_model = tk.StringVar() 344 | linear_radio = tk.Radiobutton(window, text="Linear Regression", variable=selected_model, value="linear") 345 | linear_radio.grid(row=num_features, column=0, columnspan=2, pady=10) 346 | logistic_radio = tk.Radiobutton(window, text="Logistic Regression", variable=selected_model, value="logistic") 347 | logistic_radio.grid(row=num_features + 1, column=0, columnspan=2, pady=10) 348 | decision_tree_radio = tk.Radiobutton(window, text="Decision Tree", variable=selected_model, value="tree") 349 | decision_tree_radio.grid(row=num_features + 2, column=0, columnspan=2, pady=10) 350 | 351 | # Create labels for displaying predictions 352 | label_predictions = tk.Label(window, text="Predictions:") 353 | 
label_predictions.grid(row=num_features + 4, column=0, columnspan=2) 354 | 355 | # Create a button to trigger predictions 356 | predict_button = tk.Button(window, text="Predict", command=lambda: self.on_predict_button_click(selected_model.get(), entry_features, label_predictions)) 357 | predict_button.grid(row=num_features + 3, column=0, columnspan=2, pady=10) 358 | 359 | 360 | # Placeholder DataFrame with an unknown number of columns 361 | data = {'target': [0]} 362 | for i in range(num_features): 363 | data[f'feature{i + 1}'] = [0.0] # Initialize with placeholder values 364 | 365 | # Start the Tkinter event loop 366 | window.mainloop() 367 | 368 | 369 | def show_about(self): 370 | messagebox.showinfo("About", "Aurora \nVersion 0.1\n\nCreated by Marius Neagoe\n\n www.mariusneagoe.com") 371 | 372 | def show_license(self): 373 | license_window = tk.Toplevel() 374 | license_window.title("License") 375 | license_window.geometry("500x300") # You can adjust the size as needed 376 | 377 | # Create a Text widget for displaying the license 378 | license_text_widget = tk.Text(license_window, wrap="word") 379 | license_text_widget.pack(expand=True, fill="both", padx=10, pady=10) 380 | 381 | # License text 382 | license_text = """ AURORA - Accessible User-friendly Resources for Optimized Research Analytics 383 | Copyright (C) 2024 Marius Neagoe (www.mariusneagoe.com) 384 | 385 | This program is free software; you can redistribute it and/or 386 | modify it under the terms of the GNU General Public License 387 | as published by the Free Software Foundation; either version 2 388 | of the License, or (at your option) any later version. 389 | 390 | This program is distributed in the hope that it will be useful, 391 | but WITHOUT ANY WARRANTY; without even the implied warranty of 392 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 393 | GNU General Public License for more details. 
394 | 395 | You should have received a copy of the GNU General Public License 396 | along with this program; if not, write to the Free Software 397 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 398 | 399 | """ 400 | 401 | # Insert the license text into the Text widget and disable editing 402 | license_text_widget.insert(tk.END, license_text) 403 | license_text_widget.config(state="disabled") 404 | 405 | 406 | def select_columns(self): 407 | # Use simpledialog to prompt the user for column selection 408 | selected_columns = simpledialog.askstring("Select Columns", "Enter two column names separated by a comma (e.g., col1, col2):") 409 | if selected_columns: 410 | columns = [col.strip() for col in selected_columns.split(',')] 411 | if len(columns) == 2: 412 | self.selected_columns = columns 413 | self.sel_list.append(selected_columns) 414 | 415 | else: 416 | messagebox.showerror("Error", "Please enter exactly two column names.") 417 | self.select_columns() 418 | 419 | def int_hig_wrap(self): 420 | date1, high = self.sel_list[0].split(', ') 421 | 422 | if len(self.sel_list) == 1: 423 | #messagebox.showinfo("Info", "Press OK to plot. 
You have to select 2 pairs of columns in order to Brush.") 424 | plt.figure(figsize=(10, 6)) 425 | plt.scatter(self.dataframe[date1], self.dataframe[high]) 426 | plt.xlabel(date1) 427 | plt.ylabel(high) 428 | plt.show() 429 | else: 430 | date2, target = self.sel_list[1].split(', ') 431 | col1 = date1 432 | col2 = high 433 | col3 = date2 434 | col4 = target 435 | self.interactive_highlight(col1, col2, col3, col4) 436 | 437 | def interactive_highlight(self, col1, col2, col3, col4): 438 | 439 | # Proceed with the interactive highlight functionality for non-empty col3 and col4 440 | # Check for identical columns among col1, col2, col3, col4 441 | cols = [col1, col2, col3, col4] 442 | distinct_values = [] 443 | seen_values = set() 444 | 445 | for value in cols: 446 | if value not in seen_values: 447 | seen_values.add(value) 448 | distinct_values.append(value) 449 | 450 | x_col, y1_col, y2_col = distinct_values 451 | # Plotting both graphs 452 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5)) 453 | line1, = ax1.plot(self.dataframe[x_col], self.dataframe[y1_col], 'ro', picker=5) 454 | line2, = ax2.plot(self.dataframe[x_col], self.dataframe[y2_col], 'bo') 455 | 456 | highlighted1 = [] 457 | highlighted2 = [] 458 | 459 | def clear_previous_highlights(): 460 | for hl in highlighted1: 461 | hl.remove() 462 | highlighted1.clear() 463 | for hl in highlighted2: 464 | hl.remove() 465 | highlighted2.clear() 466 | 467 | def onselect(eclick, erelease): 468 | clear_previous_highlights() 469 | x1, y1 = eclick.xdata, eclick.ydata 470 | x2, y2 = erelease.xdata, erelease.ydata 471 | mask = (self.dataframe[x_col] >= min(x1, x2)) & (self.dataframe[x_col] <= max(x1, x2)) & \ 472 | (self.dataframe[y1_col] >= min(y1, y2)) & (self.dataframe[y1_col] <= max(y1, y2)) 473 | selected = self.dataframe[mask] 474 | hl1 = ax1.plot(selected[x_col], selected[y1_col], 'yo', linestyle='None', zorder=5) 475 | highlighted1.extend(hl1) 476 | hl2 = ax2.plot(selected[x_col], selected[y2_col], 'yo', 
linestyle='None', zorder=5) 477 | highlighted2.extend(hl2) 478 | fig.canvas.draw_idle() 479 | 480 | toggle_selector = RectangleSelector(ax1, onselect, useblit=True, 481 | button=[1], 482 | minspanx=5, minspany=5, 483 | spancoords='pixels', 484 | interactive=True) 485 | ax1.set_xlabel(x_col) 486 | ax1.set_ylabel(y1_col) 487 | ax2.set_xlabel(x_col) 488 | ax2.set_ylabel(y2_col) 489 | 490 | plt.show() 491 | 492 | 493 | def get_dataframe(self): 494 | return self.dataframe 495 | 496 | 497 | def on_item_double_click(self, event): 498 | item = self.tree.selection()[0] # This gets the ID of the selected item in the Treeview 499 | column = self.tree.identify_column(event.x) # Identifies the clicked column 500 | col_index = int(column.replace('#', '')) - 1 # Convert column ID to index 501 | 502 | new_value = simpledialog.askstring("Input", f"Enter new value:", parent=self.root) 503 | if new_value is not None: 504 | try: 505 | new_value = float(new_value) 506 | df_index = self.tree.index(item) # Assuming direct correspondence between Treeview and DataFrame indices 507 | if df_index < len(self.dataframe): 508 | self.dataframe.iat[df_index, col_index] = new_value # Update DataFrame 509 | self.tree.set(item, column=col_index, value=new_value) # Update Treeview 510 | else: 511 | print(f"Index {df_index} is out of bounds for the DataFrame.") 512 | except IndexError as e: 513 | print(f"Error updating cell: {e}") 514 | 515 | 516 | def encode_labels(self): 517 | categ_col = simpledialog.askstring("Input", f"Enter column name for Label Encoding:") 518 | # Check if the column exists in the dataframe 519 | if categ_col not in self.dataframe.columns: 520 | raise ValueError(f"The column '{categ_col}' does not exist in the dataframe.") 521 | 522 | # Initialize the LabelEncoder 523 | le = LabelEncoder() 524 | 525 | # Fit and transform the data in the column 526 | self.dataframe[categ_col] = le.fit_transform(self.dataframe[categ_col]) 527 | 528 | return self.dataframe 529 | 530 | 531 | def 
add_row(self): 532 | new_row_index = len(self.dataframe) # Next row index 533 | self.dataframe.loc[new_row_index] = [None] * len(self.dataframe.columns) # Initialize new row with None or suitable defaults 534 | self.tree.insert('', 'end', values=([None] * len(self.dataframe.columns))) # Add new row to Treeview as well 535 | 536 | 537 | def delete_row(self): 538 | selected_item = self.tree.selection()[0] # Treeview's selected item ID 539 | if selected_item: 540 | # Assuming the order of items in the Treeview matches the DataFrame's index order 541 | index_in_df = self.tree.index(selected_item) # Get index of the item in the Treeview 542 | df_index_to_delete = self.dataframe.index[index_in_df] # Get corresponding DataFrame index 543 | self.dataframe.drop(df_index_to_delete, inplace=True) # Drop the row from the DataFrame 544 | self.tree.delete(selected_item) # Delete the item from the Treeview 545 | 546 | 547 | def add_column(self): 548 | new_column_name = simpledialog.askstring("Input", "Enter new column name:", parent=self.root) 549 | if new_column_name: 550 | self.dataframe[new_column_name] = "" 551 | self.setup_tree_view() 552 | 553 | def delete_column(self): 554 | column_name = simpledialog.askstring("Input", "Enter column name to delete:", parent=self.root) 555 | if column_name and column_name in self.dataframe.columns: 556 | # Drop the column from the DataFrame 557 | self.dataframe.drop(columns=[column_name], inplace=True) 558 | 559 | # Rebuild the Treeview to reflect the change 560 | self.rebuild_treeview() 561 | 562 | def rebuild_treeview(self): 563 | # Clear the existing columns and data in the Treeview 564 | for col in self.tree['columns']: 565 | self.tree.delete(*self.tree.get_children()) 566 | self.tree.heading(col, text='') 567 | self.tree.column(col, width=0, minwidth=0) 568 | 569 | # Setup the Treeview again with the updated DataFrame 570 | self.setup_tree_view() 571 | 572 | def update_frame(self): 573 | self.setup_tree_view() 574 | 575 | def 
save_df(self): 576 | self.dataframe.to_csv('current_dataframe.csv', encoding='utf-8', index=False) 577 | messagebox.showinfo("Info", "Your dataframe was saved to current_dataframe.csv in current folder.") 578 | 579 | def load_plugins(directory: str, app): 580 | for filename in os.listdir(directory): 581 | if filename.endswith('.py') and not filename.startswith('__'): 582 | plugin_path = os.path.join(directory, filename) 583 | module_name = os.path.splitext(filename)[0] 584 | spec = importlib.util.spec_from_file_location(module_name, plugin_path) 585 | module = importlib.util.module_from_spec(spec) 586 | spec.loader.exec_module(module) 587 | # Check if the module has a register function and call it with the app instance 588 | if hasattr(module, 'register'): 589 | module.register(app) 590 | 591 | 592 | if __name__ == "__main__": 593 | root = tk.Tk() 594 | root.geometry("1200x680") 595 | 596 | root.iconphoto(False, tk.PhotoImage(file='icon.png')) 597 | # Use a file dialog to get the initial CSV file path 598 | initial_file_path = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv")]) 599 | 600 | if not initial_file_path: 601 | messagebox.showinfo("Info", "No file selected. 
Exiting.") 602 | root.destroy() 603 | quit() 604 | try: 605 | # Load the initial CSV file into a DataFrame 606 | initial_df = pd.read_csv(initial_file_path, engine='python') 607 | except Exception as e: 608 | messagebox.showerror("Error", f"Error loading initial CSV file: {e}") 609 | root.destroy() # destroy the root window in case of an error 610 | quit() 611 | 612 | app = DataFrameEditor(root, initial_df) 613 | try: 614 | load_plugins('plugins', app) 615 | except RuntimeError as error: 616 | print(error) 617 | print("Some plugins did not load correctly and it may not work.") 618 | pass 619 | root.mainloop() -------------------------------------------------------------------------------- /License.txt: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. 
Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 
55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. 
You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. 
(This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. 
These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. 
If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 
305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ![Python][Python] 4 | 5 |
6 |
7 | 8 | Logo 9 | 10 | 11 |

Aurora

12 | 13 |

14 | Problem solving focused statistical and machine learning software toolkit. 15 |
16 | Report Bug 17 | · 18 | Request Feature 19 |

20 |
21 | 22 | 23 | 24 | 25 | ## About The Project 26 | 27 | In today's world, the fields of statistics and machine learning hold immense potential for solving real-world problems and significantly impacting businesses and daily life. However, the complexity and learning curve associated with these fields can be daunting, making it challenging for those interested to effectively utilize these tools. Recognizing this gap, we've developed AURORA, a software solution crafted to make the power of statistical and machine learning models more accessible to everyone. 28 | 29 | AURORA is designed with the principle that tools that are capable of addressing a diverse range of problems should be within reach of anyone interested in applying scientific methods to their decision-making processes. Our aim is to remove the barriers posed by the need for specialized training, making it easier for individuals to leverage these models in their activities. 30 | 31 | Aurora comprises three main components: 32 | 33 | 1. **Algorithms Component**: This section encompasses various algorithms essential to Aurora's functionality. 34 | 2. **Data Gathering Module**: This module is responsible for collecting data from multiple sources, including web scraping tools. 35 | 3. **Automated Problem Solver Module**: Utilizing Natural Language Processing, this module assists users in navigating and applying interactively Aurora's capabilities to address their specific issues effectively. 36 |

(back to top)

37 | 38 | ### Examples 39 | 40 | Using Text Classifier from Aurora to predict if a message is spam or not 41 | 42 | [![Watch the video](https://img.youtube.com/vi/ntX30JjQB8M/0.jpg)](https://www.youtube.com/watch?v=ntX30JjQB8M) 43 | 44 | Predict employee churn using AURORA 45 | 46 | [![Watch the video](https://img.youtube.com/vi/fY1UBiRSwLg/0.jpg)](https://www.youtube.com/watch?v=fY1UBiRSwLg) 47 | 48 | ### Built With 49 | 50 | * ![Matplotlib][Matplotlib] 51 | * ![Pandas][Pandas] 52 | * ![Scikit-learn][scikit-learn] 53 | 54 |

(back to top)

55 | 56 | 57 | 58 | ### Prerequisites 59 | 60 | Make sure you have Python >=3.9 installed 61 | 62 | ### Installation 63 | 64 | 1. Clone the repo 65 | ```sh 66 | git clone https://github.com/MariusNea/Aurora.git 67 | ``` 68 | 2. Install libraries 69 | ```sh 70 | pip install -r requirements.txt 71 | ``` 72 | 73 |

(back to top)

74 | 75 | 76 | 77 | ## Usage 78 | 79 | ```sh 80 | python -m Aurora 81 | ``` 82 | The process commences with your .csv file containing the requisite information, which is initially imported as a dataframe into AURORA. Subsequently, all models are applied based on this dataframe. 83 | 84 |

Structuring the Dataframe for plugins

85 | 86 | Every plugin comes with its own documentation except the core plugins which are described here. 87 | 88 |
Regression Algorithms
89 | 90 | Within the dataframe, all columns except the last one function as features, while the final column represents the predicted variable. The Linear Regression algorithm can accommodate any type of numerical data in the predicted column, whereas Logistic Regression and Decision Trees are suitable for categorical data. 91 | 92 |
Mann-Whitney U Test
93 | 94 | This test is conducted between two consecutive columns in the dataframe. For instance, if there are four columns named data_1, data_2, data_3, and data_4, the Mann-Whitney U Test is performed between data_1 and data_2, and then between data_3 and data_4, respectively. Consequently, the dataframe must have an even number of columns. 95 | 96 |
ANOVA
 97 | 98 | The first column of the dataframe must contain your test categories. All other columns must be numeric and represent the results of your tests. If your dataframe contains cells without values, AURORA will clean it automatically. 99 | 100 | For a practical example, let's consider a scenario where a researcher wants to analyze the impact of three different types of fertilizer on the growth of plants. The researcher has three groups of plants, each group receiving a different type of fertilizer. The goal is to see if there's a significant difference in the growth of plants (measured in height) across these groups. 101 | 102 | CSV example: 103 | |No | Fertilizer_Type | Height_After_1_Month | Height_After_2_Months | Height_After_3_Months | 104 | |-----|---------------------|---------------------|------------------------|-----------------------| 105 | | 0 | Type_A | 5.1 | 7.2 | 9.8 | 106 | | 1 | Type_B | 4.8 | 7.0 | 10.1 | 107 | | 2 | Type_C | 5.3 | 7.9 | 10.5 | 108 | | 3 | Type_A | 5.5 | 7.5 | 9.9 | 109 | | 4 | Type_B | 4.9 | 7.1 | 10.0 | 110 | | 5 | Type_C | 5.0 | 7.8 | 10.2 | 111 | ... 112 | 113 | 114 | 
Outliers (Anomaly) Detection
 115 | 116 | This plugin uses the Isolation Forest algorithm to detect outliers in time series. From your dataframe, select the column on which you want to apply the algorithm. The result will be a plot with both inliers (red) and outliers (blue). 117 | 118 | 
Principal Component Analysis (PCA)
 119 | 120 | To apply this plugin to your dataframe, the last column must be the target column and all other columns must be feature columns. The output will be a .csv file with the principal components. 121 | 122 | 

Screenshots from main GUI

123 | 124 | ![Product Name Screen Shot][product-screenshot] 125 | ![Product Name Screen Shot2][product-screenshot2] 126 |

(back to top)

127 | 128 | 129 | 130 | ## Roadmap 131 | 132 | - [x] Implement Plot & CrossSelect 133 | - [x] Implement Dataframe Edit 134 | - [x] Implement Dataframe Pagination for fast loading 135 | - [x] Implement Linear Regression 136 | - [x] Implement Logistic Regression 137 | - [x] Implement Decision Tree 138 | - [x] Implement Time Series Decomposition 139 | - [x] Implement One Way ANOVA 140 | - [x] Implement Canonical Correlation Analysis 141 | - [x] Implement Exponential Smoothing Model 142 | - [x] Implement Mann-Whitney U Test 143 | - [x] Implement Poisson Probabilities 144 | - [x] Implement Anomaly (Outliers) Detection 145 | - [x] Implement Principal Component Analysis 146 | - [x] Implement Support Vector Machines 147 | - [x] Implement K-Nearest Neighbors 148 | - [x] Implement K-Means 149 | - [x] Implement Histogram 150 | - [x] Implement Text Classifier 151 | - [x] Implement Denoising Autoencoder 152 | - [x] Implement XGBoost (Regression and Classification) 153 | - [x] Implement Pearson Correlation 154 | - [ ] Implement Monte Carlo Simulation 155 | - [ ] Implement Interactive Web Scraper 156 | - [ ] Develop multiple methods for interactive data gathering 157 | - [ ] Implement Automated Problem Solver 158 | 159 |

(back to top)

160 | 161 | 162 | 163 | ## Contributing 164 | 165 | Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**. 166 | 167 | If you have a suggestion that would make this better, please fork the repo and create a pull request. You can also simply open an issue with the tag "enhancement". 168 | Don't forget to give the project a star! Thanks again! 169 | 170 | For contributing to the project follow steps described here 171 | 172 |

(back to top)

173 | 174 | 175 | 176 | 177 | ## License 178 | 179 | This project is dual licensed. Distributed under the GPL-2.0 license and a commercial license. See `LICENSE.txt` for GPL-2.0. 180 | 181 |

(back to top)

182 | 183 | 184 | 185 | 186 | ## Contact 187 | 188 | Find more here 189 | 190 | Show your support [!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://buymeacoffee.com/mariussorid) 191 |

(back to top)

192 | 193 | 194 | 195 | 196 | 197 | [product-screenshot]: images/ss1.png 198 | [product-screenshot2]: images/ss2.png 199 | [Matplotlib]: https://img.shields.io/badge/Matplotlib-%23ffffff.svg?style=for-the-badge&logo=Matplotlib&logoColor=black 200 | [Pandas]: https://img.shields.io/badge/pandas-%23150458.svg?style=for-the-badge&logo=pandas&logoColor=white 201 | [scikit-learn]: https://img.shields.io/badge/scikit--learn-%23F7931E.svg?style=for-the-badge&logo=scikit-learn&logoColor=white 202 | [Python]: https://ForTheBadge.com/images/badges/made-with-python.svg -------------------------------------------------------------------------------- /__pycache__/Aurora.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MariusNea/Aurora/2c9a2594355b2d6f9e63f6e76776121a1e08cec3/__pycache__/Aurora.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/Aurora.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MariusNea/Aurora/2c9a2594355b2d6f9e63f6e76776121a1e08cec3/__pycache__/Aurora.cpython-39.pyc -------------------------------------------------------------------------------- /icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MariusNea/Aurora/2c9a2594355b2d6f9e63f6e76776121a1e08cec3/icon.png -------------------------------------------------------------------------------- /images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MariusNea/Aurora/2c9a2594355b2d6f9e63f6e76776121a1e08cec3/images/logo.png -------------------------------------------------------------------------------- /images/ss1.png: -------------------------------------------------------------------------------- 
# plugins/IsolationForest.py

#####################################################
#### Package: Aurora
#### Plugin: Outliers (Anomaly) Detection
#### Version: 0.1
#### Author: Marius Neagoe
#### Copyright: © 2024 Marius Neagoe
#### Website: https://mariusneagoe.com
#### Github: https://github.com/MariusNea/Aurora
#####################################################

import pandas as pd
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt
import tkinter as tk
from tkinter.simpledialog import askstring
from tkinter import messagebox


def run_isolation_forest(df):
    """Detect outliers in one numeric column of *df* using IsolationForest.

    Prompts the user for the column name and the contamination factor,
    fits the model on that single feature, stores the per-row predictions
    in a new ``outlier_<col>`` column (1 = inlier, -1 = outlier), and
    saves/shows a scatter plot of the classification.

    Parameters
    ----------
    df : pandas.DataFrame
        The application dataframe; it is extended in place with the
        ``outlier_<col>`` prediction column.
    """
    col = ask_col()
    contamination = ask_contamination()

    # Validate user input up front so sklearn does not fail later with a
    # cryptic error (the old code crashed on a non-numeric entry).
    try:
        contamination = float(contamination)
    except (TypeError, ValueError):
        messagebox.showerror("Error", "Contamination must be a number, e.g. 0.01.")
        return
    if col not in df.columns:
        messagebox.showerror("Error", f"Column '{col}' does not exist in the dataframe.")
        return

    # Reshape to (n_samples, 1): sklearn expects a 2D feature matrix.
    values = df[col].values.reshape(-1, 1)

    model = IsolationForest(contamination=contamination, random_state=42)
    model.fit(values)
    preds = model.predict(values)  # 1 for inliers, -1 for outliers

    # Keep the predictions alongside the data, as before.
    df['outlier_' + col] = preds

    # Plotting: Y-values are arbitrary (single dimension); a small jitter
    # derived from the prediction separates the two classes visually.
    plt.figure(figsize=(10, 6))
    scatter = plt.scatter(df[col], [p * 0.02 for p in preds],
                          c=preds, cmap='coolwarm', edgecolor='k', s=20)
    plt.title('Data Points Classified by Isolation Forest')
    plt.xlabel(col)
    plt.yticks([])  # hide Y-axis ticks since they are arbitrary
    # legend_elements() yields one handle per class ordered by value (-1, 1),
    # so the labels below are guaranteed to match the colors (the old
    # plt.legend([...]) call did not bind labels to classes).
    handles, _ = scatter.legend_elements()
    plt.legend(handles, ['Outliers', 'Inliers'], loc='lower right')
    plt.savefig('outlier_plot' + '_column_' + col + '.png')
    plt.show()


def ask_contamination():
    """Prompt the user for the expected fraction of outliers (e.g. 0.01)."""
    root = tk.Tk()
    root.withdraw()  # dialog only; keep the root window from appearing
    contamination = askstring("Input", "Enter the contamination factor (e.g., 0.01):", parent=root)
    root.destroy()
    return contamination


def ask_col():
    """Prompt the user for the name of the column to analyse."""
    root = tk.Tk()
    root.withdraw()  # dialog only; keep the root window from appearing
    col = askstring("Input", "Enter column name on which you want to perform outlier detection:", parent=root)
    root.destroy()
    return col


def register(app):
    """Hook this plugin into Aurora's 'machine_learning' menu."""
    @app.register_plugin('machine_learning', 'isolation_forest', 'Outliers (Anomaly) Detection')
    def isolation_forest():
        global df
        df = app.get_dataframe()
        run_isolation_forest(df)
        messagebox.showinfo("Results", "Your data was saved as an image in the current folder.")
# plugins/Mann_Whit.py

#####################################################
#### Package: Aurora
#### Plugin: Mann-Whitney U Test
#### Version: 0.1
#### Author: Marius Neagoe
#### Copyright: © 2024 Marius Neagoe
#### Website: https://mariusneagoe.com
#### Github: https://github.com/MariusNea/Aurora
#####################################################


from scipy.stats import mannwhitneyu
from tkinter import messagebox
import tkinter as tk
from tkinter import ttk
import pandas as pd


def register(app):
    """Hook this plugin into Aurora's 'statistics' menu."""

    @app.register_plugin('statistics', 'mann_whitney_u_test', 'Mann-Whitney U Test')
    def mann_whitney_u_test():
        """Run the Mann-Whitney U test on each adjacent pair of columns.

        Columns are paired positionally: (0, 1), (2, 3), ... so the
        dataframe must contain an even number of columns.  Results are
        displayed in a Tk window, one row per column pair.
        """
        df = app.get_dataframe()
        # Check if the number of columns is even
        if len(df.columns) % 2 != 0:
            error_message = "Error: The number of columns in the dataframe must be even. The test is done on the columns that are placed one next to another."
            messagebox.showerror("Error", error_message)
            return

        # Create tkinter window
        root = tk.Tk()
        root.title("Mann-Whitney U Test Results")

        # Create treeview to display results
        tree = ttk.Treeview(root)
        tree["columns"] = ("Column Pair", "U Statistic", "P-Value")

        # Define treeview columns
        tree.column("#0", width=0, stretch=tk.NO)
        tree.column("Column Pair", anchor=tk.W, width=100)
        tree.column("U Statistic", anchor=tk.W, width=100)
        tree.column("P-Value", anchor=tk.W, width=100)

        # Create treeview headings
        tree.heading("#0", text="", anchor=tk.W)
        tree.heading("Column Pair", text="Column Pair", anchor=tk.W)
        tree.heading("U Statistic", text="U Statistic", anchor=tk.W)
        tree.heading("P-Value", text="P-Value", anchor=tk.W)

        # Perform Mann-Whitney U test for adjacent column pairs
        for i in range(0, len(df.columns), 2):
            # Coerce both columns to numeric locally.  The old code wrote
            # temporary 'column1_clean'/'column2_clean' columns straight into
            # the shared application dataframe, permanently mutating it.
            left = pd.to_numeric(df.iloc[:, i], errors='coerce')
            right = pd.to_numeric(df.iloc[:, i + 1], errors='coerce')
            # Keep only rows where both values are present (same effect as
            # the original dropna on the two helper columns).
            mask = left.notna() & right.notna()
            result = mannwhitneyu(left[mask], right[mask])

            # Insert result into treeview
            tree.insert("", i, values=(f"{df.columns[i]} - {df.columns[i+1]}",
                                       result.statistic, result.pvalue))

        # Pack and run tkinter window
        tree.pack(expand=True, fill=tk.BOTH)
        root.mainloop()
# plugins/PCA.py

#####################################################
#### Package: Aurora
#### Plugin: Principal Component Analysis
#### Version: 0.1
#### Author: Marius Neagoe
#### Copyright: © 2024 Marius Neagoe
#### Website: https://mariusneagoe.com
#### Github: https://github.com/MariusNea/Aurora
#####################################################

import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


def register(app):
    """Hook this plugin into Aurora's 'machine_learning' menu."""

    @app.register_plugin('machine_learning', 'perform_pca_and_export_csv', 'Principal Component Analysis')
    def perform_pca_and_export_csv():
        """Project the feature columns onto the first two principal
        components and export the result (plus the target column) to
        'pca.csv' in the current folder.

        Convention: every column except the last is a feature; the last
        column is the target.
        """
        df = app.get_dataframe()

        # Split the dataframe by the last-column-is-target convention.
        feature_names = df.columns[:-1]
        target_name = df.columns[-1]
        feature_matrix = df.loc[:, feature_names].values
        target_values = df.loc[:, target_name].values

        # PCA is scale-sensitive, so standardize the features first.
        scaled = StandardScaler().fit_transform(feature_matrix)

        # Reduce to two components (adjust n_components as needed).
        components = PCA(n_components=2).fit_transform(scaled)

        result = pd.DataFrame(
            data=components,
            columns=['principal component 1', 'principal component 2'],
        )
        result[target_name] = target_values

        result.to_csv('pca.csv', index=False)
https://raw.githubusercontent.com/MariusNea/Aurora/2c9a2594355b2d6f9e63f6e76776121a1e08cec3/plugins/__pycache__/esm.cpython-38.pyc -------------------------------------------------------------------------------- /plugins/__pycache__/example_plugin_a.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MariusNea/Aurora/2c9a2594355b2d6f9e63f6e76776121a1e08cec3/plugins/__pycache__/example_plugin_a.cpython-38.pyc -------------------------------------------------------------------------------- /plugins/__pycache__/plugin_a.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MariusNea/Aurora/2c9a2594355b2d6f9e63f6e76776121a1e08cec3/plugins/__pycache__/plugin_a.cpython-38.pyc -------------------------------------------------------------------------------- /plugins/__pycache__/plugin_b.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MariusNea/Aurora/2c9a2594355b2d6f9e63f6e76776121a1e08cec3/plugins/__pycache__/plugin_b.cpython-38.pyc -------------------------------------------------------------------------------- /plugins/__pycache__/poisson_probabilities.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MariusNea/Aurora/2c9a2594355b2d6f9e63f6e76776121a1e08cec3/plugins/__pycache__/poisson_probabilities.cpython-38.pyc -------------------------------------------------------------------------------- /plugins/__pycache__/vine_copula.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MariusNea/Aurora/2c9a2594355b2d6f9e63f6e76776121a1e08cec3/plugins/__pycache__/vine_copula.cpython-38.pyc -------------------------------------------------------------------------------- /plugins/anova.py: 
def validate_dataframe(dataframe):
    """
    Validate the DataFrame structure for one-way ANOVA.

    Expects the first column to be categorical (the grouping factor) and
    every remaining column to be numeric, with at least two numeric columns.

    :param dataframe: pandas DataFrame to validate.
    :raises ValueError: if the shape or the column dtypes do not match the
        expected layout.
    """
    # Need one categorical column plus at least two numeric columns.
    if dataframe.shape[1] < 3:
        raise ValueError("DataFrame must contain at least one categorical column and two numeric columns.")

    # First column must be the grouping factor (object or category dtype).
    # .iloc avoids pandas' deprecated positional Series indexing.
    if dataframe.dtypes.iloc[0] not in ['object', 'category']:
        raise ValueError("The first column must be categorical (type object or category).")
    # NOTE: the original code had unreachable exit() calls after each raise;
    # they are removed — exit() here could have terminated the whole host app.

    # Every other column must hold numeric data.
    if not all(dataframe.dtypes.iloc[1:].apply(lambda dtype: np.issubdtype(dtype, np.number))):
        raise ValueError("All columns except the first must be numeric.")

    # Missing values are tolerated here; handle_missing_values() drops them later.
    if dataframe.isnull().any().any():
        print("Warning: DataFrame contains missing values. They will be handled appropriately.")

def handle_missing_values(dataframe):
    """
    Handle missing values by dropping rows with any missing values.

    :param dataframe: pandas DataFrame possibly containing NaNs.
    :return: a new DataFrame with incomplete rows removed.
    """
    return dataframe.dropna()

def perform_anova_and_tukey(dataframe):
    """
    Perform a one-way ANOVA for each numeric column against the categorical
    first column, following up with Tukey's HSD when ANOVA is significant.

    Assumes the first column is categorical and the rest are numeric.
    Results are printed to stdout.
    """
    group_col = dataframe.columns[0]    # The first column as the categorical column
    numeric_cols = dataframe.columns[1:]  # The rest as numeric columns

    for col in numeric_cols:
        # One sample per category level, NaNs excluded per group.
        groups = [dataframe[dataframe[group_col] == group][col].dropna()
                  for group in dataframe[group_col].unique()]

        f_stat, p_value = f_oneway(*groups)
        print(f"ANOVA result for {col}: F={f_stat}, p={p_value}")

        # Only run the post-hoc test when the omnibus test is significant.
        if p_value < 0.05:
            # Flatten group data and build a parallel label series for Tukey.
            all_data = pd.concat(groups)
            all_groups = pd.concat([pd.Series([group] * len(g))
                                    for group, g in zip(dataframe[group_col].unique(), groups)])

            tukey = pairwise_tukeyhsd(endog=all_data, groups=all_groups, alpha=0.05)
            print(f"Tukey's HSD test result for {col}:\n{tukey}")
        else:
            print("ANOVA p-value > 0.05; Tukey's test not performed.")
def check_equal_variances(dataframe):
    """
    Run Levene's test for each numeric column, grouped by the categorical
    first column, and report whether the group variances look equal.

    :param dataframe: pandas DataFrame; first column categorical, rest numeric.
    """
    factor = dataframe.columns[0]          # grouping variable
    for measure in dataframe.columns[1:]:  # each dependent variable
        print(f"Levene's test for {measure}:")
        samples = [dataframe.loc[dataframe[factor] == level, measure].dropna()
                   for level in dataframe[factor].unique()]
        _, p_value = stats.levene(*samples)
        # p < 0.05 means the equal-variance assumption is questionable.
        if p_value < 0.05:
            print(f" Warning: Unequal variances detected (p-value: {p_value:.3f}).")
        else:
            print(f" Equal variances confirmed (p-value: {p_value:.3f}).")

def check_normality(dataframe):
    """
    Run the Shapiro-Wilk test on each group of every numeric column and
    report whether each group looks normally distributed.

    :param dataframe: pandas DataFrame; first column categorical, rest numeric.
    """
    factor = dataframe.columns[0]
    for measure in dataframe.columns[1:]:
        print(f"Shapiro-Wilk test for normality in {measure}:")
        for level in dataframe[factor].unique():
            sample = dataframe.loc[dataframe[factor] == level, measure].dropna()
            _, p_value = stats.shapiro(sample)
            # p < 0.05 rejects normality for this group.
            if p_value < 0.05:
                print(f" Group {level}: Non-normal distribution detected (p-value: {p_value:.3f}).")
            else:
                print(f" Group {level}: Normal distribution confirmed (p-value: {p_value:.3f}).")
    """
    eta_squared_values = {}
    group_col = dataframe.columns[0]  # First column as categorical
    numeric_cols = dataframe.columns[1:]  # Remaining columns as numeric variables

    for col in numeric_cols:
        # Fit an OLS model of the numeric column on the categorical factor,
        # then derive eta squared from the type-II ANOVA table.
        formula = f"{col} ~ C({group_col})"
        model = ols(formula, data=dataframe).fit()
        aov_table = anova_lm(model, typ=2)

        ss_between = aov_table.sum_sq['C({})'.format(group_col)]  # Corrected access method
        ss_total = sum(aov_table.sum_sq)
        # Eta squared = between-group sum of squares over total sum of squares.
        eta_squared = ss_between / ss_total
        eta_squared_values[col] = eta_squared
    print("Effect sizes of groups:")
    print(eta_squared_values)

def register(app):
    @app.register_plugin('statistics', 'anova', 'One Way ANOVA')
    def anova():
        # Menu entry: run the full one-way ANOVA workflow on Aurora's dataframe.
        df = app.get_dataframe()

        display_results(df)

import tkinter as tk
from tkinter import scrolledtext, messagebox, filedialog, Label, Entry, Button
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Define the Autoencoder model using PyTorch
class Autoencoder(nn.Module):
    # Single-hidden-layer autoencoder: Linear -> ReLU bottleneck, mirrored
    # by Linear -> Sigmoid so outputs land in [0, 1] like the scaled inputs.
    def __init__(self, input_dim, encoding_dim):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, encoding_dim),
            nn.ReLU(True)
        )
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, input_dim),
            nn.Sigmoid()  # Assuming data normalization [0,1]
        )

    def forward(self, x):
        # Encode to the bottleneck, then reconstruct.
        x = self.encoder(x)
        x = self.decoder(x)
        return x

def train_autoencoder(model, dataloader, epochs, device, output_text):
    # Reconstruction loss; optimizer uses Adam defaults (continues below).
    criterion = nn.MSELoss()
    optimizer =
torch.optim.Adam(model.parameters())
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for data, target in dataloader:
            data = data.to(device)
            target = target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # Mean batch loss for this epoch, echoed into the GUI text widget.
        average_loss = total_loss / len(dataloader)
        output_text.insert(tk.END, f'Epoch {epoch+1}, Loss: {average_loss:.4f}\n')
    output_text.insert(tk.END, "Training complete!\n")

def save_model(model, output_text):
    # Persist the trained weights via a save-file dialog; no-op if the user cancels.
    if model is None:
        messagebox.showerror("Error", "No model to save.")
        return
    save_path = filedialog.asksaveasfilename(filetypes=[("PyTorch Model", "*.pth")], defaultextension=".pth")
    if save_path:
        torch.save(model.state_dict(), save_path)
        output_text.insert(tk.END, f"Model saved to {save_path}\n")

def load_model(input_dim, encoding_dim, device, output_text):
    # Rebuild the architecture, then restore weights chosen in a file dialog.
    # Returns the model in eval mode, or None if the user cancels.
    model = Autoencoder(input_dim, encoding_dim).to(device)
    load_path = filedialog.askopenfilename(filetypes=[("PyTorch Model", "*.pth")])
    if load_path:
        model.load_state_dict(torch.load(load_path))
        model.eval()
        output_text.insert(tk.END, "Model loaded successfully.\n")
        return model
    return None

# GUI for controlling the autoencoder
def run_gui(dataframe=None):
    # Stand-alone Tk window: collect hyper-parameters, then train / load /
    # predict via the closures below, which share `model` through nonlocal.
    root = tk.Tk()
    root.title("Autoencoder Configuration")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = None

    def on_train():
        # Build a fresh model from the entry fields and train on the dataframe.
        nonlocal model
        input_dim = int(input_dim_entry.get())
        encoding_dim = int(encoding_dim_entry.get())
        epochs = int(epoch_entry.get())
        batch_size = int(batch_size_entry.get())
        model = Autoencoder(input_dim, encoding_dim).to(device)

        if dataframe is not None:
            scaler = MinMaxScaler()
            scaled_data = scaler.fit_transform(dataframe.values)
            # Assume scaled_data is already noisy and the original data is not accessible
            data_tensor = torch.tensor(scaled_data, dtype=torch.float32)
            # Assuming no clean target available, use noisy data as target for unsupervised learning
            dataloader = DataLoader(TensorDataset(data_tensor, data_tensor), batch_size=batch_size, shuffle=True)
            train_autoencoder(model, dataloader, epochs, device, output_text)
        else:
            messagebox.showerror("Error", "No data loaded for training.")

    def on_load_model():
        # Dimensions must match the saved checkpoint — TODO confirm; a
        # mismatch raises inside load_state_dict.
        nonlocal model
        input_dim = int(input_dim_entry.get())
        encoding_dim = int(encoding_dim_entry.get())
        model = load_model(input_dim, encoding_dim, device, output_text)

    def on_predict():
        # Add synthetic noise, denoise with the model, and dump the result
        # to denoised_data.csv in the working directory.
        nonlocal model
        if model is None:
            messagebox.showerror("Error", "Model not trained or initialized.")
            return
        try:
            scaler = MinMaxScaler()
            data_scaled = scaler.fit_transform(dataframe.values)
            noisy_data = data_scaled + 0.1 * np.random.normal(size=data_scaled.shape)
            noisy_data = np.clip(noisy_data, 0, 1)
            input_tensor = torch.tensor(noisy_data, dtype=torch.float32).to(device)
            model.eval()
            with torch.no_grad():
                predicted = model(input_tensor)
            clean_predicted = scaler.inverse_transform(predicted.cpu().numpy())
            output_text.insert(tk.END, "Denoised data ready. Check Aurora's directory.\n")
            np.savetxt("denoised_data.csv", clean_predicted, delimiter=",")
        except Exception as e:
            messagebox.showerror("Error", str(e))

    # Layout configuration
    frame = tk.Frame(root)
    frame.pack(fill=tk.BOTH, expand=True)

    Label(frame, text="Input Dimension:").pack()
    input_dim_entry = Entry(frame)
    input_dim_entry.pack()

    Label(frame, text="Encoding Dimension:").pack()
    encoding_dim_entry = Entry(frame)
    encoding_dim_entry.pack()

    Label(frame, text="Epochs:").pack()
    epoch_entry = Entry(frame)
    epoch_entry.pack()

    Label(frame, text="Batch Size:").pack()
    batch_size_entry = Entry(frame)
    batch_size_entry.pack()

    Button(frame, text="Train Model", command=on_train).pack()
    Button(frame, text="Load Model", command=on_load_model).pack()

    Button(frame, text="Predict and Save Clean Data", command=on_predict).pack()
    Button(frame, text="Save Model", command=lambda: save_model(model, output_text)).pack()

    output_text = scrolledtext.ScrolledText(frame, height=10)
    output_text.pack(fill=tk.BOTH, expand=True)

    root.mainloop()


def register(app):
    @app.register_plugin('machine_learning', 'ae', 'Denoising Autoencoder')
    def ae():
        # Menu entry: open the autoencoder GUI on Aurora's dataframe.
        dataae = app.get_dataframe()
        run_gui(dataae)


#####################################################
#### Package: Aurora
#### Plugin: Canonical Correlation Analysis
#### Version: 0.1
#### Author: Marius Neagoe
#### Copyright: © 2024 Marius Neagoe
#### Website: https://mariusneagoe.com
#### Github: https://github.com/MariusNea/Aurora
#####################################################

import pandas as pd
import numpy as np
from scipy.linalg import eigh
import matplotlib.pyplot as plt

def fill_na_with_mean(df):
    """Fill NaN values with the mean of their respective columns."""
    return df.fillna(df.mean())

def standardize_data(df):
    """Standardize DataFrame to have zero mean and unit variance."""
    df_filled = fill_na_with_mean(df)
    return (df_filled - df_filled.mean()) / df_filled.std()

def split_dataframe(df):
    """Split DataFrame into two equal halves (left columns = X, right = Y)."""
    mid_point = df.shape[1] // 2
    X = df.iloc[:, :mid_point]
    Y = df.iloc[:, mid_point:]
    return X, Y

def canonical_correlation_analysis(df):
    """Perform Canonical Correlation Analysis on a DataFrame.

    Returns (canonical_correlations, U, V) where U and V hold the canonical
    variables for the two column halves of *df*.
    """
    X, Y = split_dataframe(df)
    X_std = standardize_data(X)
    Y_std = standardize_data(Y)

    # Covariance blocks of the joint standardized data (bias=True: divide by N).
    S_xx = np.cov(X_std.T, bias=True)
    S_yy = np.cov(Y_std.T, bias=True)
    S_xy = np.cov(X_std.T, Y_std.T, bias=True)[:X_std.shape[1], X_std.shape[1]:]
    S_yx = S_xy.T

    # Ensure matrices are at least two-dimensional
    S_xx = np.atleast_2d(S_xx)
    S_yy = np.atleast_2d(S_yy)
    S_xy = np.atleast_2d(S_xy)
    S_yx = np.atleast_2d(S_yx)

    # Solve the generalized eigenvalue problem
    eigvals, eigvecs_x = eigh(S_xy @ np.linalg.inv(S_yy) @ S_yx, S_xx)
    eigvals = np.sqrt(np.maximum(eigvals, 0))  # Ensure non-negative eigenvalues

    # Sort by descending canonical correlation.
    idx = np.argsort(-eigvals)
    canonical_correlations = eigvals[idx]
    canonical_weights_x = eigvecs_x[:, idx]

    U = X_std @ canonical_weights_x
    V = Y_std @ (np.linalg.inv(S_yy) @ S_yx @ canonical_weights_x)

    return canonical_correlations, U, V

def plot_first_pair_canonical_variables(U, V):
    """
    Plot the first canonical variables from U and V against each other.
    U and V are the matrices of canonical variables, where each column is a canonical variable.
    This function focuses on the first pair, illustrating their relationship.
    """
    plt.figure(figsize=(8, 6))
    plt.scatter(U, V, edgecolor='k', alpha=0.7, label='Canonical Variable Pair')
    plt.title('Scatter Plot of the First Pair of Canonical Variables')
    plt.xlabel('First Canonical Variable from U')
    plt.ylabel('First Canonical Variable from V')
    plt.legend()
    plt.grid(True)
    plt.show()

def register(app):
    @app.register_plugin('statistics', 'cca', 'Canonical Correlation Analysis')
    def cca():
        # Menu entry: run CCA on Aurora's dataframe and show the first pair.
        data_cor = app.get_dataframe()
        # You can add your code here
        canonical_correlations, U, V = canonical_correlation_analysis(data_cor)
        print("Canonical Correlations:", canonical_correlations)
        print(U)
        print(V)
        plot_first_pair_canonical_variables(U, V)


#####################################################
#### Package: Aurora
#### Plugin: Exponential Smoothing Model
#### Version: 0.1
#### Author: Marius Neagoe
#### Copyright: © 2024 Marius Neagoe
#### Website: https://mariusneagoe.com
#### Github: https://github.com/MariusNea/Aurora
#####################################################

import tkinter as tk
from tkinter import ttk
from tkinter import messagebox
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing

def esf(df, column_name, period, trend, seasonal):
    """
    Applies Exponential Smoothing on a DataFrame's specified time series data column and plots the original data and forecast.

    :param df: DataFrame containing the time series data.
    :param column_name: Name of the column containing the time series data.
    :param period: The seasonal period.
    :param trend: The type of trend component ('additive', 'multiplicative', or None).
    :param seasonal: The type of seasonal component ('additive', 'multiplicative', or None).
    """
    # Validate column name
    if column_name not in df.columns:
        messagebox.showerror("Error", f"Column '{column_name}' not found in DataFrame.")
        return

    # Convert period to integer
    try:
        period = int(period)
    except Exception as e:
        # Display an error message box with the description of the exception
        messagebox.showerror("Error", f"An error occurred: {e}")
        # NOTE(review): execution continues with a non-integer period after
        # the error dialog — confirm whether an early return was intended.

    # Convert 'None' strings to NoneType
    trend = None if trend == 'None' else trend
    seasonal = None if seasonal == 'None' else seasonal
    try:
        # Fit the model
        model = ExponentialSmoothing(df[column_name], trend=trend, seasonal=seasonal, seasonal_periods=period)
        model_fit = model.fit()
    except Exception as e:
        # Display an error message box with the description of the exception
        messagebox.showerror("Error", f"An error occurred: {e}")
        # NOTE(review): if fitting failed, model_fit is unbound and the next
        # line raises NameError — likely needs a return here.

    # Forecast
    forecast = model_fit.fittedvalues

    # Plot the original data and the forecast
    plt.figure(figsize=(10, 6))
    plt.plot(df.index, df[column_name], label='Original')
    plt.plot(df.index, forecast, label='Forecast', alpha=0.7)
    plt.title('Time Series Forecast')
    plt.xlabel('Time')
    plt.ylabel('Values')
    plt.legend()
    plt.show()


def register(app):
    @app.register_plugin('statistics', 'esm', 'Exponential Smoothing Model')
    def esm():
        # Menu entry: collect smoothing parameters in a small Tk form, then
        # hand them to esf() on Submit.
        data = app.get_dataframe()
        # Create the main window
        root = tk.Tk()
        root.title("Exponential Smoothing Parameters")

        # Column Name Entry
        tk.Label(root, text="Column Name:").grid(row=0, column=0, padx=10, pady=10, sticky='w')
        column_entry = tk.Entry(root)
        column_entry.grid(row=0, column=1, padx=10, pady=10, sticky='ew')


        # Period Entry
        tk.Label(root, text="Period:").grid(row=1, column=0, padx=10, pady=10, sticky='w')
        period_entry = tk.Entry(root)
        period_entry.grid(row=1, column=1, padx=10, pady=10, sticky='ew')

        # Trend ComboBox
        tk.Label(root, text="Trend:").grid(row=2, column=0, padx=10, pady=10, sticky='w')
        trend_options = ["additive", "multiplicative", "None"]
        trend_combobox = ttk.Combobox(root, values=trend_options, state="readonly")
        trend_combobox.grid(row=2, column=1, padx=10, pady=10, sticky='ew')
        trend_combobox.set("None")

        # Seasonal ComboBox
        tk.Label(root, text="Seasonal:").grid(row=3, column=0, padx=10, pady=10, sticky='w')
        seasonal_options = ["additive", "multiplicative", "None"]
        seasonal_combobox = ttk.Combobox(root, values=seasonal_options, state="readonly")
        seasonal_combobox.grid(row=3, column=1, padx=10, pady=10, sticky='ew')
        seasonal_combobox.set("None")

        # Submit Button
        submit_button = tk.Button(root, text="Submit", command=lambda: esf(data, column_entry.get(), period_entry.get(), trend_combobox.get(), seasonal_combobox.get()))

        submit_button.grid(row=4, column=0, columnspan=2, pady=10)

        # Set the grid expansion properties
        root.grid_columnconfigure(1, weight=1)
        root.grid_rowconfigure(4, weight=1)

        root.mainloop()


# plugins/example_plugin_a.py
# The MyApp class now supports creating two separate menu categories: "Statistics" and "Machine Learning".
# The register_plugin method requires a category argument to determine under
# which menu the plugin should be registered: 'statistics' or
# 'machine_learning'. Plugins pass their category to the register_plugin
# decorator, and may call app.get_dataframe() to read Aurora's main dataframe.


#####################################################
#### Package: Aurora
#### Plugin: Test plugin
#### Version: 0.1
#### Author: Marius Neagoe
#### Copyright: © 2024 Marius Neagoe
#### Website: https://mariusneagoe.com
#### Github: https://github.com/MariusNea/Aurora
#####################################################


def register(app):
    # Template hook: shows the minimal shape of an Aurora plugin.
    # 'category' should be replaced with 'statistics' or 'machine_learning';
    # 'stats_test' is the registry key; the third argument is the GUI label.
    @app.register_plugin('category', 'stats_test', 'Perform Stats Test')
    def stats_test():
        # Placeholder body — replace with real analysis code.
        print("Running a statistics or machine learning test...")
    """
    # Check if the input is a DataFrame
    if not isinstance(df, pd.DataFrame):
        raise ValueError("The input must be a pandas DataFrame.")

    # Get the numerical columns from the DataFrame
    numerical_columns = df.select_dtypes(include='number').columns

    # Plot histograms for each numerical column
    for column in numerical_columns:
        plt.figure(figsize=(10, 6))
        plt.hist(df[column], bins=30, edgecolor='black')
        plt.title(f'Histogram of {column}')
        plt.xlabel(column)
        plt.ylabel('Frequency')
        plt.grid(True)
        plt.show()

# Example usage
def register(app):
    @app.register_plugin('statistics', 'histogram', 'Histogram')
    def histogram():
        # Menu entry: one histogram per numeric column of Aurora's dataframe.
        histogram_data = app.get_dataframe()

        plot_histogram(histogram_data)


#####################################################
#### Package: Aurora
#### Plugin: K-Means
#### Version: 0.1
#### Author: Marius Neagoe
#### Copyright: © 2024 Marius Neagoe
#### Website: https://mariusneagoe.com
#### Github: https://github.com/MariusNea/Aurora
#####################################################

import tkinter as tk
from tkinter import ttk, scrolledtext
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

# Function to run K-Means clustering
def run_kmeans(data, n_clusters, init_method, max_iter):
    # Fit KMeans with a fixed random_state for reproducible clustering.
    print(f"Running KMeans with n_clusters={n_clusters}, init_method='{init_method}', max_iter={max_iter}")
    # Configure and run the KMeans algorithm
    kmeans = KMeans(
        n_clusters=int(n_clusters),
        init=init_method,
        max_iter=int(max_iter),
        algorithm='lloyd',
        random_state=42,
        n_init=10
    )
    kmeans.fit(data)
    return kmeans

# GUI creation function
def create_gui(data):
    # Stand-alone Tk window: parameter entries, a run button, and a
    # prediction box that reuses the last trained model.
    # Root window
    root = tk.Tk()
    root.title("K-Means Clustering")

    # Entry for Number of Clusters
    tk.Label(root, text="Number of Clusters:").grid(row=0, column=0)
    n_clusters_entry = tk.Entry(root)
    n_clusters_entry.grid(row=0, column=1)

    # Dropdown for Initialization Methods
    tk.Label(root, text="Initialization Method:").grid(row=1, column=0)
    init_method_var = tk.StringVar(root)
    init_method_dropdown = ttk.Combobox(root, textvariable=init_method_var, state="readonly")
    init_method_dropdown['values'] = ('k-means++', 'random')
    init_method_dropdown.grid(row=1, column=1)
    init_method_dropdown.current(0)

    # Entry for Maximum Number of Iterations
    tk.Label(root, text="Max Iterations:").grid(row=2, column=0)
    max_iter_entry = tk.Entry(root)
    max_iter_entry.grid(row=2, column=1)

    # Scrolled Text Area for Output
    output_area = scrolledtext.ScrolledText(root, width=40, height=10)
    output_area.grid(row=5, column=0, columnspan=2, pady=10)

    # Button to Run K-Means
    def on_run_clicked():
        n_clusters = n_clusters_entry.get()
        init_method = init_method_var.get()
        max_iter = max_iter_entry.get()
        print(f"Button clicked with init_method='{init_method}'")

        if init_method not in ['k-means++', 'random']:
            output_area.delete('1.0', tk.END)
            output_area.insert(tk.INSERT, f"Invalid init method: {init_method}. Select 'k-means++' or 'random'.\n")
            return

        try:
            # NOTE(review): module-level global shares the model with
            # on_predict_clicked; a nonlocal/closure variable would be safer.
            global model  # Declare model as global to use in prediction
            model = run_kmeans(data, int(n_clusters), init_method, int(max_iter))
            centers = model.cluster_centers_
            output = "Cluster Centers:\n{}\n".format(centers)
            output_area.delete('1.0', tk.END)
            output_area.insert(tk.INSERT, output)
        except Exception as e:
            output_area.delete('1.0', tk.END)
            output_area.insert(tk.INSERT, "Error: {}\n".format(e))

    run_button = tk.Button(root, text="Run K-Means", command=on_run_clicked)
    run_button.grid(row=4, column=0, columnspan=2)

    # Entry for Prediction Data
    tk.Label(root, text="Enter Prediction Data (comma-separated):").grid(row=6, column=0)
    prediction_entry = tk.Entry(root)
    prediction_entry.grid(row=6, column=1)

    # Button for Making Predictions
    def on_predict_clicked():
        # Parse a comma-separated point and assign it to the nearest cluster.
        prediction_data = prediction_entry.get()
        try:
            data_point = np.array([float(x) for x in prediction_data.split(',')]).reshape(1, -1)
            cluster = model.predict(data_point)
            output_area.insert(tk.END, "Predicted Cluster: {}\n".format(cluster[0]))
        except Exception as e:
            output_area.insert(tk.END, "Error in prediction: {}\n".format(e))

    predict_button = tk.Button(root, text="Make Prediction", command=on_predict_clicked)
    predict_button.grid(row=7, column=0, columnspan=2)

    # Start the GUI
    root.mainloop()

def register(app):
    @app.register_plugin('machine_learning', 'kmeans', 'Unsupervised Learning (K Means)')
    def kmeans():
        # Menu entry: scale the dataframe, then open the K-Means GUI.
        dateq = app.get_dataframe()
        # Preprocess data: scaling
        scaler = StandardScaler()
        data_scaled = scaler.fit_transform(dateq)
        data_scaled = pd.DataFrame(data_scaled, columns=dateq.columns)
        # Running the GUI
        create_gui(data_scaled)
# /plugins/knn.py

#####################################################
#### Package: Aurora
#### Plugin: K Nearest Neighbors
#### Version: 0.1
#### Author: Marius Neagoe
#### Copyright: © 2024 Marius Neagoe
#### Website: https://mariusneagoe.com
#### Github: https://github.com/MariusNea/Aurora
#####################################################


import tkinter as tk
from tkinter import simpledialog, messagebox
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier


class KNNApp:
    """Small Tk front-end for training a KNeighborsClassifier on a DataFrame.

    The last column of ``df`` is assumed to be the class label; the feature
    columns are chosen by the user at training time.
    """

    def __init__(self, master, df):
        self.master = master
        self.df = df
        self.model = KNeighborsClassifier(n_neighbors=3)
        self.features = None  # DataFrame of the selected feature columns (set by train_knn)
        self.target = None    # Series of class labels (last dataframe column)

        # Text area for displaying information
        self.text_area = tk.Text(master, height=10, width=50)
        self.text_area.pack()

        # Button to train the KNN
        self.train_btn = tk.Button(master, text="Train KNN", command=self.open_feature_selection)
        self.train_btn.pack()

        # Button to make a prediction
        self.predict_btn = tk.Button(master, text="Make Prediction", command=self.make_prediction)
        self.predict_btn.pack()

    def open_feature_selection(self):
        """Open a window where the user types the feature column names."""
        self.feature_window = tk.Toplevel(self.master)
        self.feature_window.title("Select Features")

        tk.Label(self.feature_window, text="Enter features separated by commas:").pack()

        self.feature_entry = tk.Entry(self.feature_window, width=50)
        self.feature_entry.pack(pady=10)

        submit_btn = tk.Button(self.feature_window, text="Submit", command=self.train_knn)
        submit_btn.pack()

    def train_knn(self):
        """Validate the selected feature columns and fit the classifier.

        Fix: names are stripped individually instead of deleting every space
        from the entry, so column names that legitimately contain spaces
        keep working.
        """
        features = [f.strip() for f in self.feature_entry.get().split(',') if f.strip()]
        if features and all(feature in self.df.columns for feature in features):
            self.features = self.df[features]
            self.target = self.df.iloc[:, -1]  # Assuming the last column is the target
            self.model.fit(self.features, self.target)
            self.text_area.insert(tk.END, "Model trained with features: {}\n".format(", ".join(features)))
            self.feature_window.destroy()
        else:
            messagebox.showerror("Error", "One or more features are invalid")

    def make_prediction(self):
        """Open a window with one entry field per trained feature."""
        if self.features is None:
            messagebox.showerror("Error", "Model is not trained yet")
            return

        self.pred_window = tk.Toplevel(self.master)
        self.pred_window.title("Make Prediction")
        self.entries = []

        for feature in self.features.columns:
            row = tk.Frame(self.pred_window)
            lbl = tk.Label(row, width=15, text=feature, anchor='w')
            ent = tk.Entry(row)
            row.pack(side=tk.TOP, fill=tk.X, padx=5, pady=5)
            lbl.pack(side=tk.LEFT)
            ent.pack(side=tk.RIGHT, expand=tk.YES, fill=tk.X)
            self.entries.append(ent)

        submit_btn = tk.Button(self.pred_window, text="Submit", command=self.submit_prediction)
        submit_btn.pack()

    def submit_prediction(self):
        """Read the entered values and show the predicted class.

        Fix: the prediction window is only destroyed after a successful
        prediction.  Previously a ``finally`` clause destroyed it even on
        invalid input, forcing the user to reopen the window to retry.
        """
        try:
            input_data = [float(entry.get()) for entry in self.entries]
        except ValueError:
            messagebox.showerror("Error", "Please enter valid numbers")
            return  # keep the window open so the user can correct the input
        prediction = self.model.predict([input_data])[0]
        self.text_area.insert(tk.END, f"Prediction data: {input_data}\n")
        self.text_area.insert(tk.END, f"Belonging class: {prediction}\n")
        self.pred_window.destroy()


def register(app):
    @app.register_plugin('machine_learning', 'knn', 'K Nearest Neighbors')
    def knn():
        datas = app.get_dataframe()
        root = tk.Tk()
        KNNApp(root, datas)
        root.mainloop()

# /plugins/pearson.py
1 | ##################################################### 2 | #### Package: Aurora 3 | #### Plugin: Pearson correlation 4 | #### Version: 0.1 5 | #### Author: Marius Neagoe 6 | #### Copyright: © 2024 Marius Neagoe 7 | #### Website: https://mariusneagoe.com 8 | #### Github: https://github.com/MariusNea/Aurora 9 | ##################################################### 10 | 11 | import pandas as pd 12 | import numpy as np 13 | import tkinter as tk 14 | from tkinter import messagebox 15 | from tkinter import filedialog 16 | 17 | def calculate_pearson_correlation(df, col1, col2): 18 | """Calculate the Pearson correlation coefficient between two columns.""" 19 | if col1 not in df.columns or col2 not in df.columns: 20 | raise ValueError(f"One or both columns '{col1}' and '{col2}' are not in the DataFrame.") 21 | 22 | return df[col1].corr(df[col2]) 23 | 24 | def create_tkinter_ui(dataframe): 25 | """Create a Tkinter UI for inputting column names and calculating Pearson correlation.""" 26 | def on_calculate(): 27 | """Handler for the calculate button.""" 28 | cols = entry.get().strip() 29 | try: 30 | if '-' in cols: 31 | parts = cols.split(',') 32 | if len(parts) != 2: 33 | raise ValueError("Incorrect format. 
Use either 'col1,col2', 'col1,col2-col10', or 'col1-col100,col105-col200'.") 34 | 35 | range1 = parts[0].strip() 36 | range2 = parts[1].strip() 37 | 38 | if '-' in range1 and '-' in range2: 39 | start_col1, end_col1 = range1.split('-') 40 | start_col1 = start_col1.strip() 41 | end_col1 = end_col1.strip() 42 | 43 | start_col2, end_col2 = range2.split('-') 44 | start_col2 = start_col2.strip() 45 | end_col2 = end_col2.strip() 46 | 47 | if start_col1 not in dataframe.columns or end_col1 not in dataframe.columns or start_col2 not in dataframe.columns or end_col2 not in dataframe.columns: 48 | raise ValueError("One or more columns are not in the DataFrame.") 49 | 50 | # Get the range of columns for both parts 51 | col_range1 = dataframe.loc[:, start_col1:end_col1].columns 52 | col_range2 = dataframe.loc[:, start_col2:end_col2].columns 53 | 54 | results = [] 55 | for col1 in col_range1: 56 | for col2 in col_range2: 57 | correlation = calculate_pearson_correlation(dataframe, col1, col2) 58 | results.append((col1, col2, correlation)) 59 | 60 | # Convert results to a DataFrame for export 61 | result_df = pd.DataFrame(results, columns=['Column 1', 'Column 2', 'Correlation']) 62 | 63 | # Save to CSV 64 | save_results_to_csv(result_df) 65 | elif '-' in range1 or '-' in range2: 66 | raise ValueError("Invalid range format. 
Both parts should be ranges if '-' is present in both.") 67 | else: 68 | base_col = range1 69 | other_col = range2 70 | correlation = calculate_pearson_correlation(dataframe, base_col, other_col) 71 | result_df = pd.DataFrame([(base_col, other_col, correlation)], columns=['Base Column', 'Compared Column', 'Correlation']) 72 | 73 | # Show the result in a message box 74 | messagebox.showinfo("Pearson Correlation", f"The correlation between '{base_col}' and '{other_col}' is: {correlation:.4f}") 75 | 76 | # Save to CSV 77 | save_results_to_csv(result_df) 78 | else: 79 | col1, col2 = cols.split(',') 80 | col1 = col1.strip() 81 | col2 = col2.strip() 82 | correlation = calculate_pearson_correlation(dataframe, col1, col2) 83 | result_df = pd.DataFrame([(col1, col2, correlation)], columns=['Base Column', 'Compared Column', 'Correlation']) 84 | 85 | # Show the result in a message box 86 | messagebox.showinfo("Pearson Correlation", f"The correlation between '{col1}' and '{col2}' is: {correlation:.4f}") 87 | 88 | # Save to CSV 89 | save_results_to_csv(result_df) 90 | except ValueError as ve: 91 | messagebox.showerror("Input Error", str(ve)) 92 | except Exception as e: 93 | messagebox.showerror("Error", str(e)) 94 | 95 | def save_results_to_csv(result_df): 96 | """Save the correlation results to a CSV file.""" 97 | file_path = filedialog.asksaveasfilename(defaultextension=".csv", filetypes=[("CSV files", "*.csv")]) 98 | if file_path: 99 | result_df.to_csv(file_path, index=False) 100 | messagebox.showinfo("Save Successful", f"Results saved to {file_path}") 101 | 102 | # Tkinter setup 103 | root = tk.Tk() 104 | root.title("Pearson Correlation Calculator") 105 | 106 | label = tk.Label(root, text="Enter column names (e.g., Col1,Col2 or Col1,Col2-Col10 or Col1-Col100,Col105-Col200):") 107 | label.pack() 108 | 109 | global entry # Declare entry as global to access it within on_calculate 110 | entry = tk.Entry(root, width=50) 111 | entry.pack() 112 | 113 | button = tk.Button(root, 
text="Calculate Correlation", command=on_calculate) 114 | button.pack() 115 | 116 | root.mainloop() 117 | 118 | def register(app): 119 | @app.register_plugin('statistics', 'pearson', 'Pearson Correlation') 120 | def pearson(): 121 | datafr = app.get_dataframe() 122 | create_tkinter_ui(datafr) 123 | -------------------------------------------------------------------------------- /plugins/poisson_probabilities.py: -------------------------------------------------------------------------------- 1 | #plugins/poisson_probabilities.py 2 | import tkinter as tk 3 | from tkinter import simpledialog, messagebox 4 | import pandas as pd 5 | import math 6 | 7 | ##################################################### 8 | #### Package: Aurora 9 | #### Plugin: Poisson Probabilities 10 | #### Version: 0.1 11 | #### Author: Marius Neagoe 12 | #### Copyright: © 2024 Marius Neagoe 13 | #### Website: https://mariusneagoe.com 14 | #### Github: https://github.com/MariusNea/Aurora 15 | ##################################################### 16 | 17 | ## The dataframemust contain only one column which represents the number of events on a given period of time. 18 | ## Plugin outputh takes one argument, the numer of events for that period of time. 19 | ## Outputs 3 probabilities: the exact probability for exact that number of events to take place in the next period of time, 20 | ## the probability that < x number of events to take place in the next period of time, 21 | ## the probability that > x number of events to take place in the next period of time. 
def poisson_probability(x, mu):
    """P(X == x) for a Poisson distribution with mean ``mu``."""
    return (math.exp(-mu) * (mu ** x)) / math.factorial(x)


def cumulative_poisson_probability(x, mu, cumulative=False):
    """P(X <= x) when ``cumulative`` is True, otherwise P(X == x)."""
    if cumulative:
        return sum(poisson_probability(i, mu) for i in range(x + 1))
    else:
        return poisson_probability(x, mu)


def real_world_poisson(mu, parameter, calculation_type='exact', **kwargs):
    """Dispatch between 'exact', 'cumulative' (<=) and 'greater_than' (>) probabilities.

    Raises:
        ValueError: on an unknown ``calculation_type``.
    """
    if calculation_type == 'exact':
        return poisson_probability(parameter, mu)
    elif calculation_type == 'cumulative':
        return cumulative_poisson_probability(parameter, mu, cumulative=True)
    elif calculation_type == 'greater_than':
        return 1 - cumulative_poisson_probability(parameter, mu, cumulative=True)
    else:
        raise ValueError("Unsupported calculation type")


def calculate_and_display_results():
    """Ask for x, then show P(X==x), P(X<=x) and P(X>x) for the next period.

    ``mu`` is estimated as the mean of the (single) column of the module-level
    ``df`` set by the plugin entry point.
    """
    mu = df[df.columns[0]].mean()

    x = simpledialog.askstring("Input", "Enter the specific number of events (x):")
    if x is not None:
        try:
            x = int(x)
        except ValueError:
            messagebox.showerror("Error", "Please enter a valid integer.")
            return

        # Calculate probabilities
        exact_probability = real_world_poisson(mu, x, 'exact')
        cumulative_probability = real_world_poisson(mu, x, 'cumulative')
        greater_than_probability = real_world_poisson(mu, x, 'greater_than')

        # Display results in a messagebox
        result_message = f"Exact Probability (x={x}): {exact_probability*100:.4f} %\n" \
                         f"Cumulative Probability (<=x): {cumulative_probability*100:.4f} %\n" \
                         f"Greater Than Probability (>x): {greater_than_probability*100:.4f} %"
        messagebox.showinfo("Probability Results", result_message)


def register(app):
    @app.register_plugin('statistics','poisson', 'Poisson Probabilities')
    def poisson():
        global df
        df = app.get_dataframe()
        # Require exactly one column: each row is the event count for one period.
        if len(df.columns) > 1:
            error_message = "Error: The number of columns in the dataframe must be 1. Each row represents the number of events on a period of time."
            messagebox.showerror("Error", error_message)
            return
        root = tk.Tk()
        root.withdraw()  # Hide the main window
        calculate_and_display_results()

# /plugins/svm.py

#####################################################
#### Package: Aurora
#### Plugin: Support Vector Machines Classifier
#### Version: 0.1
#### Author: Marius Neagoe
#### Copyright: © 2024 Marius Neagoe
#### Website: https://mariusneagoe.com
#### Github: https://github.com/MariusNea/Aurora
#####################################################

import pandas as pd
import numpy as np
import tkinter as tk
from tkinter import ttk, simpledialog, messagebox, scrolledtext
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import threading


def preprocess_data(df, features, target):
    """Scale the selected feature columns; return (X_scaled, y, fitted scaler)."""
    X = df[features]
    y = df[target]
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, y, scaler


def split_data(X, y, test_size=0.2, random_state=42):
    """Thin wrapper around train_test_split with the plugin's defaults."""
    return train_test_split(X, y, test_size=test_size, random_state=random_state)


def train_svm(X_train, y_train, kernel='rbf', C=1.0):
    """Fit an SVC with the requested kernel and regularization parameter."""
    model = SVC(kernel=kernel, C=C)
    model.fit(X_train, y_train)
    return model


def evaluate_model(model, X_test, y_test):
    """Return (accuracy, text classification report) on the held-out split."""
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    report = classification_report(y_test, predictions, zero_division=0)
    return accuracy, report


def create_gui(df):
    """Tk window for training an SVC on ``df`` and predicting single samples."""
    window = tk.Tk()
    window.title("SVM Classifier")

    # Determine initial values for features and target from DataFrame
    initial_features = ", ".join(df.columns[:-1])  # All columns except the last one
    initial_target = df.columns[-1]                # Last column

    kernel_var = tk.StringVar(window)
    kernel_options = ['linear', 'poly', 'rbf', 'sigmoid']
    kernel_var.set(kernel_options[2])  # default to RBF

    ttk.Label(window, text="Select Kernel:").pack(pady=5)
    kernel_dropdown = ttk.OptionMenu(window, kernel_var, kernel_options[2], *kernel_options)
    kernel_dropdown.pack(pady=5)

    results_text = scrolledtext.ScrolledText(window, width=60, height=10)
    results_text.pack(pady=10)

    global model, scaler, trained_features
    model = None
    scaler = None
    trained_features = None  # the exact feature columns the model was trained on

    def run_svm(features, target, kernel, C):
        global model, scaler, trained_features
        try:
            features_list = [f.strip() for f in features.split(',')]
            X_scaled, y, scaler = preprocess_data(df, features_list, target.strip())
            X_train, X_test, y_train, y_test = split_data(X_scaled, y)
            model = train_svm(X_train, y_train, kernel, C)
            trained_features = features_list
            accuracy, report = evaluate_model(model, X_test, y_test)
            results_text.delete('1.0', tk.END)
            results_text.insert(tk.INSERT, f"Classification Report:\n{report}\n")
            results_text.insert(tk.INSERT, f"Accuracy: {accuracy:.2f}\n")
        except Exception as e:
            messagebox.showerror("Error", f"An error occurred: {e}")

    def get_input():
        """Collect training parameters via dialogs and train in a worker thread.

        Fix: cancelling any dialog used to crash with ``float(None)``; all
        dialog results are now validated before training starts.
        """
        features = simpledialog.askstring("Input", "Enter feature column names separated by comma:",
                                          initialvalue=initial_features)
        target = simpledialog.askstring("Input", "Enter target column name:", initialvalue=initial_target)
        if not features or not target:
            messagebox.showwarning("Warning", "Training cancelled: feature and target columns are required.")
            return
        kernel = kernel_var.get()
        c_text = simpledialog.askstring("Input", "Enter C parameter (e.g., 1.0):")
        try:
            C = float(c_text)
        except (TypeError, ValueError):
            messagebox.showerror("Error", "C must be a number (e.g., 1.0).")
            return
        threading.Thread(target=run_svm, args=(features, target, kernel, C)).start()

    def make_prediction():
        """Predict the class of one sample entered as comma-separated values.

        Fix: prompts for and uses the features the model was actually trained
        on (``trained_features``) instead of always assuming all columns but
        the last, which broke predictions after training on a subset.
        """
        if model is not None and scaler is not None:
            try:
                feature_inputs = simpledialog.askstring(
                    "Predict", "Enter values for {} separated by commas:".format(", ".join(trained_features)))
                if not feature_inputs:
                    messagebox.showwarning("Warning", "Input was cancelled or empty. Please provide valid numbers.")
                    return
                feature_values = [float(v.strip()) for v in feature_inputs.split(',')]
                if len(feature_values) != len(trained_features):
                    messagebox.showerror("Error", "The number of input values must match the number of features.")
                    return
                data = pd.DataFrame([feature_values], columns=trained_features)
                scaled_data = scaler.transform(data)
                prediction = model.predict(scaled_data)
                messagebox.showinfo("Prediction Result", f"The predicted class is: {prediction[0]}")
            except ValueError:
                messagebox.showerror("Error", "Invalid input. Please enter valid numbers.")
            except Exception as e:
                messagebox.showerror("Error", f"An unexpected error occurred: {e}")
        else:
            messagebox.showerror("Error", "Model is not trained yet. Please train the model first.")

    btn_run = tk.Button(window, text="Train SVM", command=get_input)
    btn_run.pack(pady=10)

    btn_predict = tk.Button(window, text="Make Prediction", command=make_prediction)
    btn_predict.pack(pady=10)

    window.mainloop()


def register(app):
    @app.register_plugin('machine_learning', 'svm', 'Support Vector Machines')
    def svm():
        data = app.get_dataframe()
        create_gui(data)

# /plugins/text_classifier.py

#####################################################
#### Package: Aurora
#### Plugin: Text Classifier
#### Version: 0.1
#### Author: Marius Neagoe
#### Copyright: © 2024 Marius Neagoe
#### Website: https://mariusneagoe.com
#### Github: https://github.com/MariusNea/Aurora
#####################################################

import tkinter as tk
from tkinter import simpledialog, messagebox
from tkinter.scrolledtext import ScrolledText
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report
import pandas as pd


class TextClassifierPlugin:
    """Naive-Bayes bag-of-words text classifier with a small Tk front-end."""

    def __init__(self, dataframe):
        self.dataframe = dataframe
        self.parameters = {}     # test_size / random_state / text & label column names
        self.status_text = None  # ScrolledText widget, created in main()
        self.model = None        # fitted sklearn pipeline, set by train_model()

    def get_parameters_window(self):
        """Modal window that collects split parameters and column names."""
        def submit():
            try:
                self.parameters['test_size'] = float(test_size_entry.get())
                self.parameters['random_state'] = int(random_state_entry.get())
                self.parameters['text_column'] = text_column_entry.get()
                self.parameters['label_column'] = label_column_entry.get()
                param_window.destroy()
            except ValueError:
                self.show_status("Invalid input. Please enter valid numbers for test size and random state, and column names.")

        param_window = tk.Toplevel(root)
        param_window.title("Set Parameters")

        tk.Label(param_window, text="Test Size (0-1):").grid(row=0, column=0)
        tk.Label(param_window, text="Random State:").grid(row=1, column=0)
        tk.Label(param_window, text="Text Column Name:").grid(row=2, column=0)
        tk.Label(param_window, text="Label Column Name:").grid(row=3, column=0)

        test_size_entry = tk.Entry(param_window)
        random_state_entry = tk.Entry(param_window)
        text_column_entry = tk.Entry(param_window)
        label_column_entry = tk.Entry(param_window)

        test_size_entry.grid(row=0, column=1)
        random_state_entry.grid(row=1, column=1)
        text_column_entry.grid(row=2, column=1)
        label_column_entry.grid(row=3, column=1)

        text_column_entry.insert(0, "Text")    # Default column name for text
        label_column_entry.insert(0, "Label")  # Default column name for labels

        submit_button = tk.Button(param_window, text="Submit", command=submit)
        submit_button.grid(row=4, columnspan=2)

        param_window.transient(root)
        param_window.grab_set()
        root.wait_window(param_window)

    def load_data(self):
        """Return (text series, label series) from the configured columns."""
        X = self.dataframe[self.parameters['text_column']]
        y = self.dataframe[self.parameters['label_column']]
        return X, y

    def train_model(self):
        """Split, fit the CountVectorizer+MultinomialNB pipeline, report metrics."""
        self.show_status("Loading data...")
        try:
            X, y = self.load_data()
        except KeyError as missing:
            # Fix: a wrong column name previously raised an uncaught KeyError.
            self.show_status(f"Column {missing} not found in the dataframe.")
            return

        self.show_status("Splitting data...")
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=self.parameters['test_size'], random_state=self.parameters['random_state'])

        self.show_status("Training model...")
        self.model = make_pipeline(CountVectorizer(), MultinomialNB())
        self.model.fit(X_train, y_train)

        self.show_status("Evaluating model...")
        y_pred = self.model.predict(X_test)
        report = classification_report(y_test, y_pred)
        self.show_status("Model trained. \n\n" + report)

    def make_prediction(self):
        """Classify the text currently typed in the prediction entry."""
        input_text = self.prediction_entry.get()
        if self.model is not None:
            prediction = self.model.predict([input_text])
            self.show_status(f"Prediction for '{input_text}': {prediction[0]}")
        else:
            self.show_status("Model is not trained yet.")

    def show_status(self, message):
        """Append a line to the (read-only) status area, if it exists yet."""
        if self.status_text:
            self.status_text.config(state=tk.NORMAL)
            self.status_text.insert(tk.END, message + "\n")
            self.status_text.config(state=tk.DISABLED)

    def main(self):
        """Build the main window and run the Tk event loop."""
        global root
        root = tk.Tk()
        root.title("Text Classifier")

        self.status_text = ScrolledText(root, wrap=tk.WORD, state=tk.DISABLED)
        self.status_text.pack(expand=True, fill='both')

        self.get_parameters_window()

        start_button = tk.Button(root, text="Start Training", command=self.train_model)
        start_button.pack()

        tk.Label(root, text="Enter text for prediction:").pack()
        self.prediction_entry = tk.Entry(root)
        self.prediction_entry.pack()

        predict_button = tk.Button(root, text="Make Prediction", command=self.make_prediction)
        predict_button.pack()

        root.mainloop()


def register(app):
    @app.register_plugin('machine_learning', 'text_classifier', 'Text Classifier')
    def text_classifier():  # Fix: was misnamed `kmeans` (copy-paste from kmeans.py)
        text = app.get_dataframe()
        classifier_plugin = TextClassifierPlugin(text)
        classifier_plugin.main()

# /plugins/xgboost.py

#####################################################
#### Package: Aurora
#### Plugin: XGBoost (Regression and Classification)
#### Version: 0.1
#### Author: Marius Neagoe
#### Copyright: © 2024 Marius Neagoe
#### Website: https://mariusneagoe.com
#### Github: https://github.com/MariusNea/Aurora
#####################################################

import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import tkinter as tk
from tkinter import messagebox


def xgboost_trainer_gui(data: pd.DataFrame):
    """Tk window for training an XGBoost regressor on ``data`` and predicting.

    When no feature/target columns are typed in, all columns but the last are
    used as features and the last column as the target.
    """

    model = None
    feature_cols = None
    target_col = None
    prediction_feature_names = None

    def train_xgboost_model(data: pd.DataFrame, feature_cols=None, target_col=None,
                            n_estimators=100, learning_rate=0.1, max_depth=5,
                            early_stopping_rounds=10):
        """Fit an XGBRegressor with manual early stopping.

        Returns (fitted model, importance DataFrame sorted descending,
        list of feature column names).
        """
        if feature_cols is None or target_col is None:
            X = data.iloc[:, :-1]
            y = data.iloc[:, -1]
        else:
            X = data[feature_cols]
            y = data[target_col]

        # Hold out 20% of the rows for validation.
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

        model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=n_estimators,
                                 learning_rate=learning_rate, max_depth=max_depth)

        # Manual early stopping: grow the ensemble one round at a time and
        # stop once validation MSE has not improved for
        # `early_stopping_rounds` consecutive rounds.
        best_error = float("inf")
        stalled = 0
        for boosting_round in range(n_estimators):
            model.n_estimators = boosting_round + 1  # incrementally enlarge the ensemble
            model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)

            val_error = mean_squared_error(y_val, model.predict(X_val))
            if val_error < best_error:
                best_error = val_error
                stalled = 0
            else:
                stalled += 1

            if stalled >= early_stopping_rounds:
                print(f"Early stopping after {boosting_round + 1} rounds.")
                break

        # Rank features by the trained model's importance scores.
        importance = pd.DataFrame({
            'Feature': X.columns,
            'Importance': model.feature_importances_,
        }).sort_values(by='Importance', ascending=False)

        return model, importance, X.columns.tolist()

    def train_model():
        """Read hyperparameters from the widgets, train, and unlock prediction."""
        nonlocal model, feature_cols, target_col, prediction_feature_names

        hyper_widgets = (entry_estimators, entry_lr, entry_depth, entry_early_stopping_rounds)
        if any(not widget.get().strip() for widget in hyper_widgets):
            messagebox.showwarning("Invalid Input", "All fields must not be empty.")
            return

        raw_features = entry_features.get().strip()
        raw_target = entry_target.get().strip()
        feature_cols = [col.strip() for col in raw_features.split(",")] if raw_features else None
        target_col = raw_target if raw_target else None

        try:
            n_estimators = int(entry_estimators.get().strip())
            learning_rate = float(entry_lr.get().strip())
            max_depth = int(entry_depth.get().strip())
            early_stopping_rounds = int(entry_early_stopping_rounds.get().strip())
        except ValueError:
            messagebox.showwarning("Invalid Input", "Please enter valid hyperparameters.")
            return

        model, importance, feature_names = train_xgboost_model(
            data, feature_cols, target_col,
            n_estimators, learning_rate, max_depth, early_stopping_rounds)
        print("Model trained successfully!")
        print(importance)

        # Remember the training feature order for building prediction frames.
        prediction_feature_names = feature_names

        # Unlock the prediction widgets now that a model exists.
        for widget in prediction_entries:
            widget.config(state='normal')
        btn_predict.config(state='normal')

    def predict():
        """Predict on the values typed into the prediction entries."""
        if model is None:
            messagebox.showwarning("Model Not Trained", "Please train the model before making predictions.")
            return

        try:
            values = [float(widget.get().strip()) for widget in prediction_entries]
        except ValueError:
            messagebox.showwarning("Invalid Input", "Please enter valid numbers for predictions.")
            return

        sample = pd.DataFrame([values], columns=prediction_feature_names)
        result = model.predict(sample)[0]
        lbl_prediction_result.config(text=f"Predicted Value: {result:.2f}")

    # ---- Tkinter GUI setup ----
    root = tk.Tk()
    root.title("XGBoost Model Trainer and Predictor")

    tk.Label(root, text="Feature Columns (comma separated):").pack()
    entry_features = tk.Entry(root, width=80)
    entry_features.pack(pady=5)

    tk.Label(root, text="Target Column:").pack()
    entry_target = tk.Entry(root, width=80)
    entry_target.pack(pady=5)

    tk.Label(root, text="Number of Estimators:").pack()
    entry_estimators = tk.Entry(root, width=20)
    entry_estimators.insert(0, "100")  # Default value
    entry_estimators.pack(pady=5)

    tk.Label(root, text="Learning Rate:").pack()
    entry_lr = tk.Entry(root, width=20)
    entry_lr.insert(0, "0.1")  # Default value
    entry_lr.pack(pady=5)

    tk.Label(root, text="Max Depth:").pack()
    entry_depth = tk.Entry(root, width=20)
    entry_depth.insert(0, "5")  # Default value
    entry_depth.pack(pady=5)

    tk.Label(root, text="Early Stopping Rounds:").pack()
    entry_early_stopping_rounds = tk.Entry(root, width=20)
    entry_early_stopping_rounds.insert(0, "10")  # Default value
    entry_early_stopping_rounds.pack(pady=5)

    btn_train_model = tk.Button(root, text="Train Model", command=train_model)
    btn_train_model.pack(pady=20)

    # Prediction Section: one (initially disabled) entry per feature column.
    tk.Label(root, text="Enter Values for Prediction:").pack(pady=10)
    prediction_entries = []
    for i in range(data.shape[1] - 1):  # Number of features
        entry = tk.Entry(root, width=20, state='disabled')
        entry.pack(pady=2)
prediction_entries.append(entry) 165 | 166 | btn_predict = tk.Button(root, text="Predict", command=predict, state='disabled') 167 | btn_predict.pack(pady=20) 168 | 169 | lbl_prediction_result = tk.Label(root, text="") 170 | lbl_prediction_result.pack(pady=5) 171 | 172 | root.mainloop() 173 | 174 | 175 | def register(app): 176 | @app.register_plugin('machine_learning', 'xgboost', 'XGBoost (Regression and Classification)') 177 | def xgboost(): 178 | date = app.get_dataframe() 179 | xgboost_trainer_gui(date) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | altgraph==0.17.4 2 | annotated-types==0.6.0 3 | anyio==3.7.0 4 | appdirs==1.4.4 5 | argon2-cffi==21.3.0 6 | argon2-cffi-bindings==21.2.0 7 | arrow==1.2.3 8 | astor==0.8.1 9 | asttokens==2.2.1 10 | attrs==23.1.0 11 | autocommand==2.2.2 12 | backcall==0.2.0 13 | backports.csv==1.0.7 14 | beautifulsoup4==4.11.1 15 | bleach==6.0.0 16 | certifi==2022.9.24 17 | cffi==1.15.1 18 | charset-normalizer==2.1.1 19 | cheroot==10.0.0 20 | CherryPy==18.9.0 21 | click==8.1.7 22 | cloudpickle==3.0.0 23 | colorama==0.4.6 24 | comm==0.1.3 25 | configparser==6.0.0 26 | contourpy==1.1.0 27 | cryptography==41.0.1 28 | cycler==0.11.0 29 | Cython==3.0.4 30 | dask==2023.5.0 31 | debugpy==1.6.7 32 | decorator==5.1.1 33 | defusedxml==0.7.1 34 | distributed==2023.5.0 35 | docopt==0.6.2 36 | entrypoints==0.4 37 | exceptiongroup==1.1.1 38 | executing==1.2.0 39 | extruct==0.14.0 40 | fastdtw==0.3.4 41 | fastjsonschema==2.17.1 42 | feedparser==6.0.11 43 | fonttools==4.42.1 44 | fqdn==1.5.1 45 | frozendict==2.3.8 46 | fsspec==2024.2.0 47 | future==1.0.0 48 | futures==3.0.5 49 | html-text==0.5.2 50 | html5lib==1.1 51 | idna==3.4 52 | importlib-metadata==6.6.0 53 | importlib-resources==5.12.0 54 | inflect==7.0.0 55 | ipykernel==6.23.2 56 | ipython==8.12.2 57 | ipython-genutils==0.2.0 58 | ipywidgets==8.1.2 59 | 
isodate==0.6.1 60 | isoduration==20.11.0 61 | jaraco.collections==5.0.0 62 | jaraco.context==4.3.0 63 | jaraco.functools==4.0.0 64 | jaraco.text==3.12.0 65 | jedi==0.18.2 66 | Jinja2==3.1.2 67 | joblib==1.3.2 68 | jsonpointer==2.4 69 | jsonschema==4.17.3 70 | jstyleson==0.0.2 71 | jupyter-events==0.6.3 72 | jupyter_client==8.2.0 73 | jupyter_core==5.3.1 74 | jupyter_server==2.6.0 75 | jupyter_server_terminals==0.4.4 76 | jupyterlab-pygments==0.2.2 77 | jupyterlab_widgets==3.0.10 78 | kiwisolver==1.4.5 79 | locket==1.0.0 80 | lxml==4.9.1 81 | MarkupSafe==2.1.3 82 | matplotlib==3.7.2 83 | matplotlib-inline==0.1.6 84 | mechanize==0.4.8 85 | mf2py==1.1.2 86 | mistune==2.0.5 87 | more-itertools==10.2.0 88 | mplcursors==0.5.3 89 | msgpack==1.0.7 90 | multitasking==0.0.11 91 | mysqlclient==2.2.4 92 | nbclassic==1.0.0 93 | nbclient==0.8.0 94 | nbconvert==7.5.0 95 | nbformat==5.9.0 96 | nest-asyncio==1.5.6 97 | networkx==3.1 98 | nltk==3.8.1 99 | notebook==6.5.4 100 | notebook_shim==0.2.3 101 | numpy==1.24.4 102 | overrides==7.3.1 103 | packaging==23.1 104 | pandas==2.0.3 105 | pandasgui==0.2.14 106 | pandocfilters==1.5.0 107 | parso==0.8.3 108 | partd==1.4.1 109 | patsy==0.5.6 110 | Pattern==3.6 111 | pdfminer.six==20231228 112 | pefile==2023.2.7 113 | pickleshare==0.7.5 114 | Pillow==10.0.0 115 | pipreqs==0.4.13 116 | pkgutil_resolve_name==1.3.10 117 | platformdirs==3.5.3 118 | plotly==5.16.1 119 | portend==3.2.0 120 | portpicker==1.6.0 121 | prometheus-client==0.17.0 122 | prompt-toolkit==3.0.38 123 | protobuf==4.25.2 124 | psutil==5.9.5 125 | pure-eval==0.2.2 126 | pyaes==1.6.1 127 | pyarrow==15.0.0 128 | pyasn1==0.5.0 129 | pycparser==2.21 130 | pydantic==2.6.4 131 | pydantic_core==2.16.3 132 | Pygments==2.15.1 133 | pyinstaller==6.3.0 134 | pyinstaller-hooks-contrib==2024.0 135 | pynput==1.7.6 136 | pyparsing==3.0.9 137 | PyQt5==5.15.10 138 | PyQt5-Qt5==5.15.2 139 | PyQt5-sip==12.13.0 140 | PyQtWebEngine==5.15.6 141 | PyQtWebEngine-Qt5==5.15.2 142 | pyRdfa3==3.5.3 143 
| pyrsistent==0.19.3 144 | python-dateutil==2.8.2 145 | python-docx==1.1.0 146 | python-json-logger==2.0.7 147 | pytz==2023.3 148 | pywin32==306 149 | pywin32-ctypes==0.2.2 150 | pywinpty==2.0.10 151 | PyYAML==6.0 152 | pyzmq==25.1.0 153 | qtstylish==0.1.5 154 | rdflib==6.2.0 155 | recipe-scrapers==14.23.0 156 | regex==2023.12.25 157 | requests==2.28.1 158 | rfc3339-validator==0.1.4 159 | rfc3986-validator==0.1.1 160 | rsa==4.9 161 | scikit-learn==1.3.0 162 | scipy==1.10.1 163 | seaborn==0.13.2 164 | Send2Trash==1.8.2 165 | sgmllib3k==1.0.0 166 | six==1.16.0 167 | sklearn==0.0.post7 168 | sniffio==1.3.0 169 | sortedcontainers==2.4.0 170 | soupsieve==2.3.2.post1 171 | stack-data==0.6.2 172 | statsmodels==0.14.1 173 | tblib==3.0.0 174 | telegram==0.0.1 175 | Telethon==1.31.1 176 | tempora==5.5.1 177 | tenacity==8.2.3 178 | terminado==0.17.1 179 | threadpoolctl==3.2.0 180 | tinycss2==1.2.1 181 | tk==0.1.0 182 | toolz==0.12.1 183 | tornado==6.3.2 184 | tqdm==4.66.2 185 | traitlets==5.9.0 186 | ttkthemes==3.2.2 187 | typing_extensions==4.6.3 188 | tzdata==2023.3 189 | uri-template==1.2.0 190 | urllib3==1.26.12 191 | vl-convert-python==1.2.3 192 | w3lib==2.0.1 193 | wcwidth==0.2.6 194 | webcolors==1.13 195 | webencodings==0.5.1 196 | websocket-client==1.5.3 197 | widgetsnbextension==4.0.10 198 | wordcloud==1.9.3 199 | yarg==0.1.9 200 | yfinance==0.2.20 201 | zc.lockfile==3.0.post1 202 | zict==3.0.0 203 | zipp==3.15.0 204 | torch==2.3.0 205 | xgboost==2.1.1 --------------------------------------------------------------------------------