├── .github
    ├── demo.mp4
    └── logo.png
├── .gitignore
├── LICENSE
├── README.md
├── cli
    ├── __init__.py
    └── commands
    │   ├── __init__.py
    │   └── command.py
├── connor
    ├── __init__.py
    ├── data
    │   └── config.ini
    ├── fonts
    │   └── Coder's Crux.ttf
    ├── processes.py
    ├── reader.py
    ├── static
    │   ├── icons
    │   │   ├── clear.png
    │   │   ├── folder.png
    │   │   ├── refresh.png
    │   │   └── upload.png
    │   └── style.css
    ├── tmp
    │   └── keep.txt
    └── tree_builder.py
├── gui
    ├── __init__.py
    └── views
    │   ├── __init__.py
    │   ├── about.py
    │   ├── organizer.py
    │   └── settings.py
├── requirements.txt
├── run.py
└── setup.py


/.github/demo.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycatsh/connor/991eca9084bfd1b09433c76cef5cbafe9180dfc2/.github/demo.mp4


--------------------------------------------------------------------------------
/.github/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycatsh/connor/991eca9084bfd1b09433c76cef5cbafe9180dfc2/.github/logo.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Releases
  2 | .releases/
  3 | 
  4 | # Byte-compiled / optimized / DLL files
  5 | __pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | 
  9 | # C extensions
 10 | *.so
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | downloads/
 18 | eggs/
 19 | .eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | share/python-wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | MANIFEST
 31 | 
 32 | # PyInstaller
 33 | #  Usually these files are written by a python script from a template
 34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 35 | *.manifest
 36 | *.spec
 37 | 
 38 | # Installer logs
 39 | pip-log.txt
 40 | pip-delete-this-directory.txt
 41 | 
 42 | # Unit test / coverage reports
 43 | htmlcov/
 44 | .tox/
 45 | .nox/
 46 | .coverage
 47 | .coverage.*
 48 | .cache
 49 | nosetests.xml
 50 | coverage.xml
 51 | *.cover
 52 | *.py,cover
 53 | .hypothesis/
 54 | .pytest_cache/
 55 | cover/
 56 | 
 57 | # Translations
 58 | *.mo
 59 | *.pot
 60 | 
 61 | # Django stuff:
 62 | *.log
 63 | local_settings.py
 64 | db.sqlite3
 65 | db.sqlite3-journal
 66 | 
 67 | # Flask stuff:
 68 | instance/
 69 | .webassets-cache
 70 | 
 71 | # Scrapy stuff:
 72 | .scrapy
 73 | 
 74 | # Sphinx documentation
 75 | docs/_build/
 76 | 
 77 | # PyBuilder
 78 | .pybuilder/
 79 | target/
 80 | 
 81 | # Jupyter Notebook
 82 | .ipynb_checkpoints
 83 | 
 84 | # IPython
 85 | profile_default/
 86 | ipython_config.py
 87 | 
 88 | # pyenv
 89 | #   For a library or package, you might want to ignore these files since the code is
 90 | #   intended to run in multiple environments; otherwise, check them in:
 91 | # .python-version
 92 | 
 93 | # pipenv
 94 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 95 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 96 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 97 | #   install all needed dependencies.
 98 | #Pipfile.lock
 99 | 
100 | # poetry
101 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
103 | #   commonly ignored for libraries.
104 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105 | #poetry.lock
106 | 
107 | # pdm
108 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109 | #pdm.lock
110 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111 | #   in version control.
112 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
113 | .pdm.toml
114 | .pdm-python
115 | .pdm-build/
116 | 
117 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
118 | __pypackages__/
119 | 
120 | # Celery stuff
121 | celerybeat-schedule
122 | celerybeat.pid
123 | 
124 | # SageMath parsed files
125 | *.sage.py
126 | 
127 | # Environments
128 | .env
129 | .venv
130 | env/
131 | venv/
132 | ENV/
133 | env.bak/
134 | venv.bak/
135 | 
136 | # Spyder project settings
137 | .spyderproject
138 | .spyproject
139 | 
140 | # Rope project settings
141 | .ropeproject
142 | 
143 | # mkdocs documentation
144 | /site
145 | 
146 | # mypy
147 | .mypy_cache/
148 | .dmypy.json
149 | dmypy.json
150 | 
151 | # Pyre type checker
152 | .pyre/
153 | 
154 | # pytype static type analyzer
155 | .pytype/
156 | 
157 | # Cython debug symbols
158 | cython_debug/
159 | 
160 | # PyCharm
161 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
162 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
163 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
164 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
165 | #.idea/
166 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 ycatsh
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <h1 align="center">
  2 | <img src="./.github/logo.png" alt="Connor">
  3 | </h1>
  4 | 
  5 | Connor is a file organizer written in [Python](https://www.python.org/). It makes use of the [sentence-transformers](https://sbert.net/) framework for the main organization process and the [PyQt6](https://doc.qt.io/qtforpython-6/) GUI toolkit for the graphical user interface. **It is by no means supposed to substitute for organizing files by hand. It is just a concept**. Connor features a fast and fully local file organizer that uses natural language processing to organize computer files based on their textual content.
  6 | <br>
  7 | 
  8 | <div align="center">
  9 | 
 10 | ![releases](https://img.shields.io/github/v/release/ycatsh/connor?color=507591&labelColor=1d1e1f&style=flat)
 11 | ![issues-open](https://img.shields.io/github/issues/ycatsh/connor?color=507591&labelColor=1d1e1f&style=flat)
 12 | ![stars](https://img.shields.io/github/stars/ycatsh/connor?color=507591&labelColor=1d1e1f&style=flat)
 13 | 
 14 | </div>
 15 | 
 16 | https://github.com/user-attachments/assets/b0d151c6-9a8b-4710-92e9-d410edc57b84
 17 | 
 18 | ## Features
 19 | Connor runs locally using the `sentence-transformers/paraphrase-MiniLM-L6-v2` model to analyze file content and organize them based on semantic similarity. It uses cosine similarity to group similar files and applies Latent Dirichlet Allocation (LDA) to name folders.  
 20 | 
 21 | Unprocessable files (e.g., images, binaries) are sorted into a `_misc` folder based on their extensions.
 22 | 
 23 | ### Customization Options
 24 | 1. **Similarity Threshold:** Set the minimum similarity percentage threshold for grouping.
 25 | 2. **Reading Word Limit:** Limit how much of a file is read.
 26 | 3. **Folder Name Word Limit:** Set max words for folder names.
 27 | 
 28 | ### User Preferences
 29 | **Command Line Interface**: Quick folder organization.  
 30 | **Graphical Interface**: Simple GUI with file upload support.
 31 | 
 32 | 
 33 | <br>
 34 | <br>
 35 | 
 36 | 
 37 | ## Installation
 38 | There are installation instructions for both GUI and CLI. You can choose the one you want to install. If you're opting for building the application from [source](https://github.com/ycatsh/connor#source) then adding the run file to path is recommended.
 39 | 
 40 | **Install Connor via pip:**
 41 | 1. Make sure you have `python` and `pip` installed and added to path.
 42 | 2. Run `pip install connor-nlp`  
 43 | 
 44 | <br>
 45 | 
 46 | **Install the GUI version of Connor (executable)**
 47 | 1. Go to the [latest release](https://github.com/ycatsh/connor/releases).
 48 | 2. Follow the steps there.
 49 | 3. Run the executable (`.exe`).  
 50 | 
 51 | 
 52 | <br>
 53 | <br>
 54 | 
 55 | 
 56 | ## Usage
 57 | 
 58 | ### Command Structure
 59 | 
 60 | ```bash
 61 | connor [command] [options]
 62 | ```
 63 | 
 64 | ### Commands
 65 | #### `run`: Run the folder organization process.
 66 | 
 67 | **Usage:**
 68 | ```bash
 69 | connor run <folder_path>
 70 | ```
 71 | 
 72 | **Options:**
 73 | - `folder_path`: Required. Absolute path to the folder that you want to organize.
 74 | 
 75 | **Example:**
 76 | ```bash
 77 | connor run /path/to/your/folder
 78 | ```
 79 | 
 80 | <br>
 81 | 
 82 | #### `settings`: Update the default settings for the tool.
 83 | 
 84 | **Usage:**
 85 | ```bash
 86 | connor settings [options]
 87 | ```
 88 | 
 89 | **Options:**
 90 | - `-f, --folder-word-limit`: Set the maximum length for folder names. (default: 3)
 91 | - `-r, --reading-limit`: Specify the word limit for reading files. (default: 200)
 92 | - `-t, --similarity-threshold`: Define the similarity threshold percentage. (default: 50)
 93 | - `--show`: Show current settings
 94 | 
 95 | **Example:**
 96 | ```bash
 97 | connor settings -f 2 -r 150 -t 60
 98 | ```
 99 | 
100 | ```console
101 | $ connor settings --show
102 | To see how to update: Connor settings [-h]
103 | 
104 | Current settings:
105 |   folder words limit     3
106 |   reading limit          200
107 |   similarity threshold   50%
108 | ```
109 | 
110 | <br>
111 | 
112 | #### `--gui`: Run Connor as a full fledged GUI from the terminal.
113 | 
114 | **Usage:**
115 | ```bash
116 | connor --gui
117 | ```
118 | 
119 | <br>
120 | 
121 | ### Help
122 | To view help information for commands and options, use the `-h` or `--help` flag.  
123 | 
124 | **Example:**
125 | ```console
126 | $ connor -h
127 | usage: Connor [-h] [--gui] {settings,run} ...
128 | 
129 | Connor: Fast and local NLP file organizer
130 | 
131 | positional arguments:
132 |   {settings,run}
133 |     settings      Update the settings for the organizer
134 |     run           Run the folder organization process
135 | 
136 | options:
137 |   -h, --help      show this help message and exit
138 |   --gui           Run the application in GUI mode.
139 | ```
140 | 
141 | <br>
142 | <br>
143 | 
144 | 
145 | ## Source
146 | #### 1. Clone repository:
147 | ```bash
148 | git clone https://github.com/ycatsh/connor.git
149 | cd connor
150 | ```  
151 | #### 2. Create and activate virtual environment:
152 | ```bash
153 | python3 -m venv venv
154 | source venv/bin/activate
155 | ```  
156 | #### 3. Install dependencies:
157 | ```bash
158 | pip3 install -r requirements.txt
159 | ```
160 | #### 4. Run program:
161 | For GUI:
162 | ```bash
163 | python3 run.py --gui
164 | ```
165 | For CLI:
166 | ```bash
167 | python3 run.py -h
168 | ```
169 | 
170 | #### 5. Install locally (optional):
171 | ```bash
172 | pip3 install .
173 | ```  
174 |   
175 | **Example:**  
176 | ```bash
177 | connor --gui
178 | ```
179 | ```bash
180 | connor -h
181 | ```
182 | 
183 | 
184 | <br>
185 | <br>
186 | 
187 | 
188 | ## License
189 | This project is distributed under MIT License, which can be found in LICENSE in the root dir of the project. I reserve the right to place future versions of this project under a different license.


--------------------------------------------------------------------------------
/cli/__init__.py:
--------------------------------------------------------------------------------
1 | from .commands import ConnorCLI
2 | 
3 | 
def main():
    """Create the command-line interface object and return it to the caller."""
    cli = ConnorCLI()
    return cli


if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/cli/commands/__init__.py:
--------------------------------------------------------------------------------
1 | from .command import ConnorCLI


--------------------------------------------------------------------------------
/cli/commands/command.py:
--------------------------------------------------------------------------------
 1 | import configparser
 2 | import shutil
 3 | import os
 4 | 
 5 | from connor import (
 6 |     init, data_path,
 7 | )
 8 | from connor.processes import (
 9 |     get_file_word_list, sim_organize, 
10 |     rename_folders, organize
11 | )
12 | from connor.tree_builder import make_tree
13 | from connor.reader import prep_files
14 | 
15 | 
class ConnorCLI:
    """Command-line front end for the organizer.

    Keeps the three tunable parameters (folder name length, reading word
    limit, similarity threshold) in sync with the ``[Parameters]`` section of
    ``config.ini`` under ``data_path`` and implements the settings and
    organize actions.
    """

    def __init__(self):
        """Load the parameters from the config file, falling back to defaults."""
        # Loads the default settings from config file
        self.settings = configparser.ConfigParser()
        self.settings.read(os.path.join(data_path, "config.ini"))

        # ConfigParser.read() silently ignores files it cannot open, so the
        # section may be absent; create it so the defaults below apply and
        # update_settings() can still write values back.
        if not self.settings.has_section("Parameters"):
            self.settings.add_section("Parameters")
        parameters = self.settings["Parameters"]

        # Load initial parameters from config
        self.folder_name_length = int(parameters.get("folder_name_length", 3))
        self.reading_word_limit = int(parameters.get("reading_word_limit", 200))
        self.similarity_threshold = int(parameters.get("similarity_threshold", 50))

        # Separator line spanning the current terminal width
        terminal_width = shutil.get_terminal_size().columns
        self.separator = '-' * terminal_width

    def update_settings(self, folder_name_length=None, reading_word_limit=None, similarity_threshold=None):
        """Update the given parameters and write the config file.

        Arguments left as ``None`` keep their current value. The config file
        is rewritten and a confirmation is printed on every call.
        """
        if folder_name_length is not None:
            self.folder_name_length = folder_name_length
            self.settings["Parameters"]["folder_name_length"] = str(folder_name_length)
        if reading_word_limit is not None:
            self.reading_word_limit = reading_word_limit
            self.settings["Parameters"]["reading_word_limit"] = str(reading_word_limit)
        if similarity_threshold is not None:
            self.similarity_threshold = similarity_threshold
            self.settings["Parameters"]["similarity_threshold"] = str(similarity_threshold)

        # Save updated settings to config file
        with open(os.path.join(data_path, "config.ini"), "w") as configfile:
            self.settings.write(configfile)
        print("Settings updated successfully.")

    def show_settings(self):
        """Print the current parameter values as an aligned table."""
        print("To see how to update: Connor settings [-h]")
        print("\nCurrent settings:")
        print(f"  {'folder words limit':<22} {self.folder_name_length}")
        print(f"  {'reading limit':<22} {self.reading_word_limit}")
        print(f"  {'similarity threshold':<22} {self.similarity_threshold}%")

    def organize_folder(self, folder_path):
        """Preview and, after confirmation, organize the files in *folder_path*.

        Prints an error and returns without touching anything when
        *folder_path* is not an existing directory or when the models cannot
        be initialized. Otherwise prints the proposed directory tree and asks
        for confirmation; an empty answer or ``y`` proceeds, anything else
        (including Ctrl-C or end-of-input) aborts.
        """
        # Validate the target first so a bad path does not pay for model loading
        if not os.path.isdir(folder_path):
            print(f"Error: The folder '{folder_path}' does not exist.")
            return

        model, stop_words, lda_model, vectorizer = init()
        if model is None:
            # init() hands back four None values when the model cannot be loaded
            print("Error: The language model could not be initialized.")
            return

        print(self.separator)
        print(f'To customize default settings instead run the command <connor settings -h>\nfolder_name_length: {self.folder_name_length}\nreading_word_limit: {self.reading_word_limit}\nsimilarity_threshold: {self.similarity_threshold}%')
        print(self.separator)
        print(f"Folder '{folder_path}' is being organized...")

        # Preparing files and organizing
        prep_files(folder_path, select_folder=True)
        self.file_list, misc_list = get_file_word_list(folder_path, self.reading_word_limit, stop_words)
        # The configured threshold is a percentage; sim_organize receives a fraction
        folder_dict, misc_list = sim_organize(model, self.similarity_threshold / 100, self.file_list, misc_list)

        # Fitting the model based on the data provided
        data_vectorized = vectorizer.fit_transform(words[1] for words in self.file_list)
        lda_model.fit(data_vectorized)

        # Main Process
        renamed_dict = rename_folders(vectorizer, lda_model, folder_dict, self.file_list,
                                      self.folder_name_length, misc_list)
        print(make_tree(path=folder_path, dict=renamed_dict, is_path_only=False, cli=True))
        print(self.separator)

        # Confirm Organization
        try:
            confirm = input("The above directory tree explains how the folder will be organized.\nDo you want to continue? [y/n] ")
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C or a closed stdin at the prompt both mean "do nothing"
            print(f"\nAbort. The files in '{folder_path}' were left untouched.")
            return

        if confirm.lower() == 'y' or confirm == '':
            try:
                organize(folder_path, renamed_dict, self.reading_word_limit, self.folder_name_length,
                         vectorizer, lda_model, model, stop_words)
            except KeyboardInterrupt:
                print(f"\nAbort. The files in '{folder_path}' were left untouched.")
                return
            print(f"Folder '{folder_path}' organized successfully.")
        else:
            print(f"Folder organization aborted. The files in '{folder_path}' were left untouched.")
        print(self.separator)


--------------------------------------------------------------------------------
/connor/__init__.py:
--------------------------------------------------------------------------------
 1 | import warnings
 2 | import logging
 3 | import sys
 4 | import os
 5 | 
 6 | 
 7 | # Managing Paths
 8 | def get_path(relative_path):
 9 |     if getattr(sys, 'frozen', False):
10 |         base_path = os.path.dirname(sys.executable)
11 |     else:
12 |         base_path = os.path.abspath(os.path.dirname(__file__))
13 | 
14 |     return os.path.join(base_path, relative_path)
15 | 
16 | static_path = get_path('static')
17 | font_path = get_path('fonts')
18 | data_path = get_path('data')
19 | tmp_path = get_path('tmp')
20 | 
21 | 
22 | # Organization Parameters and Models
TOPICS = 50
MISCELLANEOUS_FOLDER_NAME = "_misc"


def initialize_dependencies():
    """Build the components used for organizing, logging every step.

    Returns:
        A ``(model, stop_words, lda_model, vectorizer)`` tuple, or four
        ``None`` values when the sentence-transformer model cannot be created.
    """
    # Load imports on demand
    from sklearn.decomposition import LatentDirichletAllocation
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sentence_transformers import SentenceTransformer
    from nltk.corpus import stopwords
    import nltk

    # Logging
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    log = logging.getLogger(__name__)
    logging.getLogger("sentence_transformers").setLevel(logging.WARNING)
    warnings.filterwarnings("ignore")

    print("Downloading dependencies...")
    log.info("Initializing Sentence Transformer model...")
    try:
        model = SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L6-v2')
        log.info("Sentence Transformer model downloaded successfully.")
    except Exception as exc:
        log.error("Error downloading Sentence Transformer model: %s", exc)
        return (None,) * 4

    log.info("Setting up NLTK stop words...")
    nltk.download('stopwords', quiet=True)
    english_stop_words = set(stopwords.words('english'))
    log.info("NLTK component set up successfully.")

    log.info("Initializing LDA model with %d topics...", TOPICS)
    lda_model = LatentDirichletAllocation(n_components=TOPICS, learning_decay=0.7, random_state=0)
    log.info("LDA model initialized successfully.")

    log.info("Initializing TF-IDF Vectorizer...")
    vectorizer = TfidfVectorizer(max_df=0.8, min_df=2, stop_words='english')
    log.info("TF-IDF Vectorizer initialized successfully.")

    return model, english_stop_words, lda_model, vectorizer
63 | 
64 | 
65 | # Initialize models
def init():
    """Return ``(model, stop_words, lda_model, vectorizer)`` ready for use.

    On the first run (no ``init.txt`` marker under ``data_path``) the
    components come from ``initialize_dependencies()``. The marker is written
    only after that call returned a model, so a failed first run is retried
    next time instead of being recorded as done. On later runs the components
    are constructed directly.
    """
    marker_file = os.path.join(data_path, "init.txt")

    if not os.path.exists(marker_file):
        components = initialize_dependencies()
        # initialize_dependencies() returns four None values on failure
        if components[0] is not None:
            with open(marker_file, 'w') as f:
                f.write('initialized')
        return components

    # Load imports on demand
    from sklearn.decomposition import LatentDirichletAllocation
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sentence_transformers import SentenceTransformer
    from nltk.corpus import stopwords

    model = SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L6-v2')
    stop_words = set(stopwords.words('english'))
    lda_model = LatentDirichletAllocation(n_components=TOPICS, learning_decay=0.7, random_state=0)
    vectorizer = TfidfVectorizer(max_df=0.8, min_df=2, stop_words='english')
    return model, stop_words, lda_model, vectorizer


--------------------------------------------------------------------------------
/connor/data/config.ini:
--------------------------------------------------------------------------------
 1 | [Parameters]
 2 | folder_name_length = 3
 3 | reading_word_limit = 200
 4 | similarity_threshold = 50
 5 | 
 6 | [Extension_Map]
 7 | documents = docx odt pdf rtf
 8 | text_files = txt
 9 | markup = html md
10 | executables = exe msi bat
11 | images = jpg png gif bmp svg
12 | spreadsheets = xlsx csv ods
13 | presentations = pptx ppt odp
14 | audio = mp3 wav aac flac
15 | video = mp4 avi mkv mov
16 | archives = zip rar 7z tar gz
17 | programming = py cpp c java js
18 | style_sheets = css scss
19 | databases = sqlite db sql
20 | fonts = ttf otf woff
21 | scripts = sh ps1 bash
22 | configuration = ini cfg yaml
23 | logs = log
24 | torrent = torrent
25 | backup = bak
26 | web = php asp jsp
27 | system = dll sys
28 | compressed = zip rar 7z tar gz
29 | virtual_machines = ova vdi vmdk
30 | certificates = crt pem
31 | 
32 | 


--------------------------------------------------------------------------------
/connor/fonts/Coder's Crux.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycatsh/connor/991eca9084bfd1b09433c76cef5cbafe9180dfc2/connor/fonts/Coder's Crux.ttf


--------------------------------------------------------------------------------
/connor/processes.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import shutil
  3 | import string
  4 | import configparser
  5 | 
  6 | import numpy as np
  7 | from numpy import dot
  8 | from numpy.linalg import norm
  9 | 
 10 | from connor import (
 11 |     data_path, MISCELLANEOUS_FOLDER_NAME
 12 | )
 13 | from connor.reader import read_files
 14 | 
 15 | 
 16 | # Pre-processing the text to focus on relevant content
 17 | def preprocess(text, stop_words):
 18 |     text = text.translate(str.maketrans('', '', string.punctuation))
 19 |     preprocessed = []
 20 |     for word in text.split():
 21 |         if word.lower() not in stop_words:
 22 |             try:
 23 |                 num_word = int(word)
 24 |                 if num_word > 100: # Ignore small numbers in file names
 25 |                     preprocessed.append(word)
 26 |             except ValueError:
 27 |                 preprocessed.append(word)
 28 |     return ' '.join(preprocessed)
 29 | 
 30 | 
 31 | # Returns the (file name, preprocessed content) pairs of the text-based files, plus the files that are not text-based (i.e. misc)
 32 | def get_file_word_list(path, word_limit, stop_words):
 33 |     raw_text_based, misc = read_files(path, word_limit)
 34 |     text_based = [(file, preprocess(content, stop_words)) for file, content in raw_text_based if content]
 35 |     return text_based, misc
 36 | 
 37 | 
 38 | # Cosine similarity calculation
 39 | def calculate_similarity(embeddings):
 40 |     return dot(embeddings[0], embeddings[1]) / (norm(embeddings[0]) * norm(embeddings[1]))
 41 | 
 42 | 
 43 | # Files that have a similarity score over a certain threshold are grouped together
 44 | def sim_organize(model, simlarity_threshold, files_words_list, misc_list):
 45 |     grouped_files = set()
 46 |     file_groups = {}
 47 |     embeddings = model.encode([w[1] for w in files_words_list], convert_to_tensor=True)
 48 | 
 49 |     for i, parent_files in enumerate(files_words_list):
 50 |         if parent_files[0] not in grouped_files:
 51 |             is_misc = True
 52 |             for j, other_files in enumerate(files_words_list):
 53 |                 if i != j and other_files[0] not in grouped_files:
 54 |                     score = calculate_similarity([embeddings[i], embeddings[j]])
 55 | 
 56 |                     if score >= simlarity_threshold: # similarity threshold decided by user (default: 50%)
 57 |                         if parent_files[0] not in file_groups:
 58 |                             file_groups[parent_files[0]] = [parent_files[0]]
 59 | 
 60 |                         file_groups[parent_files[0]].append(other_files[0])
 61 |                         grouped_files.add(other_files[0])
 62 |                         is_misc = False
 63 | 
 64 |             grouped_files.add(parent_files[0])
 65 |             if is_misc:
 66 |                 misc_list.append(parent_files[0])
 67 | 
 68 |     return file_groups, misc_list
 69 | 
 70 | 
 71 | # Generating names for the folders 
 72 | def name_category(vectorizer, lda_model, text_list, folder_word_limit=5, delimiter="_"):
 73 |     if not text_list:
 74 |         return "Untitled"
 75 |     
 76 |     text_vectorized = vectorizer.transform(text_list)
 77 |     topic_distribution = lda_model.transform(text_vectorized)
 78 |     dominant_topic_index = np.argmax(topic_distribution, axis=1)[0]
 79 | 
 80 |     feature_names = vectorizer.get_feature_names_out()
 81 |     topic_words = lda_model.components_[dominant_topic_index]
 82 |     top_word_indices = topic_words.argsort()[-folder_word_limit:][::-1] # Folder word length limit
 83 |     top_words = [feature_names[i].capitalize() for i in top_word_indices]
 84 | 
 85 |     folder_name = delimiter.join(top_words)
 86 |     if folder_name:
 87 |         return folder_name
 88 |     else:
 89 |         return folder_name_fallback(vectorizer, text_list, folder_word_limit)
 90 | 
 91 | 
 92 | def folder_name_fallback(vectorizer, text_list, folder_word_limit=5, delimiter="_"):
 93 |     text_vectorized = vectorizer.transform(text_list)
 94 |     feature_names = vectorizer.get_feature_names_out()
 95 |     scores = text_vectorized.sum(axis=0).A1
 96 |     
 97 |     top_word_indices = scores.argsort()[-folder_word_limit:][::-1]
 98 |     top_words = [feature_names[i] for i in top_word_indices]
 99 |     capitalized_words = [word.capitalize() for word in top_words]
100 | 
101 |     return delimiter.join(capitalized_words)
102 | 
103 | 
104 | # Handling files that cannot be organized (misc)
def misc_handler(misc_files):
    """Group the miscellaneous files by the category of their extension.

    Categories come from the ``[Extension_Map]`` section of ``config.ini``
    (category name -> space-separated extensions). The lookup ignores case,
    so ``photo.JPG`` and ``photo.jpg`` land in the same category. A file
    whose extension is not mapped is grouped under the extension itself.

    Returns:
        ``{MISCELLANEOUS_FOLDER_NAME: {group_name: [file, ...], ...}}``
    """
    config = configparser.ConfigParser()
    config.read(os.path.join(data_path, "config.ini"))
    exts = config['Extension_Map']

    # Build the extension -> category lookup once; setdefault keeps the first
    # category listing an extension, matching a top-to-bottom scan of the map.
    category_by_ext = {}
    for category, extensions in exts.items():
        for ext in extensions.split():
            category_by_ext.setdefault(ext.lower(), category)

    grouped = {}
    for misc_file in misc_files:
        file_ext = os.path.splitext(misc_file)[1][1:]
        group_name = category_by_ext.get(file_ext.lower()) or file_ext
        grouped.setdefault(group_name, []).append(misc_file)

    return {MISCELLANEOUS_FOLDER_NAME: grouped}
129 | 
130 | 
131 | # Re-name the folders with the names determined using topic modeling
def rename_folders(vectorizer, lda_model, folder_dict, files_words_list, folder_word_limit, misc_files):
    """Replace the group keys of *folder_dict* with generated folder names.

    For each group the topic-model name is tried first; on a clash with an
    already used name the fallback name is tried, and after that a numeric
    suffix is appended to the topic-model name until it is unique.

    Returns:
        A dict mapping each unique folder name to its list of files, merged
        with the dict produced by ``misc_handler(misc_files)``.
    """
    renamed_dict = {}
    folder_names = set()

    for similar_files in folder_dict.values():
        members = set(similar_files)  # O(1) membership while scanning all files
        content = [files[1] for files in files_words_list if files[0] in members]

        # The topic-model name is computed once and reused as the suffix base
        base_name = name_category(vectorizer, lda_model, content, folder_word_limit)
        folder_name = base_name
        if folder_name in folder_names and content:
            folder_name = folder_name_fallback(vectorizer, content, folder_word_limit)

        counter = 1
        while folder_name in folder_names:
            folder_name = f"{base_name}_{counter}"
            counter += 1

        folder_names.add(folder_name)
        renamed_dict[folder_name] = similar_files

    misc_dict = misc_handler(misc_files)

    return {**renamed_dict, **misc_dict}
157 | 
158 | 
159 | # Handles moving files
def move_file(path, file_name, destination_path):
    """Move *file_name* from *path* into *destination_path*.

    Does nothing when the source file does not exist. The destination
    directory is created if needed so the move cannot fail on a missing
    target folder.
    """
    source_file = os.path.join(path, file_name)
    if not os.path.exists(source_file):
        return

    os.makedirs(destination_path, exist_ok=True)
    shutil.move(source_file, os.path.join(destination_path, file_name))
166 | 
167 | 
168 | # Organizing files that are similar (determined using NLP)
def base_organize(path, renamed_dict):
    """Create the folders named in *renamed_dict* under *path* and fill them.

    A list value holds the file names to move from *path* into the folder.
    A dict value describes sub-folders and is handled recursively; for the
    miscellaneous folder the files are additionally moved from *path* into
    the matching sub-folder.
    """
    for folder, folder_content in renamed_dict.items():
        folder_path = os.path.join(path, folder)
        if not os.path.exists(folder_path):
            os.mkdir(folder_path)

        if isinstance(folder_content, dict):
            # For sub-folders
            base_organize(folder_path, folder_content)

            # For misc-folder
            if folder == MISCELLANEOUS_FOLDER_NAME:
                for sub_folder, file_names in folder_content.items():
                    target = os.path.join(folder_path, sub_folder)
                    for file_name in file_names:
                        move_file(path, file_name, target)
        elif isinstance(folder_content, list):
            for file_name in folder_content:
                move_file(path, file_name, folder_path)
189 | 
190 | 
191 | # Organizing inside the generated folders
def sub_organize(path, folder_dict, word_limit, folder_word_limit, vectorizer, lda_model, model, stop_words):
    """Re-group the contents of every generated folder that holds more than 6 entries.

    The folder's files are read again, grouped by similarity, and only split
    into named sub-folders when more than one group comes out.
    """
    for folder, folder_content in folder_dict.items():
        sub_folder = os.path.join(path, folder)

        # Small folders are left untouched.
        if len(folder_content) <= 6:
            continue

        sub_file_word_list = get_file_word_list(sub_folder, word_limit, stop_words)[0]
        # Grouped only if similarity >= 75%
        sub_folder_dict = sim_organize(model, sub_file_word_list, simlarity_threshold=0.75)

        # A single group means there is nothing to split.
        if len(sub_folder_dict) <= 1:
            continue

        sub_renamed_dict = rename_folders(
            vectorizer, lda_model, sub_folder_dict, sub_file_word_list,
            folder_word_limit, misc_files={},
        )
        base_organize(sub_folder, sub_renamed_dict)
204 | 
205 | 
206 | # Organizing the folder provided by the user
def organize(path, folder_dict, word_limit, folder_word_limit, vectorizer, lda_model, model, stop_words):
    """Organize ``path``: first the top-level grouping, then a pass inside the new folders."""
    nlp_args = (word_limit, folder_word_limit, vectorizer, lda_model, model, stop_words)

    base_organize(path, folder_dict)
    sub_organize(path, folder_dict, *nlp_args)
213 | 


--------------------------------------------------------------------------------
/connor/reader.py:
--------------------------------------------------------------------------------
  1 | import shutil
  2 | import os
  3 | 
  4 | from openpyxl import load_workbook
  5 | from odf.opendocument import load
  6 | from odf import text, teletype
  7 | from pptx import Presentation
  8 | from docx import Document
  9 | import PyPDF2
 10 | 
 11 | from connor import tmp_path
 12 | 
 13 | 
 14 | # Reads the content from all the files in the provided folder
 15 | def read_text(file_path, word_limit):
 16 |     with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
 17 |         content = file.read().split()
 18 |         return ' '.join(content[:word_limit])
 19 | 
 20 | def read_pdf(file_path, word_limit):
 21 |         with open(file_path, 'rb') as file:
 22 |             pdf_reader = PyPDF2.PdfReader(file)
 23 |             content = ' '.join(page.extract_text() for page in pdf_reader.pages).split()
 24 |             return ' '.join(content[:word_limit])
 25 | 
 26 | def read_odf(file_path, word_limit):
 27 |         odf_file = load(file_path)
 28 |         content = [teletype.extractText(para) for para in odf_file.getElementsByType(text.P)]
 29 |         return ' '.join(content[:word_limit])
 30 | 
 31 | def read_doc(file_path, word_limit):
 32 |         doc = Document(file_path)
 33 |         content = ' '.join([paragraph.text for paragraph in doc.paragraphs]).split()
 34 |         return ' '.join(content[:word_limit])
 35 | 
 36 | def read_xlsx(file_path, word_limit):
 37 |         workbook = load_workbook(file_path)
 38 |         sheet = workbook.active
 39 |         rows = list(sheet.iter_rows(values_only=True))
 40 |         content = [cell for row in rows for cell in row if cell]
 41 |         return ' '.join(str(cell) for cell in content[:word_limit])
 42 | 
 43 | def read_ppt(file_path, word_limit):
 44 |         presentation = Presentation(file_path)
 45 |         content = []
 46 |         for slide in presentation.slides:
 47 |             for shape in slide.shapes:
 48 |                 if hasattr(shape, "text"):
 49 |                     content.append(shape.text)
 50 |         return ' '.join(content[:word_limit])
 51 | 
 52 | 
 53 | # Avoids several ifs by mapping funcs to exts
 54 | func_map = {'.txt':  read_text, '.html': read_text, '.md': read_text, '.csv': read_text, 
 55 |             '.pdf':  read_pdf,  '.docx': read_doc,  '.odt': read_odf, '.odp': read_odf, 
 56 |             '.xlsx': read_xlsx, '.pptx': read_ppt,  '.ppt': read_ppt  } 
 57 | 
 58 | def prep_files(directory, select_folder, copy_files=False):
 59 |     # if user selects a folder: Moves the files from existing sub-folders (if any) to root level of selected folder
 60 |     if select_folder:
 61 |         for root, _, files in os.walk(directory):
 62 |             for file_name in files:
 63 |                 file_path = os.path.join(root, file_name)
 64 |                 root_path = os.path.join(directory, file_name)
 65 |                 shutil.move(file_path, root_path)
 66 | 
 67 |         # Deletes the now empty existing sub-folders
 68 |         for root, folders, _ in os.walk(directory, topdown=False):
 69 |             for folder in folders:
 70 |                 os.rmdir(os.path.join(root, folder))
 71 | 
 72 |     # If user uploads files: Moves/Copies the said files to tmp folder in the installation directory
 73 |     if not select_folder:
 74 |         tmp_folder = os.path.join(tmp_path, "Organized_Files")
 75 |         if not os.path.exists(tmp_folder):
 76 |             os.mkdir(tmp_folder)
 77 | 
 78 |         for file_path in directory:
 79 |             if copy_files:
 80 |                 shutil.copy(file_path, os.path.join(tmp_folder, os.path.basename(file_path)))
 81 |             else:
 82 |                 shutil.move(file_path, os.path.join(tmp_folder, os.path.basename(file_path)))
 83 | 
 84 | def read_files(directory, word_limit):
 85 |     words_list = []
 86 |     misc_list = []
 87 |     for _, _, files in os.walk(directory):
 88 |         for file_name in files:
 89 |             root_path = os.path.join(directory, file_name)
 90 | 
 91 |             # Reads the files and adds the data to words_list
 92 |             if os.path.isfile(root_path):
 93 |                 file_extension = os.path.splitext(file_name)[1]
 94 |                 if file_extension in func_map:
 95 |                     words_list.append((file_name, func_map[file_extension](root_path, word_limit)))
 96 |                 else:
 97 |                      misc_list.append(file_name)
 98 | 
 99 |     return words_list, misc_list
100 | 


--------------------------------------------------------------------------------
/connor/static/icons/clear.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycatsh/connor/991eca9084bfd1b09433c76cef5cbafe9180dfc2/connor/static/icons/clear.png


--------------------------------------------------------------------------------
/connor/static/icons/folder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycatsh/connor/991eca9084bfd1b09433c76cef5cbafe9180dfc2/connor/static/icons/folder.png


--------------------------------------------------------------------------------
/connor/static/icons/refresh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycatsh/connor/991eca9084bfd1b09433c76cef5cbafe9180dfc2/connor/static/icons/refresh.png


--------------------------------------------------------------------------------
/connor/static/icons/upload.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycatsh/connor/991eca9084bfd1b09433c76cef5cbafe9180dfc2/connor/static/icons/upload.png


--------------------------------------------------------------------------------
/connor/static/style.css:
--------------------------------------------------------------------------------
/* Window and dialog background */
QMainWindow, QDialog {
    background-color: #202020;
}

/* Shared foreground colour for text-bearing widgets */
QLabel, QPushButton, QLineEdit, QTextEdit, QCheckBox, QMenu, QMenuBar {
    color: #d6d2c8;
}

/* Buttons */
QPushButton {
    background-color: #323232;
    border: 2px solid #333;
}

QPushButton:hover {
    background-color: #444;
}

/* Horizontal sliders: track */
QSlider::groove:horizontal {
    background: #484848;
    height: 8px;
}

/* Horizontal sliders: handle (negative margin lets it extend past the groove) */
QSlider::handle:horizontal {
    background: #75a7ad;
    border: 1px solid #333;
    width: 10px;
    height: 12px;
    margin: -9px 0;
}

/* Checkbox indicator box */
QCheckBox::indicator {
    color: #75a7ad; 
    width: 15px;
    height: 15px;
}

QCheckBox::indicator:unchecked {
    border: 1px solid #333;
}

QCheckBox::indicator:checked {
    background-color: #75a7ad;
    border: 1px solid #75a7ad;
}

/* Multi-line text boxes */
QTextEdit {
    background-color: #333;
    border: 1px solid #333;
    color: white;
    padding: 5px;
}

/* Single-line inputs */
QLineEdit {
    background-color: #323232;
    border: 1px solid #333;
    color: white;
    padding: 5px;
}

/* Menu bar and drop-down menus */
QMenuBar, QMenuBar::item{
    background-color: #202020;
}

QMenu, QMenu::item{
    background-color: #484848;
}

/* Highlight for the selected menu entry */
QMenu::item:selected, QMenuBar::item:selected {
    background-color: #75a7ad;
}


--------------------------------------------------------------------------------
/connor/tmp/keep.txt:
--------------------------------------------------------------------------------
1 | Don't Delete this placeholder text file.


--------------------------------------------------------------------------------
/connor/tree_builder.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | 
# Prefix fragments used by tree() to draw the structure:
#   branch    - indent for children of an entry that is not the last one
#   connector - pointer in front of every entry except the last one
#   end       - pointer in front of the last entry
#   space     - indent for children of the last entry
branch = "│   " 
connector = "├── "
end = "└── "
space =  "    "
 8 | 
 9 | # Generates the organization summary (tree structure) recursively
def tree(directory, indent='', is_path=False):
    """Yield the lines of a tree view, one entry per line.

    With ``is_path`` true, ``directory`` is a folder on disk that is walked
    recursively. Otherwise it is a mapping of folder name to a collection of
    file names, rendered one level deep.
    """
    if not is_path:
        for folder_name, files in directory.items():
            yield indent + folder_name

            if not files:
                continue

            last_position = len(files) - 1
            for position, file in enumerate(files):
                marker = '└── ' if position == last_position else '├── '
                yield indent + marker + file
        return

    entries = list(Path(directory).iterdir())
    last_position = len(entries) - 1

    for position, entry in enumerate(entries):
        is_last = position == last_position
        yield indent + (end if is_last else connector) + entry.name

        if entry.is_dir():
            # Children of the last entry get blank padding, others a vertical bar.
            child_indent = indent + (space if is_last else branch)
            yield from tree(entry, child_indent, is_path=True)
30 | 
31 | # Generates a string of the tree with relevant formatting
def make_tree(path, dict, is_path_only=False, cli=False):
    """Return the tree of ``dict`` as one formatted string headed by ``path``.

    ``dict`` is handed to ``tree()`` unchanged: a folder path when
    ``is_path_only`` is true, otherwise a mapping of folder names to file
    names. The parameter shadows the builtin but is kept because callers pass
    it by keyword.

    With ``cli`` true the result is plain text. Otherwise it is an HTML
    ``<samp><pre>`` fragment in which the path and all entry names are
    HTML-escaped, so names containing ``<``, ``>`` or ``&`` are displayed
    literally instead of being parsed as markup.
    """
    from html import escape  # only the HTML branch needs it

    body = "".join(f" {entry}\n" for entry in tree(dict, is_path=is_path_only))

    if cli:
        return f"Organized Folder:\n{path}\n{body}"

    safe_path = escape(str(path), quote=False)
    safe_body = escape(body, quote=False)
    return f"<samp><pre> {safe_path}\n{safe_body}</pre></samp>"


--------------------------------------------------------------------------------
/gui/__init__.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | from PyQt6.QtWidgets import QApplication
 4 | 
 5 | from .views import ConnorGUI
 6 | 
 7 | 
 8 | def main():
 9 |     app = QApplication(sys.argv)
10 |     nlp_file_Organizer = ConnorGUI()
11 |     nlp_file_Organizer.show()
12 |     sys.exit(app.exec())
13 | 
14 | if __name__ == '__main__':
15 |     main()


--------------------------------------------------------------------------------
/gui/views/__init__.py:
--------------------------------------------------------------------------------
1 | from .organizer import ConnorGUI


--------------------------------------------------------------------------------
/gui/views/about.py:
--------------------------------------------------------------------------------
 1 | from PyQt6.QtWidgets import QVBoxLayout, QLabel, QDialog, QTextEdit
 2 | 
 3 | class About(QDialog):
 4 |     def __init__(self, parent):
 5 |         super().__init__(parent)
 6 |         self.setWindowTitle("Tutorial")
 7 |         self.setFixedSize(600, 450)
 8 |         self.setModal(True)
 9 | 
10 |         # Layout
11 |         layout = QVBoxLayout()
12 |         section = QVBoxLayout()
13 | 
14 |         # Section 1
15 |         title_lft = QLabel("<h2>Select Folder</h2>")
16 |         desc_lft = QTextEdit()
17 |         desc_lft.setHtml("""
18 |             <p>Allows you to select a folder on your computer and organize it using artificial intelligence based on the content or the names of the files.</p>
19 |             <p>After selecting or entering the absolute path of the folder in the appropriate screen, click on <strong>ORGANIZE FILES</strong> to start the organization process.</p>
20 |         """)
21 |         desc_lft.setReadOnly(True)
22 |         desc_lft.setFixedSize(575, 100)
23 | 
24 |         title_rgt = QLabel("<h2>Upload Files</h2>")
25 |         desc_rgt = QTextEdit()
26 |         desc_rgt.setHtml("""
27 |             <style>
28 |             code {
29 |                 font-family: Monospace;
30 |                 color: #75a7ad;
31 |                 background-color: #202020;
32 |                 padding: 2px;
33 |                 font-size: 105%;
34 |             }
35 |             </style>
36 |             <p>Allows you to upload files manually from anywhere on your computer.</p>
37 |             <p>After going to the appropriate screen, click on <strong>UPLOAD FILES</strong> to manually upload files into the app. These files are stored temporarily in <code>tmp/</code> in the app's root directory for the purpose of organization.</p>
38 |             <p>You can also choose between copying or moving the files:</p>
39 |             <ul>
40 |                 <li><strong>Copy Files:</strong> copies the files from the original location into the app</li>
41 |                 <li><strong>Move Files:</strong> moves the files from the original location into the app</li>
42 |             </ul>
43 |             <p>After you have ensured the files are uploaded into the app, click on <strong>ORGANIZE FILES</strong> to organize the uploaded files into a folder. Upon completion, you can send the organized folder containing your uploaded files (but organized using artificial intelligence) back to anywhere on your computer by clicking on <strong>SEND TO COMPUTER</strong>.</p>
44 |         """)
45 |         desc_rgt.setReadOnly(True)
46 |         desc_rgt.setFixedSize(575, 240)
47 | 
48 |         section.addWidget(title_lft)
49 |         section.addWidget(desc_lft)
50 |         section.addWidget(title_rgt)
51 |         section.addWidget(desc_rgt)
52 | 
53 |         layout.addLayout(section)
54 |         self.setLayout(layout)
55 | 


--------------------------------------------------------------------------------
/gui/views/organizer.py:
--------------------------------------------------------------------------------
  1 | import configparser
  2 | import os
  3 | import shutil
  4 | 
  5 | from PyQt6.QtWidgets import (
  6 |     QMainWindow, QPushButton, QLineEdit, QTextEdit, QVBoxLayout,
  7 |     QHBoxLayout, QWidget, QStackedWidget, QFileDialog, QLabel,
  8 |     QSlider, QSizePolicy, QCheckBox, QMessageBox, QGridLayout
  9 | )
 10 | from PyQt6.QtGui import QFont, QIcon, QAction, QFontDatabase
 11 | from PyQt6.QtCore import Qt, QFile, QTextStream, QIODevice
 12 | from PyQt6 import QtCore
 13 | 
 14 | from connor.processes import (
 15 |     get_file_word_list, rename_folders, sim_organize, organize
 16 | )
 17 | from connor import (
 18 |     init, data_path, static_path, tmp_path, font_path
 19 | )
 20 | from connor.tree_builder import make_tree
 21 | from connor.reader import prep_files
 22 | from gui.views.settings import Settings
 23 | from gui.views.about import About
 24 | 
 25 | 
 26 | class ConnorGUI(QMainWindow):
 27 |     def __init__(self):
 28 |         super().__init__()
 29 | 
 30 |         self.central_widget = QWidget()
 31 |         self.setCentralWidget(self.central_widget)
 32 |         self.setWindowFlag(Qt.WindowType.WindowMaximizeButtonHint, False)
 33 | 
 34 |         # Loads models
 35 |         self.model, self.stop_words, self.lda_model, self.vectorizer = init()
 36 | 
 37 |         # Loads the default settings from config file
 38 |         self.settings = configparser.ConfigParser()
 39 |         self.settings.read(os.path.join(data_path, "config.ini"))
 40 | 
 41 |         self.folder_name_length = int(self.settings["Parameters"].get("folder_name_length", 3))
 42 |         self.reading_word_limit = int(self.settings["Parameters"].get("reading_word_limit", 200))
 43 |         self.similarity_threshold = int(self.settings["Parameters"].get("similarity_threshold", 50))
 44 |         
 45 |         self.init_ui()
 46 | 
 47 |     def init_ui(self):
 48 |         self.setGeometry(550, 250, 850, 600)
 49 |         self.setWindowTitle("Smart File Organizer")
 50 |         self.load_stylesheet("style.css")
 51 | 
 52 |         # Window
 53 |         self.stacked_widget = QStackedWidget(self)
 54 |         self.central_layout = QVBoxLayout(self.central_widget)
 55 |         self.central_layout.addWidget(self.stacked_widget)
 56 | 
 57 |         # Menubar
 58 |         self.view_action = None
 59 |         menu_bar = self.menuBar()
 60 |         self.setup_menu_bar(menu_bar)
 61 | 
 62 |         # Default organization variables
 63 |         self.copy_files = False
 64 |         self.directories = []
 65 |         self.file_list = []
 66 |         self.misc_list = []
 67 |         self.num_files = 0
 68 |         self.tmp_folder = os.path.join(tmp_path, "Organized_Files")
 69 | 
 70 |         # Screens 
 71 |         self.screen1 = QWidget()
 72 |         self.screen2 = QWidget()
 73 |         self.screen3 = QWidget()
 74 |         self.screen4 = QWidget()
 75 |         self.screen5 = QWidget()
 76 | 
 77 |         self.stacked_widget.addWidget(self.screen1)
 78 |         self.stacked_widget.addWidget(self.screen2)
 79 |         self.stacked_widget.addWidget(self.screen3)
 80 |         self.stacked_widget.addWidget(self.screen4)
 81 |         self.stacked_widget.addWidget(self.screen5)
 82 | 
 83 |         self.create_screen1()
 84 |         self.create_screen2()
 85 |         self.create_screen3()
 86 |         self.create_screen4()
 87 |         self.create_screen5()
 88 | 
 89 |         self.current_screen = 0
 90 |         self.stacked_widget.setCurrentIndex(self.current_screen)
 91 | 
 92 |         # Fonts
 93 |         custom_font = QFontDatabase.applicationFontFamilies(QFontDatabase.addApplicationFont(os.path.join(font_path, "Coder's Crux.ttf")))[0]
 94 |         self.update_custom_fonts(custom_font)
 95 | 
 96 |     # Loads styling for the application
 97 |     def load_stylesheet(self, file_name):
 98 |         css_file = os.path.join(static_path, file_name)
 99 |         file = QFile(css_file)
100 |         if file.open(QIODevice.OpenModeFlag.ReadOnly | QIODevice.OpenModeFlag.Text):
101 |             stream = QTextStream(file)
102 |             self.setStyleSheet(stream.readAll())
103 | 
104 |     # Create Menu Bar
105 |     def setup_menu_bar(self, menu_bar):
106 |         menus = {
107 |             "File": {"Exit": self.close},
108 |             "Edit": {"Settings": self.show_settings},
109 |             "View": {"Menu Bar": self.toggle_menubar},
110 |             "Help": {"About": self.show_about},
111 |         }
112 | 
113 |         for menu_name, menu_items in menus.items():
114 |             menu = menu_bar.addMenu(menu_name)
115 |             for item_name, item_action in menu_items.items():
116 |                 action = QAction(item_name, self)
117 |                 action.triggered.connect(item_action)
118 |                 menu.addAction(action)
119 |                 
120 |                 if item_name == "Menu Bar":
121 |                     self.view_action = action
122 |     
123 |     # Alt key toggles menubar 
124 |     def keyPressEvent(self, event):
125 |         if event.key() == Qt.Key.Key_Alt:
126 |             self.view_action.setChecked(not self.view_action.isChecked())
127 |             self.toggle_menubar()
128 | 
129 |     def toggle_menubar(self):
130 |         if self.menuBar().isVisible():
131 |             self.menuBar().hide()
132 |         else:
133 |             self.menuBar().show()
134 | 
135 |     # Updates font
136 |     def update_font(self, widget, font, size=12):
137 |         upd_font = QFont(font)
138 |         upd_font.setPointSize(size)
139 |         widget.setFont(upd_font)
140 | 
141 |     # Updates elements with custom font
142 |     def update_custom_fonts(self, custom_font):
143 |         for button in self.findChildren(QPushButton):
144 |             self.update_font(button, custom_font, 24)
145 | 
146 |         for slider in self.findChildren(QLabel):
147 |             self.update_font(slider, custom_font, 24)
148 | 
149 |         for text in self.findChildren(QTextEdit):
150 |             self.update_font(text, "Monospace", 14)
151 | 
152 |     # Settings pop-up (Allows the user to change default params)
153 |     def show_settings(self):
154 |         settings = Settings(self.settings, self)
155 |         settings.exec()
156 | 
157 |     # Tutorial pop-up (Shows General app instructions)
158 |     def show_about(self):
159 |         about = About(self)
160 |         about.exec()
161 | 
162 |     # Allows user to go back to the previous screen
163 |     def return_button(self, prev_screen, parent=None):
164 |         return_button = QPushButton("Return", parent)
165 |         return_button.setGeometry(720, 5, 100, 30)
166 |         return_button.clicked.connect(lambda _, screen=prev_screen: self.stacked_widget.setCurrentWidget(screen))
167 |         return return_button
168 | 
169 |     # Update move/copy files checkbox
170 |     def copy_files_checkbox_state(self):
171 |         checkbox = self.sender()
172 |         if checkbox.isChecked():
173 |             self.copy_files = True
174 |         else:
175 |             self.copy_files = False
176 |     
177 |     # Main Menu Screen
178 |     def create_screen1(self):
179 |         layout = QVBoxLayout()
180 |         self.screen1.setLayout(layout) 
181 | 
182 |         button_layout = QHBoxLayout()
183 | 
184 |         # Select folder button
185 |         select_folder_button = QPushButton("SELECT FOLDER", parent=self.screen1)
186 |         select_folder_button.setFixedSize(200, 60)
187 | 
188 |         # Upload and organize files button
189 |         upload_files_button = QPushButton("UPLOAD FILES", parent=self.screen1)
190 |         upload_files_button.setFixedSize(200, 60)
191 | 
192 |         select_folder_button.clicked.connect(self.show_screen2)
193 |         upload_files_button.clicked.connect(self.show_screen3)
194 | 
195 |         button_layout.addWidget(select_folder_button)
196 |         button_layout.addWidget(upload_files_button)
197 | 
198 |         # Parameter Sliders 
199 |         slider1_layout = QHBoxLayout()
200 |         self.slider1_label = QLabel(f"Max Folder Name Length: <span style='color:#75a7ad;'>{self.folder_name_length} words</span>")
201 |         slider1 = QSlider(Qt.Orientation.Horizontal)
202 |         slider1.setFixedWidth(200)
203 |         slider1.setRange(2, 5)
204 |         slider1.setValue(self.folder_name_length)
205 |         slider1.setSizePolicy(QSizePolicy.Policy.Fixed, QSizePolicy.Policy.Fixed)
206 |         slider1.valueChanged.connect(self.slider1_changed)
207 |         slider1_layout.addWidget(self.slider1_label)
208 |         slider1_layout.addWidget(slider1)
209 | 
210 |         slider2_layout = QHBoxLayout()
211 |         self.slider2_label = QLabel(f"Word Limit For Reading File: <span style='color:#75a7ad;'>{self.reading_word_limit} words</span>")
212 |         slider2 = QSlider(Qt.Orientation.Horizontal)
213 |         slider2.setRange(100, 1000)
214 |         slider2.setFixedWidth(200)
215 |         slider2.setValue(self.reading_word_limit)
216 |         slider2.setSizePolicy(QSizePolicy.Policy.Fixed, QSizePolicy.Policy.Fixed)
217 |         slider2.valueChanged.connect(self.slider2_changed)
218 |         slider2_layout.addWidget(self.slider2_label)
219 |         slider2_layout.addWidget(slider2)
220 |  
221 |         slider3_layout = QHBoxLayout()
222 |         self.slider3_label = QLabel(f"Similarity Threshold Percent: <span style='color:#75a7ad;'>{self.similarity_threshold} %</span>")
223 |         slider3 = QSlider(Qt.Orientation.Horizontal)
224 |         slider3.setFixedWidth(200)
225 |         slider3.setRange(0, 100)
226 |         slider3.setValue(self.similarity_threshold)
227 |         slider3.setSizePolicy(QSizePolicy.Policy.Fixed, QSizePolicy.Policy.Fixed)
228 |         slider3.valueChanged.connect(self.slider3_changed)
229 |         slider3_layout.addWidget(self.slider3_label)
230 |         slider3_layout.addWidget(slider3)  
231 | 
232 |         layout.addLayout(button_layout)
233 |         layout.addLayout(slider1_layout)
234 |         layout.addLayout(slider2_layout)
235 |         layout.addLayout(slider3_layout)
236 | 
237 |     # Select Folder Screen
    def create_screen2(self):
        """Build the "select folder" screen.

        Top row: a folder-picker button and a path input. Below: a label, a
        read-only box that previews the selected folder, and the button that
        starts the organization.
        """
        layout = QVBoxLayout()
        top_section = QVBoxLayout()
        inp_layout = QHBoxLayout()
        bot_section = QVBoxLayout()
        self.screen2.setLayout(layout)

        # Select Folder button
        select_button = QPushButton(parent=self.screen2)
        select_button.setIcon(QIcon(os.path.join(static_path, "icons/folder.png")))
        select_button.setIconSize(QtCore.QSize(100, 100))
        select_button.setFixedSize(50, 50)

        # Folder input field
        self.folder_path_input = QLineEdit()
        self.folder_path_input.setPlaceholderText("Enter Folder Path")
        self.folder_path_input.setFixedHeight(50)
        # Font is set here because update_custom_fonts() does not cover QLineEdit
        self.update_font(self.folder_path_input, "Monospace", 14)

        # Organize button
        organize_button = QPushButton("ORGANIZE SELECTED FOLDER", parent=self.screen2)
        organize_button.setFixedSize(820, 50)

        inp_layout.addWidget(select_button)
        inp_layout.addWidget(self.folder_path_input)
        top_section.addLayout(inp_layout)

        # selected folder label
        self.select_folder_label = QLabel(f"Your Selected Folder Before Organization:", parent=self.screen2)
        self.select_folder_label.setFixedSize(600, 30)

        # Box for displaying selected folder
        self.select_folder_tab = QTextEdit(parent=self.screen2)
        self.select_folder_tab.setReadOnly(True)
        self.select_folder_tab.setFixedWidth(820)
        self.select_folder_tab.setFixedHeight(200)

        bot_section.addWidget(self.select_folder_label)
        bot_section.addWidget(self.select_folder_tab)
        bot_section.addWidget(organize_button)

        organize_button.clicked.connect(self.organize_selected_folder)
        select_button.clicked.connect(self.select_folder)

        layout.addLayout(top_section)
        layout.addLayout(bot_section)

        # Return to previous screen button (placed by geometry, not added to a layout)
        self.return_button(self.screen1, self.screen2)
287 | 
288 |     # Upload Files Screen
    def create_screen3(self):
        """Build the "upload files" screen.

        Left column: upload button, organize button and the copy/move
        checkbox. Right column: the uploaded-files counter, a read-only list
        box, and the refresh/clear buttons.
        """
        layout = QHBoxLayout()
        left_section = QVBoxLayout()
        right_section = QVBoxLayout()
        util_layout = QHBoxLayout()
        self.screen3.setLayout(layout)

        # Upload button
        upload_button = QPushButton(" UPLOAD", parent=self.screen3)
        upload_button.setIcon(QIcon(os.path.join(static_path, "icons/upload.png")))
        upload_button.setIconSize(QtCore.QSize(100, 100))
        upload_button.setFixedSize(300, 150)

        # Organize button
        organize_button = QPushButton("ORGANIZE", parent=self.screen3)
        organize_button.setFixedSize(300, 50)

        # Checkbox: copy the uploaded files instead of moving them
        checkbox2 = QCheckBox("Copy Uploaded Files", parent=self.screen3)
        checkbox2.setFixedSize(200, 25)

        left_section.setAlignment(Qt.AlignmentFlag.AlignCenter)
        left_section.addWidget(upload_button)
        left_section.addWidget(organize_button)
        left_section.addWidget(checkbox2)

        # Uploaded files label
        self.uploaded_num_files = QLabel(f"Your Uploaded Files: <span style='color:#75a7ad;'>{self.num_files}</span>", parent=self.screen3)
        self.uploaded_num_files.setFixedSize(400, 30)

        # Box for displaying uploaded files
        self.uploaded_files_tab = QTextEdit(parent=self.screen3)
        self.uploaded_files_tab.setReadOnly(True)
        self.uploaded_files_tab.setFixedWidth(450)
        self.uploaded_files_tab.setFixedHeight(450)
        right_section.addSpacing(50) 

        # Loads the already uploaded files
        # NOTE(review): called after the label and list box exist - presumably
        # refresh_files() updates them; confirm against its definition.
        self.refresh_files()

        # Refresh all the uploaded files button
        refresh_button = QPushButton(" REFRESH", parent=self.screen3)
        refresh_button.setIcon(QIcon(os.path.join(static_path, "icons/refresh.png")))
        refresh_button.setIconSize(QtCore.QSize(30, 30))
        refresh_button.setFixedSize(222, 35)

        # Clear all the uploaded files button
        clear_button = QPushButton(" CLEAR", parent=self.screen3)
        clear_button.setIcon(QIcon(os.path.join(static_path, "icons/clear.png")))
        clear_button.setIconSize(QtCore.QSize(30, 30))
        clear_button.setFixedSize(222, 35)

        util_layout.addWidget(refresh_button)
        util_layout.addWidget(clear_button)

        right_section.addWidget(self.uploaded_num_files)
        right_section.addWidget(self.uploaded_files_tab)
        right_section.addLayout(util_layout)

        checkbox2.stateChanged.connect(self.copy_files_checkbox_state)
        upload_button.clicked.connect(self.upload_files)
        organize_button.clicked.connect(self.organize_uploaded_files)
        clear_button.clicked.connect(self.clear_files)
        refresh_button.clicked.connect(self.refresh_files)

        layout.addLayout(left_section)
        layout.addLayout(right_section)

        # Return to previous screen button (placed by geometry, not added to a layout)
        self.return_button(self.screen1, self.screen3)
359 | 
360 |     # Organized Folder Summary Screen (if user selects a folder)
    def create_screen4(self):
        """Build the summary screen shown after organizing a selected folder."""
        layout = QVBoxLayout()
        self.screen4.setLayout(layout)

        # NOTE: create_screen5() assigns self.output_title as well and runs
        # later in init_ui(), so afterwards the attribute no longer refers to
        # this label (the label itself stays in this screen's layout).
        self.output_title = QLabel("Folder Successfully organized:")
        self.output_title.setFixedSize(400, 30)

        # Organization summary text box
        self.output_text = QTextEdit()
        self.output_text.setReadOnly(True)

        layout.addWidget(self.output_title)
        layout.addWidget(self.output_text)

        # Return to previous screen button (back to the select-folder screen)
        self.return_button(self.screen2, self.screen4)
377 | 
378 |     # Organized Folder Summary Screen (if user uploads files)
    def create_screen5(self):
        """Build the summary screen shown after organizing uploaded files."""
        layout = QVBoxLayout()
        self.screen5.setLayout(layout)

        # NOTE: this reuses the attribute name set by create_screen4(); since
        # init_ui() calls this method afterwards, self.output_title ends up
        # referring to this label.
        self.output_title = QLabel("Organized Folder Structure:")
        self.output_title.setFixedSize(400, 30)

        # Organization summary text box
        self.output_text2 = QTextEdit()
        self.output_text2.setReadOnly(True)

        # Send organized files (if uploaded) to computer
        send_to_comp_button = QPushButton("SEND TO COMPUTER", parent=self.screen5)
        send_to_comp_button.setFixedSize(250, 50)
        send_to_comp_button.clicked.connect(self.send_to_computer)

        layout.addWidget(self.output_title)
        layout.addWidget(self.output_text2)
        layout.addWidget(send_to_comp_button)

        # Return to previous screen button (back to the upload screen)
        self.return_button(self.screen3, self.screen5)
401 | 
402 |     # Allows user to choose a folder instead of pasting its path
403 |     def select_folder(self):
404 |         selected_folder = QFileDialog.getExistingDirectory(self, 'Select Folder')
405 |         if selected_folder.strip():
406 |             self.folder_path_input.setText(selected_folder)
407 |             self.select_folder_tab.setHtml(make_tree(path=selected_folder, dict=selected_folder, is_path_only=True, cli=False))
408 | 
409 |     # Organizes the selected folder
410 |     def organize_selected_folder(self):
411 |         # Initializing the file names and content, and grouping them into a dictionary
412 |         folder_path = os.path.relpath(self.folder_path_input.text(), os.getcwd())
413 |         prep_files(folder_path, select_folder=True)
414 |         self.file_list, self.misc_list = get_file_word_list(folder_path, self.reading_word_limit, self.stop_words)
415 |         folder_dict, self.misc_list = sim_organize(self.model, self.similarity_threshold/100, self.file_list, self.misc_list)
416 |         
417 |         # Fitting the model based on the data provided
418 |         data_vectorized = self.vectorizer.fit_transform(words[1] for words in self.file_list)
419 |         self.lda_model.fit(data_vectorized)
420 | 
421 |         # Final organization process
422 |         renamed_dict = rename_folders(self.vectorizer, self.lda_model, folder_dict, 
423 |                                       self.file_list, self.folder_name_length, self.misc_list)
424 |         organize(folder_path, renamed_dict, self.reading_word_limit, self.folder_name_length,
425 |                  self.vectorizer, self.lda_model, self.model, self.stop_words)
426 |         self.output_text.setHtml(make_tree(path=folder_path, dict=folder_path, is_path_only=True, cli=False))
427 | 
428 |         # Switch to summary screen
429 |         self.show_screen4()
430 | 
431 |     # Handles uploading the files
432 |     def upload_files(self):
433 |         # Initializing the file names and content, and grouping them into a dictionary
434 |         self.directories, _ = QFileDialog.getOpenFileNames(self, "Select Files", "", "All Files (*)")
435 |         prep_files(self.directories, select_folder=False, copy_files=self.copy_files)
436 |         self.refresh_files()
437 | 
438 |     # Organizes the uploaded files
439 |     def organize_uploaded_files(self):
440 |         # Initializing the file names and content, and grouping them into a dictionary
441 |         self.file_list, self.misc_list = get_file_word_list(self.tmp_folder, self.reading_word_limit, self.stop_words)
442 |         folder_dict, self.misc_list = sim_organize(self.model, self.similarity_threshold/100, self.file_list, self.misc_list)
443 | 
444 |         # Fitting the model based on the data provided
445 |         data_vectorized = self.vectorizer.fit_transform(words[1] for words in self.file_list)
446 |         self.lda_model.fit(data_vectorized)
447 | 
448 |         # Final organization process
449 |         renamed_dict = rename_folders(self.vectorizer, self.lda_model, folder_dict, 
450 |                                       self.file_list, self.folder_name_length, self.misc_list)
451 |         organize(self.tmp_folder, renamed_dict, self.reading_word_limit, self.folder_name_length,
452 |                  self.vectorizer, self.lda_model, self.model, self.stop_words)
453 |         self.output_text2.setHtml(make_tree(path=self.tmp_folder, dict=self.tmp_folder, is_path_only=True, cli=False))
454 | 
455 |         # Switch to summary screen
456 |         self.show_screen5()
457 | 
458 |     # Allows the user to send the organized files (from uploads) to computer
459 |     def send_to_computer(self):
460 |         send_folder = QFileDialog.getExistingDirectory(self, 'Select Folder')
461 |         root_folder = self.tmp_folder
462 |         
463 |         if send_folder.strip():
464 |             for file in os.listdir(root_folder):
465 |                 shutil.move(os.path.join(root_folder, file), os.path.join(send_folder, file))
466 |             shutil.rmtree(root_folder)
467 |             
468 |             # Success message pop-up
469 |             pop_up = self.create_pop_up(title="Sucess", content=f"The uploaded files have been organized and sent to the specified folder:\n{send_folder}", icon=QMessageBox.Icon.Information, options=False)
470 |             if pop_up == QMessageBox.StandardButton.Yes:
471 |                 self.reset_params()
472 |                 self.show_screen1()
473 |         else:
474 |             # Error message pop-up
475 |             pop_up = self.create_pop_up(title="Confirmation", content="If you do not wish to send these files to your computer? then please click 'Yes' to delete them or 'No' to cancel.\n\n The previously uploaded files and the temporary organized folder will be deleted from uploads if you continue with 'Yes' (this does not delete the actual files if you chose to copy instead of move)", icon=QMessageBox.Icon.Question)
476 |             if pop_up == QMessageBox.StandardButton.Yes:
477 |                 # Deletes previously organized folder (if ignored by the user)
478 |                 shutil.rmtree(root_folder)
479 |                 self.reset_params()
480 |                 self.show_screen1()
481 |     
482 |     def create_pop_up(self, title, content, icon, options=True):
483 |         pop_up = QMessageBox()
484 |         pop_up.setWindowTitle(title)
485 |         pop_up.setText(content)
486 |         pop_up.setIcon(icon)
487 |         pop_up.setStandardButtons(QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No if options else QMessageBox.StandardButton.Yes)
488 | 
489 |         return pop_up.exec()
490 | 
491 |     def clear_files(self):
492 |         shutil.rmtree(self.tmp_folder)
493 |         self.reset_params()
494 | 
495 |     # Refreshes the uploaded files display on screen 3
496 |     def refresh_files(self):
497 |         self.reset_params()
498 |         if os.path.exists(self.tmp_folder):
499 |             self.num_files = 0
500 |             for file in os.listdir(self.tmp_folder):
501 |                 self.num_files += 1
502 |                 self.uploaded_files_tab.append(f"<samp>{os.path.basename(file)}<br></samp>")
503 |             self.uploaded_num_files.setText(f"Your Uploaded Files: <span style='color:#75a7ad;'>{self.num_files}</span>")
504 | 
505 |     # Resets the uploaded files label and text box appropriately
506 |     def reset_params(self):
507 |         self.uploaded_files_tab.setText("")
508 |         self.directories = []
509 |         self.file_list = []
510 |         self.misc_list = []
511 |         self.num_files = 0
512 |         self.uploaded_num_files.setText(f"Your Uploaded Files: <span style='color:#75a7ad;'>{self.num_files}</span>")
513 |     
514 |     # Show/Switch to respective screens
515 |     def show_screen1(self):
516 |         self.current_screen = 0
517 |         self.stacked_widget.setCurrentIndex(self.current_screen)
518 | 
519 |     def show_screen2(self):
520 |         self.current_screen = 1
521 |         self.stacked_widget.setCurrentIndex(self.current_screen)
522 | 
523 |     def show_screen3(self):
524 |         self.current_screen = 2
525 |         self.stacked_widget.setCurrentIndex(self.current_screen)
526 | 
527 |     def show_screen4(self):
528 |         self.current_screen = 3
529 |         self.stacked_widget.setCurrentIndex(self.current_screen)
530 | 
531 |     def show_screen5(self):
532 |         self.current_screen = 4
533 |         self.stacked_widget.setCurrentIndex(self.current_screen)
534 |     
535 |     # Updates slider values in main menu
536 |     def slider1_changed(self, value):
537 |         self.folder_name_length = value
538 |         self.slider1_label.setText(f"Max Folder Name Length: <span style='color:#75a7ad;'>{value} words</span>")
539 | 
540 |     def slider2_changed(self, value):
541 |         self.reading_word_limit = value
542 |         self.slider2_label.setText(f"Word Limit For Reading File: <span style='color:#75a7ad;'>{value} words</span>")
543 | 
544 |     def slider3_changed(self, value):
545 |         self.similarity_threshold = value
546 |         self.slider3_label.setText(f"Similarity Threshold Percent: <span style='color:#75a7ad;'>{value} %</span>")
547 | 


--------------------------------------------------------------------------------
/gui/views/settings.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | from PyQt6.QtWidgets import (
  4 |     QPushButton, QVBoxLayout, QLabel, QSlider, QDialog
  5 | )
  6 | from PyQt6.QtGui import QFont
  7 | from PyQt6.QtCore import Qt
  8 | from PyQt6 import QtCore
  9 | 
 10 | from connor import data_path
 11 | 
 12 | 
 13 | class Settings(QDialog):
 14 |     def __init__(self, settings, parent):
 15 |         super().__init__(parent)
 16 |         self.setWindowTitle("Settings")
 17 |         self.setFixedSize(350, 350)
 18 |         self.setModal(True)
 19 |         self.settings = settings
 20 | 
 21 |         # Layouts
 22 |         layout = QVBoxLayout()
 23 |         title_layout = QVBoxLayout()
 24 |         btn_layout = QVBoxLayout()
 25 | 
 26 |         # Fonts
 27 |         md_font = QFont()
 28 |         md_font.setPointSize(14)
 29 |         sm_font = QFont()
 30 |         sm_font.setPointSize(12)
 31 | 
 32 |         title = QLabel("Change Default Values")
 33 |         title.setFont(md_font)
 34 |         title_layout.addWidget(title)
 35 |         layout.addLayout(title_layout)
 36 | 
 37 |         layout.addSpacing(20)
 38 | 
 39 |         # Initializing sliders and buttons
 40 |         self.setting_label1 = QLabel(f"Folder Name Length: {settings['Parameters']['folder_name_length']}")
 41 |         self.setting_label1.setFont(sm_font)
 42 |         self.setting_input1 = QSlider(Qt.Orientation.Horizontal)
 43 |         self.setting_input1.setRange(2, 5)
 44 |         self.setting_input1.setValue(int(settings["Parameters"]["folder_name_length"]))
 45 |         self.setting_input1.valueChanged.connect(self.setting_input1_changed)
 46 |         layout.addWidget(self.setting_label1)
 47 |         layout.addWidget(self.setting_input1)
 48 | 
 49 |         self.setting_label2 = QLabel(f"Reading Word Limit: {settings['Parameters']['reading_word_limit']}")
 50 |         self.setting_label2.setFont(sm_font)
 51 |         self.setting_input2 = QSlider(Qt.Orientation.Horizontal)
 52 |         self.setting_input2.setRange(100, 1000)
 53 |         self.setting_input2.setValue(int(settings["Parameters"]["reading_word_limit"]))
 54 |         self.setting_input2.valueChanged.connect(self.setting_input2_changed)
 55 |         layout.addWidget(self.setting_label2)
 56 |         layout.addWidget(self.setting_input2)
 57 | 
 58 |         self.setting_label3 = QLabel(f"Similarity Threshold: {settings['Parameters']['similarity_threshold']}")
 59 |         self.setting_label3.setFont(sm_font)
 60 |         self.setting_input3 = QSlider(Qt.Orientation.Horizontal)
 61 |         self.setting_input3.setRange(0, 100)
 62 |         self.setting_input3.setValue(int(settings["Parameters"]["similarity_threshold"]))
 63 |         self.setting_input3.valueChanged.connect(self.setting_input3_changed)
 64 |         layout.addWidget(self.setting_label3)
 65 |         layout.addWidget(self.setting_input3)
 66 | 
 67 |         layout.addSpacing(20)
 68 | 
 69 |         update_button = QPushButton("Update")
 70 |         update_button.setFixedSize(80, 35)
 71 |         update_button.setFont(md_font)
 72 |         update_button.clicked.connect(self.save_settings)
 73 |         btn_layout.setAlignment(QtCore.Qt.AlignmentFlag.AlignHCenter)
 74 |         btn_layout.addWidget(update_button)
 75 |         layout.addLayout(btn_layout)
 76 | 
 77 |         self.state_label = QLabel("", self)
 78 |         layout.addWidget(self.state_label)
 79 |         self.setLayout(layout)
 80 | 
 81 |     # Saves new default settings in config file
 82 |     def save_settings(self):
 83 |         self.settings["Parameters"]["folder_name_length"] = str(self.setting_input1.value())
 84 |         self.settings["Parameters"]["reading_word_limit"] = str(self.setting_input2.value())
 85 |         self.settings["Parameters"]["similarity_threshold"] = str(self.setting_input3.value())
 86 | 
 87 |         with open(os.path.join(data_path, "config.ini"), "w") as file:
 88 |             self.settings.write(file)
 89 | 
 90 |         self.state_label.setText("Default settings have been updated successfully")
 91 | 
 92 |     # Updates slider values in main menu
 93 |     def setting_input1_changed(self, value):
 94 |         self.setting_label1.setText(f"Folder Name Length: {value}")
 95 | 
 96 |     def setting_input2_changed(self, value):
 97 |         self.setting_label2.setText(f"Reading Word Limit: {value}")
 98 | 
 99 |     def setting_input3_changed(self, value):
100 |         self.setting_label3.setText(f"Similarity Threshold: {value}")
101 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | docx==0.2.4
2 | nltk==3.9.1
3 | numpy==2.1.3
4 | odfpy==1.4.1
5 | openpyxl==3.1.5
6 | PyPDF2==3.0.1
7 | PyQt6==6.7.1
8 | scikit_learn==1.5.2
9 | sentence_transformers==3.3.1


--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | from gui import main as gui_main
 4 | from cli import main as cli_main
 5 | 
 6 | 
 7 | def main():
 8 |     parser = argparse.ArgumentParser(prog='Connor', description='Connor: Fast and local NLP file organizer')
 9 |     parser.add_argument('--gui', action='store_true', help='Run the application in GUI mode.')
10 |     subparsers = parser.add_subparsers(dest='command')
11 | 
12 |     # Subparser for updating settings
13 |     settings_parser = subparsers.add_parser('settings', help='Update the settings for the organizer')
14 |     settings_parser.add_argument( '-f', '--folder-word-limit', type=int, 
15 |                                  help='Specify the maximum number of words allowed in the created folder names')
16 |     settings_parser.add_argument( '-r', '--reading-limit',type=int, 
17 |                                  help='Set a limit on the number of words to read from the file content')
18 |     settings_parser.add_argument('-t', '--similarity-threshold', type=int, 
19 |                                  help='Change the similarity threshold for a custom threshold percentage for grouping similar files')
20 |     settings_parser.add_argument('--show', action='store_true', help='Show current settings')
21 | 
22 |     # Subparser for running the organization
23 |     run_parser = subparsers.add_parser('run', help='Run the folder organization process')
24 |     run_parser.add_argument('path', type=str, help='Path to the folder to organize')
25 | 
26 |     args = parser.parse_args()
27 |     cli_tool = cli_main()
28 | 
29 |     # GUI
30 |     if args.gui:
31 |         gui_main()
32 |     else:
33 |         # Organize
34 |         if args.command == 'run':
35 |             cli_tool.organize_folder(args.path)
36 |         # Settings
37 |         elif args.command == 'settings':
38 |             if args.show:
39 |                 cli_tool.show_settings()
40 |             elif (args.folder_word_limit or args.reading_limit or args.similarity_threshold):
41 |                 cli_tool.update_settings(
42 |                     folder_name_length=args.folder_word_limit, 
43 |                     reading_word_limit=args.reading_limit, 
44 |                     similarity_threshold=args.similarity_threshold
45 |                 )
46 |             else:
47 |                 settings_parser.print_help()
48 |         # Help
49 |         else:
50 |             parser.print_help()
51 | 
# Run the command-line entry point only when this file is executed as a script
if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | with open('README.md', 'r') as file:
 4 |     long_description = file.read()
 5 | 
 6 | setup(
 7 |     name='connor_nlp',
 8 |     version='1.0.0',
 9 |     py_modules=['run'],
10 |     packages=find_packages(include=['connor', 'cli', 'gui', 'connor.*', 'cli.*', 'gui.*'],
11 |                            exclude=['connor/data', 'connor/static', 'connor/static/icons', 
12 |                                     'connor/fonts', 'connor/tmp']),
13 |     include_package_data=True,
14 |     package_data={
15 |         'connor': ['data/*', 'static/*', 'static/icons/*','fonts/*', 'tmp/*'],
16 |     },
17 |     install_requires=[
18 |         "docx==0.2.4",
19 |         "python_pptx==1.0.2",
20 |         "python_docx==0.8.11",
21 |         "nltk==3.9.1",
22 |         "numpy==2.1.3",
23 |         "odfpy==1.4.1",
24 |         "openpyxl==3.1.5",
25 |         "PyPDF2==3.0.1",
26 |         "PyQt6==6.7.1",
27 |         "scikit_learn==1.5.2",
28 |         "sentence_transformers==3.3.1",
29 |     ],
30 |     entry_points={
31 |         'console_scripts': [
32 |             'connor=run:main',
33 |         ],
34 |     },
35 |     author='Ycatsh',
36 |     description='Fast and fully local NLP file organizer that organizes files based on their content.',
37 |     long_description=long_description,
38 |     long_description_content_type='text/markdown',
39 |     url='https://github.com/ycatsh/connor',
40 |     license='MIT',
41 |     license_file='LICENSE',
42 |     classifiers=[
43 |         'Programming Language :: Python :: 3',
44 |         'License :: OSI Approved :: MIT License',
45 |         'Operating System :: OS Independent',
46 |     ],
47 |     python_requires='>=3.10'
48 | )


--------------------------------------------------------------------------------