├── .github
├── demo.mp4
└── logo.png
├── .gitignore
├── LICENSE
├── README.md
├── cli
├── __init__.py
└── commands
│ ├── __init__.py
│ └── command.py
├── connor
├── __init__.py
├── data
│ └── config.ini
├── fonts
│ └── Coder's Crux.ttf
├── processes.py
├── reader.py
├── static
│ ├── icons
│ │ ├── clear.png
│ │ ├── folder.png
│ │ ├── refresh.png
│ │ └── upload.png
│ └── style.css
├── tmp
│ └── keep.txt
└── tree_builder.py
├── gui
├── __init__.py
└── views
│ ├── __init__.py
│ ├── about.py
│ ├── organizer.py
│ └── settings.py
├── requirements.txt
├── run.py
└── setup.py
/.github/demo.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycatsh/connor/991eca9084bfd1b09433c76cef5cbafe9180dfc2/.github/demo.mp4
--------------------------------------------------------------------------------
/.github/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycatsh/connor/991eca9084bfd1b09433c76cef5cbafe9180dfc2/.github/logo.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Releases
2 | .releases/
3 |
4 | # Byte-compiled / optimized / DLL files
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 |
9 | # C extensions
10 | *.so
11 |
12 | # Distribution / packaging
13 | .Python
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .nox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | *.py,cover
53 | .hypothesis/
54 | .pytest_cache/
55 | cover/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 | db.sqlite3-journal
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | .pybuilder/
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 |
88 | # pyenv
89 | # For a library or package, you might want to ignore these files since the code is
90 | # intended to run in multiple environments; otherwise, check them in:
91 | # .python-version
92 |
93 | # pipenv
94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
97 | # install all needed dependencies.
98 | #Pipfile.lock
99 |
100 | # poetry
101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102 | # This is especially recommended for binary packages to ensure reproducibility, and is more
103 | # commonly ignored for libraries.
104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105 | #poetry.lock
106 |
107 | # pdm
108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109 | #pdm.lock
110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111 | # in version control.
112 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
113 | .pdm.toml
114 | .pdm-python
115 | .pdm-build/
116 |
117 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
118 | __pypackages__/
119 |
120 | # Celery stuff
121 | celerybeat-schedule
122 | celerybeat.pid
123 |
124 | # SageMath parsed files
125 | *.sage.py
126 |
127 | # Environments
128 | .env
129 | .venv
130 | env/
131 | venv/
132 | ENV/
133 | env.bak/
134 | venv.bak/
135 |
136 | # Spyder project settings
137 | .spyderproject
138 | .spyproject
139 |
140 | # Rope project settings
141 | .ropeproject
142 |
143 | # mkdocs documentation
144 | /site
145 |
146 | # mypy
147 | .mypy_cache/
148 | .dmypy.json
149 | dmypy.json
150 |
151 | # Pyre type checker
152 | .pyre/
153 |
154 | # pytype static type analyzer
155 | .pytype/
156 |
157 | # Cython debug symbols
158 | cython_debug/
159 |
160 | # PyCharm
161 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
162 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
163 | # and can be added to the global gitignore or merged into this file. For a more nuclear
164 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
165 | #.idea/
166 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 ycatsh
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Connor is a file organizer written in [Python](https://www.python.org/). It makes use of the [sentence-transformers](https://sbert.net/) framework for the main organization process and the [PyQt6](https://doc.qt.io/qtforpython-6/) GUI toolkit for the graphical user interface. **It is by no means supposed to substitute for organizing files by hand. It is just a concept**. Connor features a fast and fully local file organizer that uses natural language processing to organize computer files based on their textual content.
6 |
7 |
8 |
9 |
10 | 
11 | 
12 | 
13 |
14 |
15 |
16 | https://github.com/user-attachments/assets/b0d151c6-9a8b-4710-92e9-d410edc57b84
17 |
18 | ## Features
19 | Connor runs locally using the `sentence-transformers/paraphrase-MiniLM-L6-v2` model to analyze file content and organize them based on semantic similarity. It uses cosine similarity to group similar files and applies Latent Dirichlet Allocation (LDA) to name folders.
20 |
21 | Unprocessable files (e.g., images, binaries) are sorted into a `_misc` folder based on their extensions.
22 |
23 | ### Customization Options
24 | 1. **Similarity Threshold:** Set the minimum similarity percentage threshold for grouping.
25 | 2. **Reading Word Limit:** Limit how much of a file is read.
26 | 3. **Folder Name Word Limit:** Set max words for folder names.
27 |
28 | ### User Preferences
29 | **Command Line Interface**: Quick folder organization.
30 | **Graphical Interface**: Simple GUI with file upload support.
31 |
32 |
33 |
34 |
35 |
36 |
37 | ## Installation
38 | There are installation instructions for both GUI and CLI. You can choose the one you want to install. If you're opting for building the application from [source](https://github.com/ycatsh/connor#source) then adding the run file to path is recommended.
39 |
40 | **Install Connor via pip:**
41 | 1. Make sure you have `python` and `pip` installed and added to path.
42 | 2. Run `pip install connor-nlp`
43 |
44 |
45 |
46 | **Install the GUI version of Connor (executable)**
47 | 1. Go to the [latest release](https://github.com/ycatsh/connor/releases).
48 | 2. Follow the steps there.
49 | 3. Run the executable (`.exe`).
50 |
51 |
52 |
53 |
54 |
55 |
56 | ## Usage
57 |
58 | ### Command Structure
59 |
60 | ```bash
61 | connor [command] [options]
62 | ```
63 |
64 | ### Commands
65 | #### `run`: Run the folder organization process.
66 |
67 | **Usage:**
68 | ```bash
69 | connor run
70 | ```
71 |
72 | **Options:**
73 | - `folder_path`: Required. Absolute path to the folder that you want to organize.
74 |
75 | **Example:**
76 | ```bash
77 | connor run /path/to/your/folder
78 | ```
79 |
80 |
81 |
82 | #### `settings`: Update the default settings for the tool.
83 |
84 | **Usage:**
85 | ```bash
86 | connor settings [options]
87 | ```
88 |
89 | **Options:**
90 | - `-f, --folder-word-limit`: Set the maximum length for folder names. (default: 3)
91 | - `-r, --reading-limit`: Specify the word limit for reading files. (default: 200)
92 | - `-t, --similarity-threshold`: Define the similarity threshold percentage. (default: 50)
93 | - `--show`: Show current settings
94 |
95 | **Example:**
96 | ```bash
97 | connor settings -f 2 -r 150 -t 60
98 | ```
99 |
100 | ```console
101 | $ connor settings --show
102 | To see how to update: Connor settings [-h]
103 |
104 | Current settings:
105 | folder words limit 3
106 | reading limit 200
107 | similarity threshold 50%
108 | ```
109 |
110 |
111 |
112 | #### `--gui`: Run Connor as a full fledged GUI from the terminal.
113 |
114 | **Usage:**
115 | ```bash
116 | connor --gui
117 | ```
118 |
119 |
120 |
121 | ### Help
122 | To view help information for commands and options, use the `-h` or `--help` flag.
123 |
124 | **Example:**
125 | ```console
126 | $ connor -h
127 | usage: Connor [-h] [--gui] {settings,run} ...
128 |
129 | Connor: Fast and local NLP file organizer
130 |
131 | positional arguments:
132 | {settings,run}
133 | settings Update the settings for the organizer
134 | run Run the folder organization process
135 |
136 | options:
137 | -h, --help show this help message and exit
138 | --gui Run the application in GUI mode.
139 | ```
140 |
141 |
142 |
143 |
144 |
145 | ## Source
146 | #### 1. Clone repository:
147 | ```bash
148 | git clone https://github.com/ycatsh/connor.git
149 | cd connor
150 | ```
151 | #### 2. Create and activate virtual environment:
152 | ```bash
153 | python3 -m venv venv
154 | source venv/bin/activate
155 | ```
156 | #### 3. Install dependencies:
157 | ```bash
158 | pip3 install -r requirements.txt
159 | ```
160 | #### 4. Run program:
161 | For GUI:
162 | ```bash
163 | python3 run.py --gui
164 | ```
165 | For CLI:
166 | ```bash
167 | python3 run.py -h
168 | ```
169 |
170 | #### 5. Install locally (optional):
171 | ```bash
172 | pip3 install .
173 | ```
174 |
175 | **Example:**
176 | ```bash
177 | connor --gui
178 | ```
179 | ```bash
180 | connor -h
181 | ```
182 |
183 |
184 |
185 |
186 |
187 |
188 | ## License
189 | This project is distributed under MIT License, which can be found in LICENSE in the root dir of the project. I reserve the right to place future versions of this project under a different license.
--------------------------------------------------------------------------------
/cli/__init__.py:
--------------------------------------------------------------------------------
1 | from .commands import ConnorCLI
2 |
3 |
def main():
    """Build and return a ``ConnorCLI`` instance."""
    return ConnorCLI()

# Allow the module to be executed directly as a script.
if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/cli/commands/__init__.py:
--------------------------------------------------------------------------------
1 | from .command import ConnorCLI
--------------------------------------------------------------------------------
/cli/commands/command.py:
--------------------------------------------------------------------------------
1 | import configparser
2 | import shutil
3 | import os
4 |
5 | from connor import (
6 | init, data_path,
7 | )
8 | from connor.processes import (
9 | get_file_word_list, sim_organize,
10 | rename_folders, organize
11 | )
12 | from connor.tree_builder import make_tree
13 | from connor.reader import prep_files
14 |
15 |
class ConnorCLI:
    """Command-line front end: holds the user settings and runs the organizer."""

    def __init__(self):
        # Loads the default settings from config file
        self.settings = configparser.ConfigParser()
        self.settings.read(os.path.join(data_path, "config.ini"))

        # A missing or truncated config file must not crash start-up (or a
        # later update_settings call), so make sure the section exists.
        if not self.settings.has_section("Parameters"):
            self.settings.add_section("Parameters")
        params = self.settings["Parameters"]

        # Load initial parameters from config
        self.folder_name_length = int(params.get("folder_name_length", fallback=3))
        self.reading_word_limit = int(params.get("reading_word_limit", fallback=200))
        self.similarity_threshold = int(params.get("similarity_threshold", fallback=50))

        # Separator spanning the current terminal width
        terminal_width = shutil.get_terminal_size().columns
        self.separator = '-' * terminal_width

    def update_settings(self, folder_name_length=None, reading_word_limit=None, similarity_threshold=None):
        """Update the given settings in memory and write them to config.ini.

        Arguments left as ``None`` keep their current value.
        """
        if folder_name_length is not None:
            self.folder_name_length = folder_name_length
            self.settings["Parameters"]["folder_name_length"] = str(folder_name_length)
        if reading_word_limit is not None:
            self.reading_word_limit = reading_word_limit
            self.settings["Parameters"]["reading_word_limit"] = str(reading_word_limit)
        if similarity_threshold is not None:
            self.similarity_threshold = similarity_threshold
            self.settings["Parameters"]["similarity_threshold"] = str(similarity_threshold)

        # Save updated settings to config file
        with open(os.path.join(data_path, "config.ini"), "w") as configfile:
            self.settings.write(configfile)
        print("Settings updated successfully.")

    def show_settings(self):
        """Print the current settings as an aligned table."""
        print("To see how to update: Connor settings [-h]")
        print("\nCurrent settings:")
        print(f"  {'folder words limit':<22} {self.folder_name_length}")
        print(f"  {'reading limit':<22} {self.reading_word_limit}")
        print(f"  {'similarity threshold':<22} {self.similarity_threshold}%")

    def organize_folder(self, folder_path):
        """Preview and, after confirmation, organize the files in *folder_path*.

        Returns ``None`` in every case; all feedback is printed.
        """
        # Validate the target before loading the models: loading is slow and
        # pointless when the path is wrong. A plain file is rejected too.
        if not os.path.isdir(folder_path):
            print(f"Error: The folder '{folder_path}' does not exist.")
            return

        model, stop_words, lda_model, vectorizer = init()
        if model is None:
            # init() reports a failed first-time setup by returning None values.
            print("Error: The language models could not be loaded.")
            return

        print(self.separator)
        print(f'To customize default settings instead run the command \nfolder_name_length: {self.folder_name_length}\nreading_word_limit: {self.reading_word_limit}\nsimilarity_threshold: {self.similarity_threshold}%')
        print(self.separator)
        print(f"Folder '{folder_path}' is being organized...")

        # Preparing files and organizing
        prep_files(folder_path, select_folder=True)
        self.file_list, misc_list = get_file_word_list(folder_path, self.reading_word_limit, stop_words)
        folder_dict, misc_list = sim_organize(model, self.similarity_threshold / 100, self.file_list, misc_list)

        # Fitting the model based on the data provided. With no readable text
        # files there is nothing to fit (and no group to name), so skip it
        # instead of letting the vectorizer fail on an empty corpus.
        if self.file_list:
            data_vectorized = vectorizer.fit_transform(words[1] for words in self.file_list)
            lda_model.fit(data_vectorized)

        # Main Process
        renamed_dict = rename_folders(vectorizer, lda_model, folder_dict, self.file_list,
                                      self.folder_name_length, misc_list)
        print(make_tree(path=folder_path, dict=renamed_dict, is_path_only=False, cli=True))
        print(self.separator)

        # Confirm Organization (an empty answer counts as "yes")
        try:
            confirm = input(f"The above directory tree explains how the folder will be organized.\nDo you want to continue? [y/n] ")
            if confirm.lower() == 'y' or confirm == '':
                organize(folder_path, renamed_dict, self.reading_word_limit, self.folder_name_length,
                         vectorizer, lda_model, model, stop_words)
                print(f"Folder '{folder_path}' organized successfully.")
                print(self.separator)
            else:
                print(f"Folder organization aborted. The files in '{folder_path}' were left untouched.")
                print(self.separator)
        except KeyboardInterrupt:
            print(f"\nAbort. The files in '{folder_path}' were left untouched.")
--------------------------------------------------------------------------------
/connor/__init__.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | import logging
3 | import sys
4 | import os
5 |
6 |
7 | # Managing Paths
def get_path(relative_path):
    """Resolve *relative_path* against the application's base directory.

    When ``sys.frozen`` is set the base is the folder holding
    ``sys.executable``; otherwise it is the directory containing this module.
    """
    if getattr(sys, 'frozen', False):
        return os.path.join(os.path.dirname(sys.executable), relative_path)
    return os.path.join(os.path.abspath(os.path.dirname(__file__)), relative_path)
15 |
# Resource directories, resolved once at import time via get_path().
static_path = get_path('static')
font_path = get_path('fonts')
data_path = get_path('data')  # holds config.ini and the init.txt first-run marker
tmp_path = get_path('tmp')


# Organization Parameters and Models
TOPICS = 50  # number of LDA components (n_components) used for folder naming
MISCELLANEOUS_FOLDER_NAME = "_misc"  # folder name for files that could not be grouped
25 |
def initialize_dependencies():
    """Run the first-time setup and build every object the organizer needs.

    Returns:
        ``(model, stop_words, lda_model, vectorizer)``, or four ``None``
        values when the Sentence Transformer model cannot be loaded.
    """
    # Load imports on demand
    import nltk
    from nltk.corpus import stopwords
    from sentence_transformers import SentenceTransformer
    from sklearn.decomposition import LatentDirichletAllocation
    from sklearn.feature_extraction.text import TfidfVectorizer

    # Logging
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    log = logging.getLogger(__name__)
    logging.getLogger("sentence_transformers").setLevel(logging.WARNING)
    warnings.filterwarnings("ignore")

    print("Downloading dependencies...")
    log.info("Initializing Sentence Transformer model...")
    try:
        model = SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L6-v2')
    except Exception as e:
        log.error("Error downloading Sentence Transformer model: %s", e)
        return None, None, None, None
    else:
        log.info("Sentence Transformer model downloaded successfully.")

    log.info("Setting up NLTK stop words...")
    nltk.download('stopwords', quiet=True)
    stop_words = set(stopwords.words('english'))
    log.info("NLTK component set up successfully.")

    log.info("Initializing LDA model with %d topics...", TOPICS)
    lda_model = LatentDirichletAllocation(n_components=TOPICS, learning_decay=0.7, random_state=0)
    log.info("LDA model initialized successfully.")

    log.info("Initializing TF-IDF Vectorizer...")
    vectorizer = TfidfVectorizer(max_df=0.8, min_df=2, stop_words='english')
    log.info("TF-IDF Vectorizer initialized successfully.")

    return model, stop_words, lda_model, vectorizer
63 |
64 |
65 | # Initialize models
def init():
    """Return ``(model, stop_words, lda_model, vectorizer)`` for the organizer.

    The first call runs ``initialize_dependencies`` (which downloads the
    required resources) and then records completion in ``init.txt`` inside
    the data folder. Later calls skip the download and build the objects
    directly.
    """
    marker = os.path.join(data_path, "init.txt")
    if not os.path.exists(marker):
        components = initialize_dependencies()
        # Write the marker only after a successful setup. Writing it first
        # would make every later run skip the download after one failure.
        if components[0] is not None:
            with open(marker, 'w') as f:
                f.write('initialized')
        return components

    # Load imports on demand
    from sklearn.decomposition import LatentDirichletAllocation
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sentence_transformers import SentenceTransformer
    from nltk.corpus import stopwords

    model = SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L6-v2')
    stop_words = set(stopwords.words('english'))
    lda_model = LatentDirichletAllocation(n_components=TOPICS, learning_decay=0.7, random_state=0)
    vectorizer = TfidfVectorizer(max_df=0.8, min_df=2, stop_words='english')
    return model, stop_words, lda_model, vectorizer
--------------------------------------------------------------------------------
/connor/data/config.ini:
--------------------------------------------------------------------------------
1 | [Parameters]
2 | folder_name_length = 3
3 | reading_word_limit = 200
4 | similarity_threshold = 50
5 |
6 | [Extension_Map]
7 | documents = docx odt pdf rtf
8 | text_files = txt
9 | markup = html md
10 | executables = exe msi bat
11 | images = jpg png gif bmp svg
12 | spreadsheets = xlsx csv ods
13 | presentations = pptx ppt odp
14 | audio = mp3 wav aac flac
15 | video = mp4 avi mkv mov
16 | archives = zip rar 7z tar gz
17 | programming = py cpp c java js
18 | style_sheets = css scss
19 | databases = sqlite db sql
20 | fonts = ttf otf woff
21 | scripts = sh ps1 bash
22 | configuration = ini cfg yaml
23 | logs = log
24 | torrent = torrent
25 | backup = bak
26 | web = php asp jsp
27 | system = dll sys
28 | compressed = zip rar 7z tar gz
29 | virtual_machines = ova vdi vmdk
30 | certificates = crt pem
31 |
32 |
--------------------------------------------------------------------------------
/connor/fonts/Coder's Crux.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycatsh/connor/991eca9084bfd1b09433c76cef5cbafe9180dfc2/connor/fonts/Coder's Crux.ttf
--------------------------------------------------------------------------------
/connor/processes.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import string
4 | import configparser
5 |
6 | import numpy as np
7 | from numpy import dot
8 | from numpy.linalg import norm
9 |
10 | from connor import (
11 | data_path, MISCELLANEOUS_FOLDER_NAME
12 | )
13 | from connor.reader import read_files
14 |
15 |
16 | # Pre-processing the text to focus on relevant content
def preprocess(text, stop_words):
    """Strip punctuation, stop words and small integers from *text*.

    Tokens that parse as an integer of 100 or less are dropped; every other
    token that is not a stop word (compared in lower case) is kept in order.
    Returns the surviving tokens joined by single spaces.
    """
    def is_small_number(token):
        try:
            return int(token) <= 100  # Ignore small numbers in file names
        except ValueError:
            return False

    without_punctuation = text.translate(str.maketrans('', '', string.punctuation))
    kept = [
        token for token in without_punctuation.split()
        if token.lower() not in stop_words and not is_small_number(token)
    ]
    return ' '.join(kept)
29 |
30 |
31 | # Returns a tuple of files names and corresponding content and the ones which are not text-based (i.e. misc)
def get_file_word_list(path, word_limit, stop_words):
    """Read the files in *path* and pre-process their text.

    Returns ``(text_based, misc)``: ``text_based`` is a list of
    ``(file_name, preprocessed_content)`` pairs for files that yielded
    content, and ``misc`` is the second value returned by ``read_files``.
    """
    raw_entries, misc = read_files(path, word_limit)
    text_based = []
    for file_name, content in raw_entries:
        if content:
            text_based.append((file_name, preprocess(content, stop_words)))
    return text_based, misc
36 |
37 |
38 | # Cosine similarity calculation
def calculate_similarity(embeddings):
    """Return the cosine similarity of ``embeddings[0]`` and ``embeddings[1]``.

    A zero-length vector has no direction, so the similarity is defined as
    ``0.0`` in that case instead of dividing by zero and returning NaN.
    """
    first, second = embeddings[0], embeddings[1]
    magnitude = norm(first) * norm(second)
    if magnitude == 0:
        return 0.0
    return dot(first, second) / magnitude
41 |
42 |
43 | # Files that have a similarity score over a certain threshold are grouped together
def sim_organize(model, simlarity_threshold, files_words_list, misc_list):
    """Greedily group files whose embeddings are similar enough.

    Args:
        model: object exposing ``encode(list_of_texts, ...)`` that returns
            one embedding per text.
        simlarity_threshold: minimum cosine similarity (0-1) for two files
            to share a group.
        files_words_list: list of ``(file_name, text)`` pairs.
        misc_list: list of file names; files that match nothing are appended
            to it in place.

    Returns:
        ``(file_groups, misc_list)`` where ``file_groups`` maps the first
        file of each group to the list of all file names in that group.
    """
    file_groups = {}
    if not files_words_list:
        return file_groups, misc_list

    # Ask for NumPy output: the maths below is NumPy-based, and tensors that
    # live on a GPU cannot be handed to NumPy directly.
    vectors = np.asarray(
        model.encode([entry[1] for entry in files_words_list], convert_to_numpy=True),
        dtype=float,
    )

    # Normalize once and compute every pairwise cosine similarity in one
    # matrix product. Zero vectors keep a similarity of 0 to everything.
    lengths = np.linalg.norm(vectors, axis=1, keepdims=True)
    lengths[lengths == 0] = 1.0
    unit_vectors = vectors / lengths
    scores = unit_vectors @ unit_vectors.T

    names = [entry[0] for entry in files_words_list]
    grouped_files = set()
    for i, parent in enumerate(names):
        if parent in grouped_files:
            continue

        is_misc = True
        for j, other in enumerate(names):
            if i == j or other in grouped_files:
                continue
            if scores[i, j] >= simlarity_threshold:  # similarity threshold decided by user (default: 50%)
                file_groups.setdefault(parent, [parent]).append(other)
                grouped_files.add(other)
                is_misc = False

        grouped_files.add(parent)
        if is_misc:
            misc_list.append(parent)

    return file_groups, misc_list
69 |
70 |
71 | # Generating names for the folders
def name_category(vectorizer, lda_model, text_list, folder_word_limit=5, delimiter="_"):
    """Build a folder name from the dominant LDA topic of *text_list*.

    Args:
        vectorizer: fitted vectorizer providing ``transform`` and
            ``get_feature_names_out``.
        lda_model: fitted topic model providing ``transform`` and
            ``components_``.
        text_list: texts of the files in the group.
        folder_word_limit: maximum number of words in the name; values
            below 1 are treated as 1.
        delimiter: string placed between the words.

    Returns:
        The capitalized top words of the topic joined by *delimiter*, or
        ``"Untitled"`` when *text_list* is empty.
    """
    if not text_list:
        return "Untitled"

    # A limit of 0 would slice ``[-0:]`` and return the whole vocabulary.
    word_count = max(1, folder_word_limit)

    text_vectorized = vectorizer.transform(text_list)
    topic_distribution = lda_model.transform(text_vectorized)
    # The dominant topic of the first text in the group decides the name.
    dominant_topic_index = np.argmax(topic_distribution, axis=1)[0]

    feature_names = vectorizer.get_feature_names_out()
    topic_words = lda_model.components_[dominant_topic_index]
    top_word_indices = topic_words.argsort()[-word_count:][::-1]  # Folder word length limit
    top_words = [feature_names[i].capitalize() for i in top_word_indices]

    folder_name = delimiter.join(top_words)
    if folder_name:
        return folder_name
    # Forward the delimiter so the fallback name uses the same format.
    return folder_name_fallback(vectorizer, text_list, word_count, delimiter)
90 |
91 |
def folder_name_fallback(vectorizer, text_list, folder_word_limit=5, delimiter="_"):
    """Name a folder after its highest-scoring vectorizer features.

    The per-feature scores of all texts are summed; the
    *folder_word_limit* features with the largest totals are capitalized
    and joined by *delimiter*, highest score first.
    """
    vocabulary = vectorizer.get_feature_names_out()
    totals = np.asarray(vectorizer.transform(text_list).sum(axis=0)).ravel()

    best_first = totals.argsort()[-folder_word_limit:][::-1]
    return delimiter.join(vocabulary[index].capitalize() for index in best_first)
102 |
103 |
104 | # Handling files that cannot be organized (misc)
def misc_handler(misc_files):
    """Sort unorganizable files into categories by file extension.

    Categories come from the ``Extension_Map`` section of config.ini, where
    each option lists space-separated extensions. Matching ignores case; a
    file whose extension is not listed is filed under the lower-cased
    extension itself.

    Returns:
        ``{MISCELLANEOUS_FOLDER_NAME: {category: [file_name, ...]}}``.
    """
    config = configparser.ConfigParser()
    config.read(os.path.join(data_path, "config.ini"))
    # Tolerate a config file without the section: everything then falls back
    # to per-extension folders.
    ext_map = config['Extension_Map'] if config.has_section('Extension_Map') else {}

    # Invert the map once. ``setdefault`` keeps the first category listed for
    # an extension that appears in several of them.
    category_by_ext = {}
    for category, extensions in ext_map.items():
        for ext in extensions.split():
            category_by_ext.setdefault(ext.lower(), category)

    grouped = {}
    for misc_file in misc_files:
        file_ext = os.path.splitext(misc_file)[1][1:].lower()
        target = category_by_ext.get(file_ext, file_ext)
        grouped.setdefault(target, []).append(misc_file)

    return {MISCELLANEOUS_FOLDER_NAME: grouped}
129 |
130 |
131 | # Re-name the folders with the names determined using topic modeling
def rename_folders(vectorizer, lda_model, folder_dict, files_words_list, folder_word_limit, misc_files):
    """Replace the group keys with generated, unique folder names.

    Each group in *folder_dict* is named with ``name_category``. If that
    name is taken, ``folder_name_fallback`` is tried, then numbered
    variants of the first name. The result is merged with the ``_misc``
    structure built by ``misc_handler`` from *misc_files*.

    Returns:
        dict mapping folder name to the list of file names of the group,
        plus the miscellaneous entry.
    """
    renamed_dict = {}
    folder_names = set()

    def unique_folder_name_gen(content, base_name):
        # ``base_name`` already is the topic-model name, so it is reused
        # instead of running the topic inference a second time.
        folder_name = base_name
        if folder_name in folder_names:
            folder_name = folder_name_fallback(vectorizer, content, folder_word_limit)

        counter = 1
        while folder_name in folder_names:
            folder_name = f"{base_name}_{counter}"
            counter += 1

        return folder_name

    for similar_files in folder_dict.values():
        members = set(similar_files)  # constant-time membership tests
        content = [text for name, text in files_words_list if name in members]
        base_name = name_category(vectorizer, lda_model, content, folder_word_limit)
        folder_name = unique_folder_name_gen(content, base_name)
        folder_names.add(folder_name)
        renamed_dict[folder_name] = similar_files
    misc_dict = misc_handler(misc_files)

    return {**renamed_dict, **misc_dict}
157 |
158 |
159 | # Handles moving files
def move_file(path, file_name, destination_path):
    """Move ``path/file_name`` into *destination_path*, if the file exists.

    The destination directory is created when missing, so a move never
    fails just because the target folder has not been made yet. A missing
    source file is ignored.
    """
    source_file = os.path.join(path, file_name)
    if not os.path.exists(source_file):
        return

    os.makedirs(destination_path, exist_ok=True)
    shutil.move(source_file, os.path.join(destination_path, file_name))
166 |
167 |
168 | # Organizing files that are similar (determined using NLP)
def base_organize(path, renamed_dict):
    """Create the folders named in *renamed_dict* under *path* and fill them.

    A list value holds the file names to move from *path* into the folder.
    A dict value describes sub-folders and is handled recursively; for the
    miscellaneous folder its files are then moved from *path* into the
    matching sub-folder.
    """
    for folder, folder_content in renamed_dict.items():
        folder_path = os.path.join(path, folder)
        if not os.path.exists(folder_path):
            os.mkdir(folder_path)

        if isinstance(folder_content, dict):
            # For sub-folders
            base_organize(folder_path, folder_content)

            # For misc-folder
            if folder == MISCELLANEOUS_FOLDER_NAME:
                for sub_folder, file_names in folder_content.items():
                    target = os.path.join(folder_path, sub_folder)
                    for file_name in file_names:
                        move_file(path, file_name, target)
        elif isinstance(folder_content, list):
            for file_name in folder_content:
                move_file(path, file_name, folder_path)
189 |
190 |
191 | # Organizing inside the generated folders
def sub_organize(path, folder_dict, word_limit, folder_word_limit, vectorizer, lda_model, model, stop_words):
    """Split large generated folders into sub-folders of very similar files.

    Only folders holding more than six files are considered. Their files
    are regrouped with a stricter threshold and moved into sub-folders when
    more than one group results. Files that match no group stay where they
    are.
    """
    for folder, folder_content in folder_dict.items():
        # Only plain file lists are regrouped; the miscellaneous entry is a
        # dict of extension folders and holds no text to compare.
        if not isinstance(folder_content, list) or len(folder_content) <= 6:
            continue

        sub_folder = os.path.join(path, folder)
        sub_file_word_list = get_file_word_list(sub_folder, word_limit, stop_words)[0]
        # sim_organize takes (model, threshold, files, misc_list) and returns
        # a (groups, misc_list) pair; only the groups are needed here.
        sub_folder_dict, _ = sim_organize(model, 0.75, sub_file_word_list, [])  # Grouped only if similarity >=75%

        if len(sub_folder_dict) > 1:
            sub_renamed_dict = rename_folders(vectorizer, lda_model, sub_folder_dict, sub_file_word_list,
                                              folder_word_limit, misc_files=[])
            base_organize(sub_folder, sub_renamed_dict)
204 |
205 |
206 | # Organizing the folder provided by the user
def organize(path, folder_dict, word_limit, folder_word_limit, vectorizer, lda_model, model, stop_words):
    """Organize *path*: first move files into their folders, then refine
    the generated folders with ``sub_organize``."""
    base_organize(path, folder_dict)
    sub_organize(
        path, folder_dict, word_limit, folder_word_limit,
        vectorizer, lda_model, model, stop_words,
    )
213 |
--------------------------------------------------------------------------------
/connor/reader.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | import os
3 |
4 | from openpyxl import load_workbook
5 | from odf.opendocument import load
6 | from odf import text, teletype
7 | from pptx import Presentation
8 | from docx import Document
9 | import PyPDF2
10 |
11 | from connor import tmp_path
12 |
13 |
14 | # Reads the content from all the files in the provided folder
def read_text(file_path, word_limit):
    """Return the first *word_limit* whitespace-separated words of a text
    file, joined by single spaces. Undecodable bytes are ignored."""
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
        words = handle.read().split()
    return ' '.join(words[:word_limit])
19 |
def read_pdf(file_path, word_limit):
    """Return the first *word_limit* words of a PDF's extracted text."""
    words = []
    with open(file_path, 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)
        for page in pdf_reader.pages:
            # A page without extractable text may yield None; treat it as
            # empty instead of breaking the join.
            words.extend((page.extract_text() or '').split())
            # Text extraction is slow, so stop once enough words are read.
            if 0 <= word_limit <= len(words):
                break
    return ' '.join(words[:word_limit])
25 |
def read_odf(file_path, word_limit):
    """Return the first *word_limit* words of an OpenDocument file.

    The limit counts words, matching the other readers; limiting by
    paragraph would let a few long paragraphs exceed it.
    """
    odf_file = load(file_path)
    paragraphs = (teletype.extractText(para) for para in odf_file.getElementsByType(text.P))
    words = ' '.join(paragraphs).split()
    return ' '.join(words[:word_limit])
30 |
def read_doc(file_path, word_limit):
    """Return the first *word_limit* words of a .docx document's paragraphs."""
    document = Document(file_path)
    words = ' '.join(paragraph.text for paragraph in document.paragraphs).split()
    return ' '.join(words[:word_limit])
35 |
def read_xlsx(file_path, word_limit):
    """Return the first *word_limit* words found in the active worksheet.

    Cells are read row by row and empty (falsy) cells are skipped. The
    limit counts words rather than cells, matching the other readers.
    """
    workbook = load_workbook(file_path)
    sheet = workbook.active
    words = []
    for row in sheet.iter_rows(values_only=True):
        for cell in row:
            if cell:
                words.extend(str(cell).split())
        # Stop scanning once enough words were collected.
        if 0 <= word_limit <= len(words):
            break
    return ' '.join(words[:word_limit])
42 |
def read_ppt(file_path, word_limit):
    """Return the first *word_limit* words of the text shapes in a presentation.

    The limit counts words rather than shapes, matching the other readers.
    """
    presentation = Presentation(file_path)
    words = []
    for slide in presentation.slides:
        for shape in slide.shapes:
            if hasattr(shape, "text"):
                words.extend(shape.text.split())
    return ' '.join(words[:word_limit])
51 |
52 |
53 | # Avoids several ifs by mapping funcs to exts
# Avoids several ifs by mapping funcs to exts.
# Legacy binary '.ppt' is deliberately not mapped: python-pptx only opens the
# XML-based '.pptx' format, so such files are left to the miscellaneous
# handling instead of making the reader raise.
func_map = {
    '.txt': read_text, '.html': read_text, '.md': read_text, '.csv': read_text,
    '.pdf': read_pdf, '.docx': read_doc, '.odt': read_odf, '.odp': read_odf,
    '.xlsx': read_xlsx, '.pptx': read_ppt,
}
57 |
def _unique_destination(folder, file_name):
    """Return a path for *file_name* inside *folder* that does not exist yet."""
    candidate = os.path.join(folder, file_name)
    stem, extension = os.path.splitext(file_name)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(folder, f"{stem} ({counter}){extension}")
        counter += 1
    return candidate


def prep_files(directory, select_folder, copy_files=False):
    """Gather the files to organize into a single flat folder.

    With ``select_folder`` true, *directory* is a folder path: every file
    in its sub-folders is moved to the folder's root and the emptied
    sub-folders are removed.

    Otherwise *directory* is an iterable of file paths: each file is
    copied (``copy_files`` true) or moved into the ``Organized_Files``
    folder under ``tmp_path``, which is created if needed.

    A file whose name is already taken at the destination is stored as
    ``name (1).ext``, ``name (2).ext``, ... instead of overwriting the
    existing file.
    """
    if select_folder:
        top_level = os.path.abspath(directory)
        for root, _, files in os.walk(directory):
            # Files already at the root level stay where they are
            if os.path.abspath(root) == top_level:
                continue
            for file_name in files:
                shutil.move(os.path.join(root, file_name),
                            _unique_destination(directory, file_name))

        # Deletes the now empty sub-folders, deepest first
        for root, folders, _ in os.walk(directory, topdown=False):
            for folder in folders:
                os.rmdir(os.path.join(root, folder))
        return

    tmp_folder = os.path.join(tmp_path, "Organized_Files")
    os.makedirs(tmp_folder, exist_ok=True)

    transfer = shutil.copy if copy_files else shutil.move
    for file_path in directory:
        transfer(file_path, _unique_destination(tmp_folder, os.path.basename(file_path)))
83 |
def read_files(directory, word_limit):
    """Read every file located directly inside *directory*.

    Returns ``(words_list, misc_list)``: ``words_list`` holds
    ``(file_name, text)`` tuples for files whose extension has a reader in
    ``func_map``; ``misc_list`` holds the names of the remaining files.

    Only top-level files are considered, each exactly once. Extensions are
    matched case-insensitively, so ``REPORT.PDF`` is read like
    ``report.pdf``. A directory that cannot be listed yields two empty
    lists.
    """
    words_list = []
    misc_list = []
    try:
        # Sorted so the result does not depend on the file system's order
        entries = sorted(os.listdir(directory))
    except OSError:
        return words_list, misc_list

    for file_name in entries:
        file_path = os.path.join(directory, file_name)
        if not os.path.isfile(file_path):
            continue

        file_extension = os.path.splitext(file_name)[1].lower()
        reader = func_map.get(file_extension)
        if reader is None:
            misc_list.append(file_name)
        else:
            words_list.append((file_name, reader(file_path, word_limit)))

    return words_list, misc_list
100 |
--------------------------------------------------------------------------------
/connor/static/icons/clear.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycatsh/connor/991eca9084bfd1b09433c76cef5cbafe9180dfc2/connor/static/icons/clear.png
--------------------------------------------------------------------------------
/connor/static/icons/folder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycatsh/connor/991eca9084bfd1b09433c76cef5cbafe9180dfc2/connor/static/icons/folder.png
--------------------------------------------------------------------------------
/connor/static/icons/refresh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycatsh/connor/991eca9084bfd1b09433c76cef5cbafe9180dfc2/connor/static/icons/refresh.png
--------------------------------------------------------------------------------
/connor/static/icons/upload.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycatsh/connor/991eca9084bfd1b09433c76cef5cbafe9180dfc2/connor/static/icons/upload.png
--------------------------------------------------------------------------------
/connor/static/style.css:
--------------------------------------------------------------------------------
1 | QMainWindow, QDialog {
2 | background-color: #202020;
3 | }
4 |
5 | QLabel, QPushButton, QLineEdit, QTextEdit, QCheckBox, QMenu, QMenuBar {
6 | color: #d6d2c8;
7 | }
8 |
9 | QPushButton {
10 | background-color: #323232;
11 | border: 2px solid #333;
12 | }
13 |
14 | QPushButton:hover {
15 | background-color: #444;
16 | }
17 |
18 | QSlider::groove:horizontal {
19 | background: #484848;
20 | height: 8px;
21 | }
22 |
23 | QSlider::handle:horizontal {
24 | background: #75a7ad;
25 | border: 1px solid #333;
26 | width: 10px;
27 | height: 12px;
28 | margin: -9px 0;
29 | }
30 |
31 | QCheckBox::indicator {
32 | color: #75a7ad;
33 | width: 15px;
34 | height: 15px;
35 | }
36 |
37 | QCheckBox::indicator:unchecked {
38 | border: 1px solid #333;
39 | }
40 |
41 | QCheckBox::indicator:checked {
42 | background-color: #75a7ad;
43 | border: 1px solid #75a7ad;
44 | }
45 |
46 | QTextEdit {
47 | background-color: #333;
48 | border: 1px solid #333;
49 | color: white;
50 | padding: 5px;
51 | }
52 |
53 | QLineEdit {
54 | background-color: #323232;
55 | border: 1px solid #333;
56 | color: white;
57 | padding: 5px;
58 | }
59 |
60 | QMenuBar, QMenuBar::item{
61 | background-color: #202020;
62 | }
63 |
64 | QMenu, QMenu::item{
65 | background-color: #484848;
66 | }
67 |
68 | QMenu::item:selected, QMenuBar::item:selected {
69 | background-color: #75a7ad;
70 | }
--------------------------------------------------------------------------------
/connor/tmp/keep.txt:
--------------------------------------------------------------------------------
1 | Don't Delete this placeholder text file.
--------------------------------------------------------------------------------
/connor/tree_builder.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 |
4 | branch = "│ "
5 | connector = "├── "
6 | end = "└── "
7 | space = " "
8 |
9 | # Generates the organization summary (tree structure) recursively
def tree(directory, indent='', is_path=False):
    """Yield the lines of a tree view, one entry per line.

    With ``is_path`` true, *directory* is a folder path that is walked
    recursively. Otherwise it is a mapping of folder name to a list of
    file names, rendered one level deep.
    """
    if not is_path:
        for folder_name, files in directory.items():
            yield indent + folder_name
            if not files:
                continue
            last_position = len(files) - 1
            for position, file in enumerate(files):
                marker = '└── ' if position == last_position else '├── '
                yield indent + marker + file
        return

    entries = list(Path(directory).iterdir())
    last_position = len(entries) - 1
    for position, entry in enumerate(entries):
        is_last = position == last_position
        yield indent + (end if is_last else connector) + entry.name

        if entry.is_dir():
            # Below the last entry there is no vertical branch to continue
            child_indent = indent + (space if is_last else branch)
            yield from tree(entry, child_indent, is_path=True)
30 |
31 | # Generates a string of the tree with relevant formatting
32 | def make_tree(path, dict, is_path_only=False, cli=False):
33 | if cli:
34 | structure = f"Organized Folder:\n{path}\n"
35 | for struct in tree(dict, is_path=is_path_only):
36 | structure += f" {struct}\n"
37 | else:
38 | structure = f" {path}\n"
39 | for struct in tree(dict, is_path=is_path_only):
40 | structure += f" {struct}\n"
41 | structure += "
"
42 |
43 | return structure
--------------------------------------------------------------------------------
/gui/__init__.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from PyQt6.QtWidgets import QApplication
4 |
5 | from .views import ConnorGUI
6 |
7 |
def main():
    """Create the Qt application, show the main window and run the event loop."""
    application = QApplication(sys.argv)
    window = ConnorGUI()
    window.show()
    exit_code = application.exec()
    sys.exit(exit_code)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/gui/views/__init__.py:
--------------------------------------------------------------------------------
1 | from .organizer import ConnorGUI
--------------------------------------------------------------------------------
/gui/views/about.py:
--------------------------------------------------------------------------------
1 | from PyQt6.QtWidgets import QVBoxLayout, QLabel, QDialog, QTextEdit
2 |
3 | class About(QDialog):
4 | def __init__(self, parent):
5 | super().__init__(parent)
6 | self.setWindowTitle("Tutorial")
7 | self.setFixedSize(600, 450)
8 | self.setModal(True)
9 |
10 | # Layout
11 | layout = QVBoxLayout()
12 | section = QVBoxLayout()
13 |
14 | # Section 1
15 | title_lft = QLabel("Select Folder
")
16 | desc_lft = QTextEdit()
17 | desc_lft.setHtml("""
18 | Allows you to select a folder on your computer and organize it using artificial intelligence based on the content or the names of the files.
19 | After selecting or entering the absolute path of the folder in the appropriate screen, click on ORGANIZE FILES to start the organization process.
20 | """)
21 | desc_lft.setReadOnly(True)
22 | desc_lft.setFixedSize(575, 100)
23 |
24 | title_rgt = QLabel("Upload Files
")
25 | desc_rgt = QTextEdit()
26 | desc_rgt.setHtml("""
27 |
36 | Allows you to upload files manually from anywhere on your computer.
37 | After going to the appropriate screen, click on UPLOAD FILES to manually upload files into the app. These files are stored temporarily in tmp/
in the app's root directory for the purpose of organization.
38 | You can also choose between copying or moving the files:
39 |
40 | - Copy Files: copies the files from the original location into the app
41 | - Move Files: moves the files from the original location into the app
42 |
43 | After you have ensured the files are uploaded into the app, click on ORGANIZE FILES to organize the uploaded files into a folder. Upon completion, you can send the organized folder containing your uploaded files (but organized using artificial intelligence) back to anywhere on your computer by clicking on SEND TO COMPUTER.
44 | """)
45 | desc_rgt.setReadOnly(True)
46 | desc_rgt.setFixedSize(575, 240)
47 |
48 | section.addWidget(title_lft)
49 | section.addWidget(desc_lft)
50 | section.addWidget(title_rgt)
51 | section.addWidget(desc_rgt)
52 |
53 | layout.addLayout(section)
54 | self.setLayout(layout)
55 |
--------------------------------------------------------------------------------
/gui/views/organizer.py:
--------------------------------------------------------------------------------
1 | import configparser
2 | import os
3 | import shutil
4 |
5 | from PyQt6.QtWidgets import (
6 | QMainWindow, QPushButton, QLineEdit, QTextEdit, QVBoxLayout,
7 | QHBoxLayout, QWidget, QStackedWidget, QFileDialog, QLabel,
8 | QSlider, QSizePolicy, QCheckBox, QMessageBox, QGridLayout
9 | )
10 | from PyQt6.QtGui import QFont, QIcon, QAction, QFontDatabase
11 | from PyQt6.QtCore import Qt, QFile, QTextStream, QIODevice
12 | from PyQt6 import QtCore
13 |
14 | from connor.processes import (
15 | get_file_word_list, rename_folders, sim_organize, organize
16 | )
17 | from connor import (
18 | init, data_path, static_path, tmp_path, font_path
19 | )
20 | from connor.tree_builder import make_tree
21 | from connor.reader import prep_files
22 | from gui.views.settings import Settings
23 | from gui.views.about import About
24 |
25 |
26 | class ConnorGUI(QMainWindow):
def __init__(self):
    """Create the main window, load the models and defaults, build the UI."""
    super().__init__()

    self.central_widget = QWidget()
    self.setCentralWidget(self.central_widget)
    self.setWindowFlag(Qt.WindowType.WindowMaximizeButtonHint, False)

    # Loads models
    self.model, self.stop_words, self.lda_model, self.vectorizer = init()

    # Loads the default settings from config file
    self.settings = configparser.ConfigParser()
    self.settings.read(os.path.join(data_path, "config.ini"))

    # getint() with a fallback also covers a missing config file or a
    # missing [Parameters] section, which plain indexing turns into KeyError
    self.folder_name_length = self.settings.getint("Parameters", "folder_name_length", fallback=3)
    self.reading_word_limit = self.settings.getint("Parameters", "reading_word_limit", fallback=200)
    self.similarity_threshold = self.settings.getint("Parameters", "similarity_threshold", fallback=50)

    self.init_ui()
46 |
def init_ui(self):
    """Configure the window, menu bar, default state, screens and fonts."""
    self.setGeometry(550, 250, 850, 600)
    self.setWindowTitle("Smart File Organizer")
    self.load_stylesheet("style.css")

    # Window
    self.stacked_widget = QStackedWidget(self)
    self.central_layout = QVBoxLayout(self.central_widget)
    self.central_layout.addWidget(self.stacked_widget)

    # Menubar
    self.view_action = None
    menu_bar = self.menuBar()
    self.setup_menu_bar(menu_bar)

    # Default organization variables
    self.copy_files = False
    self.directories = []
    self.file_list = []
    self.misc_list = []
    self.num_files = 0
    self.tmp_folder = os.path.join(tmp_path, "Organized_Files")

    # Screens
    self.screen1 = QWidget()
    self.screen2 = QWidget()
    self.screen3 = QWidget()
    self.screen4 = QWidget()
    self.screen5 = QWidget()

    self.stacked_widget.addWidget(self.screen1)
    self.stacked_widget.addWidget(self.screen2)
    self.stacked_widget.addWidget(self.screen3)
    self.stacked_widget.addWidget(self.screen4)
    self.stacked_widget.addWidget(self.screen5)

    self.create_screen1()
    self.create_screen2()
    self.create_screen3()
    self.create_screen4()
    self.create_screen5()

    self.current_screen = 0
    self.stacked_widget.setCurrentIndex(self.current_screen)

    # Fonts: addApplicationFont() returns -1 when the file cannot be
    # loaded; fall back to the window's current font family instead of
    # indexing an empty family list
    font_id = QFontDatabase.addApplicationFont(os.path.join(font_path, "Coder's Crux.ttf"))
    families = QFontDatabase.applicationFontFamilies(font_id) if font_id != -1 else []
    custom_font = families[0] if families else self.font().family()
    self.update_custom_fonts(custom_font)
95 |
96 | # Loads styling for the application
def load_stylesheet(self, file_name):
    """Apply the style sheet *file_name* from the static folder to the window.

    Nothing happens when the file cannot be opened.
    """
    sheet = QFile(os.path.join(static_path, file_name))
    read_as_text = QIODevice.OpenModeFlag.ReadOnly | QIODevice.OpenModeFlag.Text
    if not sheet.open(read_as_text):
        return
    self.setStyleSheet(QTextStream(sheet).readAll())
103 |
104 | # Create Menu Bar
def setup_menu_bar(self, menu_bar):
    """Populate *menu_bar* with the File, Edit, View and Help menus.

    The "Menu Bar" action is kept in ``self.view_action``.
    """
    menu_spec = (
        ("File", (("Exit", self.close),)),
        ("Edit", (("Settings", self.show_settings),)),
        ("View", (("Menu Bar", self.toggle_menubar),)),
        ("Help", (("About", self.show_about),)),
    )

    for menu_title, entries in menu_spec:
        menu = menu_bar.addMenu(menu_title)
        for entry_title, handler in entries:
            entry = QAction(entry_title, self)
            entry.triggered.connect(handler)
            menu.addAction(entry)

            if entry_title == "Menu Bar":
                self.view_action = entry
122 |
123 | # Alt key toggles menubar
def keyPressEvent(self, event):
    """Toggle the menu bar on Alt; pass every other key to the base class."""
    if event.key() != Qt.Key.Key_Alt:
        # Keys this handler does not act upon must reach the base
        # implementation so default key handling keeps working
        super().keyPressEvent(event)
        return

    if self.view_action is not None:
        self.view_action.setChecked(not self.view_action.isChecked())
    self.toggle_menubar()
128 |
def toggle_menubar(self):
    """Hide the menu bar when it is visible, show it otherwise."""
    bar = self.menuBar()
    bar.setVisible(not bar.isVisible())
134 |
135 | # Updates font
def update_font(self, widget, font, size=12):
    """Give *widget* the font *font* at point size *size*."""
    sized_font = QFont(font)
    sized_font.setPointSize(size)
    widget.setFont(sized_font)
140 |
141 | # Updates elements with custom font
def update_custom_fonts(self, custom_font):
    """Apply *custom_font* to buttons and labels, and Monospace to text boxes."""
    font_plan = (
        (QPushButton, custom_font, 24),
        (QLabel, custom_font, 24),
        (QTextEdit, "Monospace", 14),
    )
    for widget_type, family, point_size in font_plan:
        for widget in self.findChildren(widget_type):
            self.update_font(widget, family, point_size)
151 |
152 | # Settings pop-up (Allows the user to change default params)
def show_settings(self):
    """Open the settings dialog and block until it is closed."""
    dialog = Settings(self.settings, self)
    dialog.exec()
156 |
157 | # Tutorial pop-up (Shows General app instructions)
def show_about(self):
    """Open the tutorial dialog and block until it is closed."""
    dialog = About(self)
    dialog.exec()
161 |
162 | # Allows user to go back to the previous screen
def return_button(self, prev_screen, parent=None):
    """Create a "Return" button on *parent* that switches back to *prev_screen*."""
    def go_back(_, screen=prev_screen):
        # The first argument is the value passed by the clicked signal
        self.stacked_widget.setCurrentWidget(screen)

    button = QPushButton("Return", parent)
    button.setGeometry(720, 5, 100, 30)
    button.clicked.connect(go_back)
    return button
168 |
169 | # Update move/copy files checkbox
def copy_files_checkbox_state(self):
    """Mirror the state of the checkbox that sent the signal in ``copy_files``."""
    self.copy_files = True if self.sender().isChecked() else False
176 |
177 | # Main Menu Screen
def create_screen1(self):
    """Build the main menu: two workflow buttons and three parameter sliders."""
    layout = QVBoxLayout()
    self.screen1.setLayout(layout)

    # Entry buttons for the "select folder" and "upload files" workflows
    button_layout = QHBoxLayout()
    workflows = (
        ("SELECT FOLDER", self.show_screen2),
        ("UPLOAD FILES", self.show_screen3),
    )
    for caption, handler in workflows:
        button = QPushButton(caption, parent=self.screen1)
        button.setFixedSize(200, 60)
        button.clicked.connect(handler)
        button_layout.addWidget(button)

    def slider_row(label, minimum, maximum, value, on_change):
        # One row: the describing label followed by a fixed-width slider
        row = QHBoxLayout()
        slider = QSlider(Qt.Orientation.Horizontal)
        slider.setFixedWidth(200)
        slider.setRange(minimum, maximum)
        slider.setValue(value)
        slider.setSizePolicy(QSizePolicy.Policy.Fixed, QSizePolicy.Policy.Fixed)
        # Connected after setValue() so the initial value does not fire the handler
        slider.valueChanged.connect(on_change)
        row.addWidget(label)
        row.addWidget(slider)
        return row

    # Parameter Sliders
    self.slider1_label = QLabel(f"Max Folder Name Length: {self.folder_name_length} words")
    self.slider2_label = QLabel(f"Word Limit For Reading File: {self.reading_word_limit} words")
    self.slider3_label = QLabel(f"Similarity Threshold Percent: {self.similarity_threshold} %")

    layout.addLayout(button_layout)
    layout.addLayout(slider_row(self.slider1_label, 2, 5,
                                self.folder_name_length, self.slider1_changed))
    layout.addLayout(slider_row(self.slider2_label, 100, 1000,
                                self.reading_word_limit, self.slider2_changed))
    layout.addLayout(slider_row(self.slider3_label, 0, 100,
                                self.similarity_threshold, self.slider3_changed))
236 |
237 | # Select Folder Screen
def create_screen2(self) -> None:
    """Build the "select folder" screen.

    Top section: a folder-picker button next to a path input field.
    Bottom section: a label, a read-only box that previews the selected
    folder, and the button that starts the organization.
    """
    layout = QVBoxLayout()
    top_section = QVBoxLayout()
    inp_layout = QHBoxLayout()
    bot_section = QVBoxLayout()
    self.screen2.setLayout(layout)

    # Select Folder button (icon only, opens the folder dialog)
    select_button = QPushButton(parent=self.screen2)
    select_button.setIcon(QIcon(os.path.join(static_path, "icons/folder.png")))
    select_button.setIconSize(QtCore.QSize(100, 100))
    select_button.setFixedSize(50, 50)

    # Folder input field (its text is read by organize_selected_folder)
    self.folder_path_input = QLineEdit()
    self.folder_path_input.setPlaceholderText("Enter Folder Path")
    self.folder_path_input.setFixedHeight(50)
    self.update_font(self.folder_path_input, "Monospace", 14)

    # Organize button
    organize_button = QPushButton("ORGANIZE SELECTED FOLDER", parent=self.screen2)
    organize_button.setFixedSize(820, 50)

    # Picker button and path field share one horizontal row
    inp_layout.addWidget(select_button)
    inp_layout.addWidget(self.folder_path_input)
    top_section.addLayout(inp_layout)

    # Selected folder label
    self.select_folder_label = QLabel(f"Your Selected Folder Before Organization:", parent=self.screen2)
    self.select_folder_label.setFixedSize(600, 30)

    # Box for displaying selected folder (filled by select_folder)
    self.select_folder_tab = QTextEdit(parent=self.screen2)
    self.select_folder_tab.setReadOnly(True)
    self.select_folder_tab.setFixedWidth(820)
    self.select_folder_tab.setFixedHeight(200)

    bot_section.addWidget(self.select_folder_label)
    bot_section.addWidget(self.select_folder_tab)
    bot_section.addWidget(organize_button)

    organize_button.clicked.connect(self.organize_selected_folder)
    select_button.clicked.connect(self.select_folder)

    layout.addLayout(top_section)
    layout.addLayout(bot_section)

    # Return to previous screen button (positioned absolutely, not in a layout)
    self.return_button(self.screen1, self.screen2)
287 |
288 | # Upload Files Screen
def create_screen3(self) -> None:
    """Build the "upload files" screen.

    Left section: the upload button, the organize button and the
    copy-instead-of-move checkbox. Right section: the uploaded-files
    counter, a read-only list of uploaded files, and the refresh/clear
    buttons.
    """
    layout = QHBoxLayout()
    left_section = QVBoxLayout()
    right_section = QVBoxLayout()
    util_layout = QHBoxLayout()
    self.screen3.setLayout(layout)

    # Upload button
    upload_button = QPushButton(" UPLOAD", parent=self.screen3)
    upload_button.setIcon(QIcon(os.path.join(static_path, "icons/upload.png")))
    upload_button.setIconSize(QtCore.QSize(100, 100))
    upload_button.setFixedSize(300, 150)

    # Organize button
    organize_button = QPushButton("ORGANIZE", parent=self.screen3)
    organize_button.setFixedSize(300, 50)

    # Checkbox: copy files instead of moving them when the user uploads
    checkbox2 = QCheckBox("Copy Uploaded Files", parent=self.screen3)
    checkbox2.setFixedSize(200, 25)

    left_section.setAlignment(Qt.AlignmentFlag.AlignCenter)
    left_section.addWidget(upload_button)
    left_section.addWidget(organize_button)
    left_section.addWidget(checkbox2)

    # Uploaded files label
    self.uploaded_num_files = QLabel(f"Your Uploaded Files: {self.num_files}", parent=self.screen3)
    self.uploaded_num_files.setFixedSize(400, 30)

    # Box for displaying uploaded files
    self.uploaded_files_tab = QTextEdit(parent=self.screen3)
    self.uploaded_files_tab.setReadOnly(True)
    self.uploaded_files_tab.setFixedWidth(450)
    self.uploaded_files_tab.setFixedHeight(450)
    right_section.addSpacing(50)

    # Loads the already uploaded files; must run after the label and the
    # text box above exist because refresh_files() writes to both
    self.refresh_files()

    # Refresh all the uploaded files button
    refresh_button = QPushButton(" REFRESH", parent=self.screen3)
    refresh_button.setIcon(QIcon(os.path.join(static_path, "icons/refresh.png")))
    refresh_button.setIconSize(QtCore.QSize(30, 30))
    refresh_button.setFixedSize(222, 35)

    # Clear all the uploaded files button
    clear_button = QPushButton(" CLEAR", parent=self.screen3)
    clear_button.setIcon(QIcon(os.path.join(static_path, "icons/clear.png")))
    clear_button.setIconSize(QtCore.QSize(30, 30))
    clear_button.setFixedSize(222, 35)

    util_layout.addWidget(refresh_button)
    util_layout.addWidget(clear_button)

    right_section.addWidget(self.uploaded_num_files)
    right_section.addWidget(self.uploaded_files_tab)
    right_section.addLayout(util_layout)

    checkbox2.stateChanged.connect(self.copy_files_checkbox_state)
    upload_button.clicked.connect(self.upload_files)
    organize_button.clicked.connect(self.organize_uploaded_files)
    clear_button.clicked.connect(self.clear_files)
    refresh_button.clicked.connect(self.refresh_files)

    layout.addLayout(left_section)
    layout.addLayout(right_section)

    # Return to previous screen button (positioned absolutely, not in a layout)
    self.return_button(self.screen1, self.screen3)
359 |
360 | # Organized Folder Summary Screen (if user selects a folder)
def create_screen4(self):
    """Build the summary screen shown after a selected folder is organized."""
    title = QLabel("Folder Successfully organized:")
    title.setFixedSize(400, 30)

    # Organization summary text box
    summary = QTextEdit()
    summary.setReadOnly(True)

    self.output_title = title
    self.output_text = summary

    layout = QVBoxLayout()
    self.screen4.setLayout(layout)
    for widget in (title, summary):
        layout.addWidget(widget)

    # Return to previous screen button
    self.return_button(self.screen2, self.screen4)
377 |
378 | # Organized Folder Summary Screen (if user uploads files)
def create_screen5(self):
    """Build the summary screen shown after uploaded files are organized."""
    title = QLabel("Organized Folder Structure:")
    title.setFixedSize(400, 30)
    self.output_title = title

    # Organization summary text box
    summary = QTextEdit()
    summary.setReadOnly(True)
    self.output_text2 = summary

    # Send organized files (if uploaded) to computer
    send_button = QPushButton("SEND TO COMPUTER", parent=self.screen5)
    send_button.setFixedSize(250, 50)
    send_button.clicked.connect(self.send_to_computer)

    layout = QVBoxLayout()
    self.screen5.setLayout(layout)
    for widget in (title, summary, send_button):
        layout.addWidget(widget)

    # Return to previous screen button
    self.return_button(self.screen3, self.screen5)
401 |
402 | # Allows user to choose a folder instead of pasting its path
def select_folder(self):
    """Let the user pick a folder, then show its path and tree preview."""
    chosen = QFileDialog.getExistingDirectory(self, 'Select Folder')
    if not chosen.strip():
        # Dialog cancelled: leave the input field and the preview untouched
        return

    self.folder_path_input.setText(chosen)
    preview = make_tree(path=chosen, dict=chosen, is_path_only=True, cli=False)
    self.select_folder_tab.setHtml(preview)
408 |
409 | # Organizes the selected folder
def organize_selected_folder(self):
    """Organize the folder whose path is in the input field, then show the result.

    An empty path, or one that is not an existing folder, is reported to
    the user and nothing is touched.
    """
    entered_path = self.folder_path_input.text().strip()
    if not entered_path or not os.path.isdir(entered_path):
        QMessageBox.warning(self, "Invalid Folder",
                            "Please select or enter the path of an existing folder before organizing.")
        return

    try:
        folder_path = os.path.relpath(entered_path, os.getcwd())
    except ValueError:
        # relpath() cannot relate paths on different drives (Windows)
        folder_path = os.path.abspath(entered_path)

    # Initializing the file names and content, and grouping them into a dictionary
    prep_files(folder_path, select_folder=True)
    self.file_list, self.misc_list = get_file_word_list(folder_path, self.reading_word_limit, self.stop_words)
    folder_dict, self.misc_list = sim_organize(self.model, self.similarity_threshold/100, self.file_list, self.misc_list)

    # Fitting the model based on the data provided
    data_vectorized = self.vectorizer.fit_transform(words[1] for words in self.file_list)
    self.lda_model.fit(data_vectorized)

    # Final organization process
    renamed_dict = rename_folders(self.vectorizer, self.lda_model, folder_dict,
                                  self.file_list, self.folder_name_length, self.misc_list)
    organize(folder_path, renamed_dict, self.reading_word_limit, self.folder_name_length,
             self.vectorizer, self.lda_model, self.model, self.stop_words)
    self.output_text.setHtml(make_tree(path=folder_path, dict=folder_path, is_path_only=True, cli=False))

    # Switch to summary screen
    self.show_screen4()
430 |
431 | # Handles uploading the files
def upload_files(self):
    """Ask for files, stage them in the tmp folder and refresh the file list."""
    selection = QFileDialog.getOpenFileNames(self, "Select Files", "", "All Files (*)")
    # The dialog returns a pair; only its first item (the chosen paths) is used
    self.directories = selection[0]
    prep_files(self.directories, select_folder=False, copy_files=self.copy_files)
    self.refresh_files()
437 |
438 | # Organizes the uploaded files
def organize_uploaded_files(self):
    """Organize the files staged in the tmp folder, then show the result.

    When nothing has been uploaded the user is told so and nothing runs.
    """
    # clear_files() and send_to_computer() remove the tmp folder, and
    # building the tree of a missing folder would raise
    if not os.path.isdir(self.tmp_folder) or not os.listdir(self.tmp_folder):
        QMessageBox.information(self, "No Files", "Please upload files before organizing.")
        return

    # Initializing the file names and content, and grouping them into a dictionary
    self.file_list, self.misc_list = get_file_word_list(self.tmp_folder, self.reading_word_limit, self.stop_words)
    folder_dict, self.misc_list = sim_organize(self.model, self.similarity_threshold/100, self.file_list, self.misc_list)

    # Fitting the model based on the data provided
    data_vectorized = self.vectorizer.fit_transform(words[1] for words in self.file_list)
    self.lda_model.fit(data_vectorized)

    # Final organization process
    renamed_dict = rename_folders(self.vectorizer, self.lda_model, folder_dict,
                                  self.file_list, self.folder_name_length, self.misc_list)
    organize(self.tmp_folder, renamed_dict, self.reading_word_limit, self.folder_name_length,
             self.vectorizer, self.lda_model, self.model, self.stop_words)
    self.output_text2.setHtml(make_tree(path=self.tmp_folder, dict=self.tmp_folder, is_path_only=True, cli=False))

    # Switch to summary screen
    self.show_screen5()
457 |
458 | # Allows the user to send the organized files (from uploads) to computer
def send_to_computer(self):
    """Move the organized uploads to a folder chosen by the user.

    If a folder is chosen, everything in the tmp folder is moved there,
    the tmp folder is removed and the app returns to the main menu. If the
    dialog is cancelled, the user is asked whether to delete the organized
    uploads instead.
    """
    send_folder = QFileDialog.getExistingDirectory(self, 'Select Folder')
    root_folder = self.tmp_folder

    if send_folder.strip():
        # The tmp folder may already be gone (e.g. after CLEAR)
        if os.path.isdir(root_folder):
            for file in os.listdir(root_folder):
                shutil.move(os.path.join(root_folder, file), os.path.join(send_folder, file))
            shutil.rmtree(root_folder)

        # Success message pop-up
        self.create_pop_up(title="Success", content=f"The uploaded files have been organized and sent to the specified folder:\n{send_folder}", icon=QMessageBox.Icon.Information, options=False)
        # The files have been moved whichever way the pop-up was closed,
        # so the upload state is always reset
        self.reset_params()
        self.show_screen1()
    else:
        # Confirmation pop-up
        pop_up = self.create_pop_up(title="Confirmation", content="If you do not wish to send these files to your computer? then please click 'Yes' to delete them or 'No' to cancel.\n\n The previously uploaded files and the temporary organized folder will be deleted from uploads if you continue with 'Yes' (this does not delete the actual files if you chose to copy instead of move)", icon=QMessageBox.Icon.Question)
        if pop_up == QMessageBox.StandardButton.Yes:
            # Deletes previously organized folder (if ignored by the user)
            if os.path.isdir(root_folder):
                shutil.rmtree(root_folder)
            self.reset_params()
            self.show_screen1()
481 |
def create_pop_up(self, title, content, icon, options=True):
    """Show a modal message box and return the result of its ``exec()``.

    With ``options`` true the box offers Yes and No; otherwise only Yes.
    """
    buttons = QMessageBox.StandardButton.Yes
    if options:
        buttons = QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No

    dialog = QMessageBox()
    dialog.setWindowTitle(title)
    dialog.setText(content)
    dialog.setIcon(icon)
    dialog.setStandardButtons(buttons)
    return dialog.exec()
490 |
def clear_files(self):
    """Delete the staged uploads (if any) and reset the upload display."""
    # The folder is absent before the first upload and after a previous
    # clear; removing a missing folder would raise FileNotFoundError
    if os.path.isdir(self.tmp_folder):
        shutil.rmtree(self.tmp_folder)
    self.reset_params()
494 |
495 | # Refreshes the uploaded files display on screen 3
496 | def refresh_files(self):
497 | self.reset_params()
498 | if os.path.exists(self.tmp_folder):
499 | self.num_files = 0
500 | for file in os.listdir(self.tmp_folder):
501 | self.num_files += 1
502 | self.uploaded_files_tab.append(f"{os.path.basename(file)}
")
503 | self.uploaded_num_files.setText(f"Your Uploaded Files: {self.num_files}")
504 |
505 | # Resets the uploaded files label and text box appropriately
def reset_params(self):
    """Empty the upload state and the widgets that display it."""
    self.uploaded_files_tab.setText("")
    self.directories, self.file_list, self.misc_list = [], [], []
    self.num_files = 0
    counter_text = f"Your Uploaded Files: {self.num_files}"
    self.uploaded_num_files.setText(counter_text)
513 |
514 | # Show/Switch to respective screens
def show_screen1(self):
    """Switch to the main menu screen (stack index 0)."""
    self.current_screen = index = 0
    self.stacked_widget.setCurrentIndex(index)
518 |
def show_screen2(self):
    """Switch to the select-folder screen (stack index 1)."""
    self.current_screen = index = 1
    self.stacked_widget.setCurrentIndex(index)
522 |
def show_screen3(self):
    """Switch to the upload-files screen (stack index 2)."""
    self.current_screen = index = 2
    self.stacked_widget.setCurrentIndex(index)
526 |
def show_screen4(self):
    """Switch to the selected-folder summary screen (stack index 3)."""
    self.current_screen = index = 3
    self.stacked_widget.setCurrentIndex(index)
530 |
def show_screen5(self):
    """Switch to the uploaded-files summary screen (stack index 4)."""
    self.current_screen = index = 4
    self.stacked_widget.setCurrentIndex(index)
534 |
535 | # Updates slider values in main menu
def slider1_changed(self, value):
    """Store the new folder-name length and update its label."""
    self.folder_name_length = value
    caption = f"Max Folder Name Length: {value} words"
    self.slider1_label.setText(caption)
539 |
def slider2_changed(self, value):
    """Store the new reading word limit and update its label."""
    self.reading_word_limit = value
    caption = f"Word Limit For Reading File: {value} words"
    self.slider2_label.setText(caption)
543 |
def slider3_changed(self, value):
    """Store the new similarity threshold and update its label."""
    self.similarity_threshold = value
    caption = f"Similarity Threshold Percent: {value} %"
    self.slider3_label.setText(caption)
547 |
--------------------------------------------------------------------------------
/gui/views/settings.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from PyQt6.QtWidgets import (
4 | QPushButton, QVBoxLayout, QLabel, QSlider, QDialog
5 | )
6 | from PyQt6.QtGui import QFont
7 | from PyQt6.QtCore import Qt
8 | from PyQt6 import QtCore
9 |
10 | from connor import data_path
11 |
12 |
class Settings(QDialog):
    """Modal dialog for editing the default organizer parameters.

    One slider is shown for each entry of the ``Parameters`` section
    (``folder_name_length``, ``reading_word_limit``, ``similarity_threshold``).
    Clicking "Update" writes the chosen values to ``config.ini`` inside
    ``data_path``.
    """

    def __init__(self, settings, parent):
        """Build the dialog widgets.

        Args:
            settings: Configuration object with a ``Parameters`` section and a
                ``write(file)`` method (presumably a
                ``configparser.ConfigParser`` -- confirm against the caller).
            parent: Parent widget passed on to ``QDialog``.
        """
        super().__init__(parent)
        self.setWindowTitle("Settings")
        self.setFixedSize(350, 350)
        self.setModal(True)
        self.settings = settings

        # Layouts
        layout = QVBoxLayout()
        title_layout = QVBoxLayout()
        btn_layout = QVBoxLayout()

        # Fonts
        md_font = QFont()
        md_font.setPointSize(14)
        sm_font = QFont()
        sm_font.setPointSize(12)

        title = QLabel("Change Default Values")
        title.setFont(md_font)
        title_layout.addWidget(title)
        layout.addLayout(title_layout)

        layout.addSpacing(20)

        # Initializing sliders and buttons
        self.setting_label1, self.setting_input1 = self._add_slider(
            layout, sm_font, "Folder Name Length", "folder_name_length",
            (2, 5), self.setting_input1_changed,
        )
        self.setting_label2, self.setting_input2 = self._add_slider(
            layout, sm_font, "Reading Word Limit", "reading_word_limit",
            (100, 1000), self.setting_input2_changed,
        )
        self.setting_label3, self.setting_input3 = self._add_slider(
            layout, sm_font, "Similarity Threshold", "similarity_threshold",
            (0, 100), self.setting_input3_changed,
        )

        layout.addSpacing(20)

        update_button = QPushButton("Update")
        update_button.setFixedSize(80, 35)
        update_button.setFont(md_font)
        update_button.clicked.connect(self.save_settings)
        btn_layout.setAlignment(QtCore.Qt.AlignmentFlag.AlignHCenter)
        btn_layout.addWidget(update_button)
        layout.addLayout(btn_layout)

        # Status line used by save_settings() to report success or failure.
        self.state_label = QLabel("", self)
        layout.addWidget(self.state_label)
        self.setLayout(layout)

    def _add_slider(self, layout, font, caption, key, bounds, on_change):
        """Append a caption label and a horizontal slider for one parameter.

        Args:
            layout: Layout that receives the label followed by the slider.
            font: Font applied to the label.
            caption: Text shown before the value in the label.
            key: Name of the option inside the ``Parameters`` section.
            bounds: ``(minimum, maximum)`` pair for the slider range.
            on_change: Slot connected to the slider's ``valueChanged`` signal.

        Returns:
            The ``(label, slider)`` pair that was added to ``layout``.
        """
        slider = QSlider(Qt.Orientation.Horizontal)
        slider.setRange(*bounds)
        slider.setValue(int(self.settings["Parameters"][key]))

        # Read the text back from the slider instead of echoing the stored
        # string: the slider keeps its value inside the range, so a stored
        # value outside the range would otherwise show a number that differs
        # from the slider position (and from what "Update" would save).
        label = QLabel(f"{caption}: {slider.value()}")
        label.setFont(font)

        # Connect only after the initial value is set, so the initial
        # setValue() does not invoke the slot.
        slider.valueChanged.connect(on_change)
        layout.addWidget(label)
        layout.addWidget(slider)
        return label, slider

    # Saves new default settings in config file
    def save_settings(self):
        """Copy the slider values into ``settings`` and write ``config.ini``.

        The outcome is reported in the status label. A failure to write the
        file (for example a read-only install location) is shown there
        instead of being raised out of the button's slot.
        """
        self.settings["Parameters"]["folder_name_length"] = str(self.setting_input1.value())
        self.settings["Parameters"]["reading_word_limit"] = str(self.setting_input2.value())
        self.settings["Parameters"]["similarity_threshold"] = str(self.setting_input3.value())

        try:
            with open(os.path.join(data_path, "config.ini"), "w") as file:
                self.settings.write(file)
        except OSError as error:
            self.state_label.setText(f"Could not save settings: {error.strerror or error}")
            return

        self.state_label.setText("Default settings have been updated successfully")

    # Keep each label in sync with its slider in this dialog
    def setting_input1_changed(self, value):
        """Show the folder-name-length slider's current value in its label."""
        self.setting_label1.setText(f"Folder Name Length: {value}")

    def setting_input2_changed(self, value):
        """Show the reading-word-limit slider's current value in its label."""
        self.setting_label2.setText(f"Reading Word Limit: {value}")

    def setting_input3_changed(self, value):
        """Show the similarity-threshold slider's current value in its label."""
        self.setting_label3.setText(f"Similarity Threshold: {value}")
101 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | docx==0.2.4
2 | nltk==3.9.1
3 | numpy==2.1.3
4 | odfpy==1.4.1
5 | openpyxl==3.1.5
6 | PyPDF2==3.0.1
7 | PyQt6==6.7.1
8 | scikit_learn==1.5.2
9 | sentence_transformers==3.3.1
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from gui import main as gui_main
4 | from cli import main as cli_main
5 |
6 |
def main():
    """Parse the command line and dispatch to the GUI or the CLI tool.

    Command line:
        --gui                 run the graphical interface
        run PATH              organize the folder at PATH
        settings [options]    show (--show) or update the stored settings

    ``settings`` without any option prints the help of that sub-command;
    no command at all prints the top-level help.
    """
    parser = argparse.ArgumentParser(prog='Connor', description='Connor: Fast and local NLP file organizer')
    parser.add_argument('--gui', action='store_true', help='Run the application in GUI mode.')
    subparsers = parser.add_subparsers(dest='command')

    # Subparser for updating settings
    settings_parser = subparsers.add_parser('settings', help='Update the settings for the organizer')
    settings_parser.add_argument('-f', '--folder-word-limit', type=int,
                                 help='Specify the maximum number of words allowed in the created folder names')
    settings_parser.add_argument('-r', '--reading-limit', type=int,
                                 help='Set a limit on the number of words to read from the file content')
    settings_parser.add_argument('-t', '--similarity-threshold', type=int,
                                 help='Change the similarity threshold for a custom threshold percentage for grouping similar files')
    settings_parser.add_argument('--show', action='store_true', help='Show current settings')

    # Subparser for running the organization
    run_parser = subparsers.add_parser('run', help='Run the folder organization process')
    run_parser.add_argument('path', type=str, help='Path to the folder to organize')

    args = parser.parse_args()
    # NOTE(review): the CLI tool is created before the mode is known, so it is
    # also built for --gui and for plain help output. Left in place because
    # what cli_main() does is not visible from this file.
    cli_tool = cli_main()

    # GUI
    if args.gui:
        gui_main()
    # Organize
    elif args.command == 'run':
        cli_tool.organize_folder(args.path)
    # Settings
    elif args.command == 'settings':
        if args.show:
            cli_tool.show_settings()
        else:
            updates = {
                'folder_name_length': args.folder_word_limit,
                'reading_word_limit': args.reading_limit,
                'similarity_threshold': args.similarity_threshold,
            }
            # An option that was not given is None. Compare against None
            # rather than testing truthiness, so an explicit 0 (for example
            # `settings -t 0`) counts as a requested update and does not fall
            # through to the help text.
            if any(value is not None for value in updates.values()):
                cli_tool.update_settings(**updates)
            else:
                settings_parser.print_help()
    # Help
    else:
        parser.print_help()
51 |
# Run the command-line entry point only when this file is executed directly.
if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
# The README doubles as the long description of the distribution.
# Decode it as UTF-8 explicitly: without `encoding`, open() uses the
# platform's locale encoding, which can fail on non-ASCII characters.
with open('README.md', 'r', encoding='utf-8') as file:
    long_description = file.read()

setup(
    name='connor_nlp',
    version='1.0.0',
    py_modules=['run'],
    # NOTE(review): find_packages() matches dotted package names, so the
    # slash-separated `exclude` patterns below likely never match -- confirm.
    packages=find_packages(include=['connor', 'cli', 'gui', 'connor.*', 'cli.*', 'gui.*'],
                           exclude=['connor/data', 'connor/static', 'connor/static/icons',
                                    'connor/fonts', 'connor/tmp']),
    include_package_data=True,
    # Non-Python resources shipped inside the `connor` package.
    package_data={
        'connor': ['data/*', 'static/*', 'static/icons/*','fonts/*', 'tmp/*'],
    },
    install_requires=[
        "docx==0.2.4",
        "python_pptx==1.0.2",
        "python_docx==0.8.11",
        "nltk==3.9.1",
        "numpy==2.1.3",
        "odfpy==1.4.1",
        "openpyxl==3.1.5",
        "PyPDF2==3.0.1",
        "PyQt6==6.7.1",
        "scikit_learn==1.5.2",
        "sentence_transformers==3.3.1",
    ],
    # Installs a `connor` command that calls main() from run.py.
    entry_points={
        'console_scripts': [
            'connor=run:main',
        ],
    },
    author='Ycatsh',
    description='Fast and fully local NLP file organizer that organizes files based on their content.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/ycatsh/connor',
    license='MIT',
    license_file='LICENSE',
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.10'
)
--------------------------------------------------------------------------------